<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-19-01767</article-id>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007663</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Genome-wide association studies</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Genome-wide association studies</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Human genetics</subject><subj-group><subject>Genome-wide association studies</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Heredity</subject><subj-group><subject>Genetic mapping</subject><subj-group><subject>Haplotypes</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Kernel methods</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Kernel methods</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genetic loci</subject><subj-group><subject>Alleles</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Heredity</subject><subj-group><subject>Genetic mapping</subject><subj-group><subject>Variant genotypes</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Plants</subject><subj-group><subject>Grasses</subject><subj-group><subject>Rice</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Animal studies</subject><subj-group><subject>Experimental organism systems</subject><subj-group><subject>Plant and algal models</subject><subj-group><subject>Rice</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Source code</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Engineering and technology</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Source code</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>RAINBOW: Haplotype-based genome-wide association study using a novel SNP-set method</article-title>
<alt-title alt-title-type="running-head">RAINBOW: Haplotype-based GWAS using a SNP-set method</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-7486-7438</contrib-id>
<name name-style="western">
<surname>Hamazaki</surname> <given-names>Kosuke</given-names></name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Investigation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6747-7036</contrib-id>
<name name-style="western">
<surname>Iwata</surname> <given-names>Hiroyoshi</given-names></name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"/>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001">
<addr-line>Department of Agricultural and Environmental Biology, Graduate School of Agricultural and Life Sciences, The University of Tokyo, Tokyo, Japan</addr-line>
</aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Pertea</surname> <given-names>Mihaela</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1">
<addr-line>Johns Hopkins University, UNITED STATES</addr-line>
</aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">aiwata@mail.ecc.u-tokyo.ac.jp</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<month>2</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="epub">
<day>14</day>
<month>2</month>
<year>2020</year>
</pub-date>
<volume>16</volume>
<issue>2</issue>
<elocation-id>e1007663</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>10</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>1</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Hamazaki, Iwata</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1007663"/>
<abstract>
<p>Difficulty in detecting rare variants is one of the problems in conventional genome-wide association studies (GWAS). The problem is closely related to the complex gene compositions comprising multiple alleles, such as haplotypes. Several single nucleotide polymorphism (SNP) set approaches have been proposed to solve this problem. These methods, however, have been rarely discussed in connection with haplotypes. In this study, we developed a novel SNP-set method named “RAINBOW” and applied the method to haplotype-based GWAS by regarding a haplotype block as a SNP-set. Combining haplotype block estimation and SNP-set GWAS, haplotype-based GWAS can be conducted without prior information of haplotypes. We prepared 100 datasets of simulated phenotypic data and real marker genotype data of <italic>Oryza sativa</italic> subsp. <italic>indica</italic>, and performed GWAS of the datasets. We compared the power of our method, the conventional single-SNP GWAS, the conventional haplotype-based GWAS, and the conventional SNP-set GWAS. Our proposed method was shown to be superior to these in three aspects: (1) in controlling false positives; (2) in detecting causal variants without relying on the linkage disequilibrium if causal variants were genotyped in the dataset; and (3) in achieving greater power than the other methods, i.e., it was able to detect causal variants that were not detected by the others, primarily when the causal variants were located very close to each other, and the directions of their effects were opposite. By using the SNP-set approach as in this study, we expect that detecting not only rare variants but also genes with complex mechanisms, such as genes with multiple causal variants, can be realized. 
RAINBOW was implemented as an R package named “RAINBOWR” and is available from CRAN (<ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/RAINBOWR/index.html" xlink:type="simple">https://cran.r-project.org/web/packages/RAINBOWR/index.html</ext-link>) and GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/KosukeHamazaki/RAINBOWR" xlink:type="simple">https://github.com/KosukeHamazaki/RAINBOWR</ext-link>).</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Detecting rare variants has been one of the most challenging problems in GWAS. Here, we proposed a novel SNP-set GWAS approach, which is superior in controlling false positives and detecting rare variants compared with conventional approaches, and implemented this method as an R package named “RAINBOWR” (Reliable Association INference By Optimizing Weights with R). In this article, we introduce the application of RAINBOW to haplotype-based GWAS by regarding a haplotype block as a SNP-set, which enables one to perform haplotype-based GWAS without prior haplotype information. We showed that the haplotype-based GWAS with the RAINBOW package succeeded in detecting causal variants with complex mechanisms that were not detected by any other conventional methods. RAINBOW also offers a fast single-SNP GWAS method. RAINBOW offers not only a SNP-set GWAS that can be applied to universal situations but also one that is faster in the restricted situation in which a linear kernel is used for constructing the Gram matrix of the SNP-set of interest. We also used Rcpp (functions for using C++ in R) for the RAINBOW implementation to achieve faster computation. We believe that our package will lead to the detection of novel genes associated with biologically and agronomically essential traits.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100003382</institution-id>
<institution>Core Research for Evolutional Science and Technology</institution>
</institution-wrap>
</funding-source>
<award-id>JPMJCR16O2</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6747-7036</contrib-id>
<name name-style="western">
<surname>Iwata</surname> <given-names>Hiroyoshi</given-names></name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was supported by JST CREST (<ext-link ext-link-type="uri" xlink:href="https://www.jst.go.jp/kisoken/crest/en/index.html" xlink:type="simple">https://www.jst.go.jp/kisoken/crest/en/index.html</ext-link>) Grant Number JPMJCR16O2, Japan. The funders had no role in study design, data collection, and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="0"/>
<page-count count="17"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>PLOS Publication Stage</meta-name>
<meta-value>vor-update-to-uncorrected-proof</meta-value>
</custom-meta>
<custom-meta>
<meta-name>Publication Update</meta-name>
<meta-value>2020-02-27</meta-value>
</custom-meta>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>We implemented the method as an R package named RAINBOWR. RAINBOWR is deposited in the CRAN (Comprehensive R Archive Network), <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/RAINBOWR/index.html" xlink:type="simple">https://cran.r-project.org/web/packages/RAINBOWR/index.html</ext-link>, and in the "KosukeHamazaki/RAINBOWR" repository in the GitHub, <ext-link ext-link-type="uri" xlink:href="https://github.com/KosukeHamazaki/RAINBOWR" xlink:type="simple">https://github.com/KosukeHamazaki/RAINBOWR</ext-link>. The datasets and scripts generated and analyzed during the current study are available from the “KosukeHamazaki/HGRAINBOW” repository in the GitHub, <ext-link ext-link-type="uri" xlink:href="https://github.com/KosukeHamazaki/HGRAINBOW" xlink:type="simple">https://github.com/KosukeHamazaki/HGRAINBOW</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<p>This is a <italic>PLOS Computational Biology</italic> Software paper.</p>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>With the decreasing cost and increasing throughput of next-generation sequencing, the number of accessions that can be used for genome-wide association study (GWAS) is increasing [<xref ref-type="bibr" rid="pcbi.1007663.ref001">1</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref003">3</xref>]. Using such large sequencing data, GWAS is now widely used not only in human but also in plant and animal genetics and breeding, and has identified novel genes related to important agronomic traits [<xref ref-type="bibr" rid="pcbi.1007663.ref004">4</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref006">6</xref>]. One example of large next-generation sequencing data is that of the “3,000 rice genomes project” as used in this study [<xref ref-type="bibr" rid="pcbi.1007663.ref007">7</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref008">8</xref>], data from which are available in the “Rice SNP-Seek Database” [<xref ref-type="bibr" rid="pcbi.1007663.ref009">9</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref011">11</xref>]. GWAS results using these data have already been reported [<xref ref-type="bibr" rid="pcbi.1007663.ref012">12</xref>].</p>
<p>Despite the enhancement of such public data, the conventional GWAS method still faces obstacles in the detection of unknown candidate genes. One common example is its difficulty in detecting rare alleles or rare variants. One problem caused by rare variants is that the non-causal markers that have a strong linkage disequilibrium (LD) with one causal rare variant indicate a higher detection power than the true causal rare variant, which may interfere with the detection of the true causal variant. This phenomenon is known as “synthetic association”, and often happens when the minor allele frequency (MAF) of the non-causal marker is higher than that of the true rare variant [<xref ref-type="bibr" rid="pcbi.1007663.ref013">13</xref>]. This problem is closely related to the complex gene compositions comprising multiple alleles such as haplotypes because genes related to important agronomic traits often consist of multiple rare alleles, and this is why haplotypes are hard to detect using GWAS [<xref ref-type="bibr" rid="pcbi.1007663.ref014">14</xref>].</p>
<p>Several methods have been proposed to solve this problem. The sequence kernel association test (SKAT) is one of the methods used to detect rare variants, and has been used mainly in human genomics [<xref ref-type="bibr" rid="pcbi.1007663.ref015">15</xref>]. The SKAT employs a single nucleotide polymorphism (SNP) set approach, which tests multiple SNPs in each SNP-set at the same time. The SKAT evaluates the significance of the variance explained by a SNP-set of interest as a random effect using a mixed effect model approach [<xref ref-type="bibr" rid="pcbi.1007663.ref016">16</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref017">17</xref>]. The fatal drawback of the original SKAT is that the model does not take the effects of family relatedness into account as a random effect, which results in false positives for GWAS in materials with a strong population structure or family relatedness, such as in the world collection of rice germplasm used in this study. Several methods were also proposed to overcome another SKAT drawback: a weighting scheme of the SKAT for rare and common variants can lead to loss of power of common variants, but their models also do not include the term for correcting the confounding effects of family relatedness [<xref ref-type="bibr" rid="pcbi.1007663.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref019">19</xref>].</p>
<p>To solve the fatal drawbacks of the original SKAT, several methods whose models include the term of family relatedness as random effects to control false positives have been previously proposed [<xref ref-type="bibr" rid="pcbi.1007663.ref020">20</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref022">22</xref>]. From a statistical point of view, these methods usually perform the score test [<xref ref-type="bibr" rid="pcbi.1007663.ref023">23</xref>], which is a computationally efficient method since it requires variance component estimation only for the null model. In terms of the detection power, however, the score test is not necessarily the best method for testing the random effects in the mixed effects model [<xref ref-type="bibr" rid="pcbi.1007663.ref024">24</xref>]. The likelihood-ratio (LR) test [<xref ref-type="bibr" rid="pcbi.1007663.ref025">25</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref026">26</xref>] is another candidate used to test the variance of a SNP-set of interest, and several methods have been proposed that use the LR test for SNP-set GWAS in family samples [<xref ref-type="bibr" rid="pcbi.1007663.ref024">24</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref027">27</xref>]. In particular, Lippert <italic>et al</italic>. implemented a computationally efficient SNP-set GWAS method using the LR test, and reported that the LR test showed greater power than the score test [<xref ref-type="bibr" rid="pcbi.1007663.ref024">24</xref>]. Despite being such an efficient method, Lippert <italic>et al</italic>. mainly used a linear kernel for constructing the Gram matrix from each SNP-set, and therefore other kernels, such as a Gaussian kernel or an exponential kernel, cannot be used for constructing the Gram matrix in their method.</p>
<p>Haplotype-based approaches, which try to improve the detection power of causal haplotypes, make sense from the point of view that a gene functions as one gene set, not as each SNP in the gene set. These haplotype-based approaches are expected to control false positives better than the single-SNP method because the haplotype-based methods focus on the entire haplotype block, not on each SNP in the haplotype block. These methods are also expected to reveal the complex mechanism of causal haplotypes that cannot be detected when focusing on one SNP, such as repulsion states between two causal quantitative trait loci (QTL) located close to each other. However, only a few methods for haplotype-based GWAS have so far been proposed. In plant genomics, Yano <italic>et al</italic>. performed a haplotype-based GWAS by testing the effects of haplotypes while regarding dummy variables of haplotype groups as fixed effects, and found new candidate genes related to heading date for rice [<xref ref-type="bibr" rid="pcbi.1007663.ref028">28</xref>]. Other approaches have been proposed in animal genomics, which estimated ancestral haplotype effects by regarding them as random effects [<xref ref-type="bibr" rid="pcbi.1007663.ref029">29</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref030">30</xref>]. In their methods, each pairwise element of a covariance matrix for the random effects was determined as 1 if individuals belong to the same ancestral haplotype, and 0 if otherwise. However, these conventional haplotype-based GWAS methods require haplotype information a priori, and it is not so easy to apply these methods at the genome-wide level.</p>
<p>In this study, we extended the multi-kernel mixed effects model more generally to take family relatedness into account, while enabling computational speed-up for some limited cases, and developed a novel SNP-set GWAS approach named RAINBOW (Reliable Association INference By Optimizing Weights). We also estimated haplotype blocks from genome-wide marker genotype data, and used them as SNP-sets for analysis with RAINBOW to enable haplotype-based GWAS without prior haplotype information.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Materials and methods</title>
<p>All statistical analyses in this study were conducted using R version 3.6.0 [<xref ref-type="bibr" rid="pcbi.1007663.ref031">31</xref>], and figures were produced using the R package ggplot2 version 3.2.1 [<xref ref-type="bibr" rid="pcbi.1007663.ref032">32</xref>]. Our R package, RAINBOWR, was implemented using the R packages Rcpp version 1.0.2 [<xref ref-type="bibr" rid="pcbi.1007663.ref033">33</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref035">35</xref>] and RcppEigen version 0.3.3.5.0 [<xref ref-type="bibr" rid="pcbi.1007663.ref036">36</xref>] to reduce the computational time required for solving the multi-kernel mixed-effects model described below. The overall simulation framework in this study is shown in <xref ref-type="supplementary-material" rid="pcbi.1007663.s004">S1 Fig</xref> as a flow chart.</p>
<sec id="sec003">
<title>Methods for RAINBOW</title>
<p>In this subsection, we describe the basic idea of RAINBOW.</p>
<sec id="sec004">
<title>RAINBOW model</title>
<p>The RAINBOW model can be written as
<disp-formula id="pcbi.1007663.e001"><alternatives><graphic id="pcbi.1007663.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e001" xlink:type="simple"/><mml:math display="block" id="M1"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="bold">y</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">X</mml:mi><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="bold-italic">ϵ</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(1)</label></disp-formula>
where <bold>y</bold> is a <italic>n</italic> × 1 vector of phenotypic values, <bold>X<italic>β</italic></bold> is a <italic>n</italic> × 1 vector of fixed effects including an intercept, a term to correct the population structure and other covariates, <bold>Z</bold><sub>c</sub><bold>u</bold><sub>c</sub> and <inline-formula id="pcbi.1007663.e002"><alternatives><graphic id="pcbi.1007663.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e002" xlink:type="simple"/><mml:math display="inline" id="M2"><mml:mrow><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:mrow></mml:math></alternatives></inline-formula> are <italic>n</italic> × 1 vectors of random effects, and <bold><italic>ϵ</italic></bold> is a <italic>n</italic> × 1 vector of residual errors. Here <bold><italic>β</italic></bold> is a <italic>p</italic> × 1 vector of fixed effects, where <italic>p</italic> is the number of fixed effects. 
<bold>u</bold><sub>c</sub> and <inline-formula id="pcbi.1007663.e003"><alternatives><graphic id="pcbi.1007663.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e003" xlink:type="simple"/><mml:math display="inline" id="M3"><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> are <italic>m</italic><sub>c</sub> × 1 and <inline-formula id="pcbi.1007663.e004"><alternatives><graphic id="pcbi.1007663.e004g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e004" xlink:type="simple"/><mml:math display="inline" id="M4"><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>×</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> vectors of genotypic values, respectively, where <italic>m</italic><sub>c</sub> is the number of genotypes for additive polygenetic effects and <inline-formula id="pcbi.1007663.e005"><alternatives><graphic id="pcbi.1007663.e005g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e005" xlink:type="simple"/><mml:math display="inline" id="M5"><mml:msub><mml:mi>m</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> is the number of genotypes for the <italic>i</italic>-th SNP-set of interest. 
<bold>X</bold>, <bold>Z</bold><sub>c</sub> and <inline-formula id="pcbi.1007663.e006"><alternatives><graphic id="pcbi.1007663.e006g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e006" xlink:type="simple"/><mml:math display="inline" id="M6"><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> are <italic>n</italic> × <italic>p</italic>, <italic>n</italic> × <italic>m</italic><sub>c</sub> and <inline-formula id="pcbi.1007663.e007"><alternatives><graphic id="pcbi.1007663.e007g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e007" xlink:type="simple"/><mml:math display="inline" id="M7"><mml:mrow><mml:mi>n</mml:mi><mml:mo>×</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:mrow></mml:math></alternatives></inline-formula> design matrices that correspond to <bold><italic>β</italic></bold>, <bold>u</bold><sub>c</sub> and <inline-formula id="pcbi.1007663.e008"><alternatives><graphic id="pcbi.1007663.e008g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e008" xlink:type="simple"/><mml:math display="inline" id="M8"><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>, respectively. As shown in <xref ref-type="disp-formula" rid="pcbi.1007663.e009">Eq 2</xref>, we assume that the polygenetic effect <bold>u</bold><sub>c</sub> follows the multivariate normal distribution whose variance-covariance matrix is proportional to the additive numerator relationship matrix <bold>K</bold><sub>c</sub>.
<disp-formula id="pcbi.1007663.e009"><alternatives><graphic id="pcbi.1007663.e009g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e009" xlink:type="simple"/><mml:math display="block" id="M9"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:mo>∼</mml:mo><mml:mtext>MVN</mml:mtext><mml:mo>(</mml:mo><mml:mn mathvariant="bold">0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>c</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(2)</label></disp-formula>
where <inline-formula id="pcbi.1007663.e010"><alternatives><graphic id="pcbi.1007663.e010g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e010" xlink:type="simple"/><mml:math display="inline" id="M10"><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>c</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> is the additive genetic variance to be estimated in the “Estimation of variance components” section, and here <italic>m</italic><sub>c</sub> × <italic>m</italic><sub>c</sub> matrix <bold>K</bold><sub>c</sub> = <bold>A</bold>, where <bold>A</bold> is the known additive genetic relationship matrix estimated from marker genotype data <bold>W</bold><sub>c</sub> [<xref ref-type="bibr" rid="pcbi.1007663.ref037">37</xref>].</p>
<p>We also assume that the random effects from the <italic>i</italic>-th SNP-set of interest, <inline-formula id="pcbi.1007663.e011"><alternatives><graphic id="pcbi.1007663.e011g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e011" xlink:type="simple"/><mml:math display="inline" id="M11"><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>, follow the multivariate normal distribution whose variance-covariance matrix is proportional to the Gram matrix <inline-formula id="pcbi.1007663.e012"><alternatives><graphic id="pcbi.1007663.e012g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e012" xlink:type="simple"/><mml:math display="inline" id="M12"><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>.
<disp-formula id="pcbi.1007663.e013"><alternatives><graphic id="pcbi.1007663.e013g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e013" xlink:type="simple"/><mml:math display="block" id="M13"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>∼</mml:mo><mml:mtext>MVN</mml:mtext><mml:mo>(</mml:mo><mml:mn mathvariant="bold">0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msubsup><mml:mi>σ</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(3)</label></disp-formula>
where <inline-formula id="pcbi.1007663.e014"><alternatives><graphic id="pcbi.1007663.e014g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e014" xlink:type="simple"/><mml:math display="inline" id="M14"><mml:msubsup><mml:mi>σ</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> is the genetic variance for the <italic>i</italic>-th SNP-set to be estimated in the “Estimation of variance components” section, and <inline-formula id="pcbi.1007663.e015"><alternatives><graphic id="pcbi.1007663.e015g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e015" xlink:type="simple"/><mml:math display="inline" id="M15"><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> is the known <inline-formula id="pcbi.1007663.e016"><alternatives><graphic id="pcbi.1007663.e016g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e016" xlink:type="simple"/><mml:math display="inline" id="M16"><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>×</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:mrow></mml:math></alternatives></inline-formula> Gram matrix estimated from marker genotype data <inline-formula id="pcbi.1007663.e017"><alternatives><graphic id="pcbi.1007663.e017g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e017" xlink:type="simple"/><mml:math display="inline" id="M17"><mml:msub><mml:mi mathvariant="bold">W</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> belonging to the <italic>i</italic>-th SNP-set. 
We offer a linear, an exponential and a Gaussian kernel for the Gram matrix <inline-formula id="pcbi.1007663.e018"><alternatives><graphic id="pcbi.1007663.e018g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e018" xlink:type="simple"/><mml:math display="inline" id="M18"><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>, and faster computation can be realized for the linear kernel case (Supplementary Note in <xref ref-type="supplementary-material" rid="pcbi.1007663.s001">S1 Appendix</xref>) [<xref ref-type="bibr" rid="pcbi.1007663.ref024">24</xref>].</p>
<p>Finally, the residual term is assumed to identically and independently follow a normal distribution as shown in the following equation.
<disp-formula id="pcbi.1007663.e019"><alternatives><graphic id="pcbi.1007663.e019g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e019" xlink:type="simple"/><mml:math display="block" id="M19"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="bold-italic">ϵ</mml:mi><mml:mo>∼</mml:mo><mml:mtext>MVN</mml:mtext><mml:mo>(</mml:mo><mml:mn mathvariant="bold">0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold">I</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:msubsup><mml:mi>σ</mml:mi><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(4)</label></disp-formula>
where <bold>I</bold><sub><italic>n</italic></sub> is an <italic>n</italic> × <italic>n</italic> identity matrix and <inline-formula id="pcbi.1007663.e020"><alternatives><graphic id="pcbi.1007663.e020g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e020" xlink:type="simple"/><mml:math display="inline" id="M20"><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>e</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> is estimated in the “Estimation of variance components” section.</p>
</sec>
<sec id="sec005">
<title>Estimation of variance components</title>
<p>The variance components were estimated by maximum-likelihood (ML) [<xref ref-type="bibr" rid="pcbi.1007663.ref026">26</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref038">38</xref>] and restricted maximum-likelihood (REML) [<xref ref-type="bibr" rid="pcbi.1007663.ref039">39</xref>]. Here we explain how to obtain ML and REML estimates of <xref ref-type="disp-formula" rid="pcbi.1007663.e001">Eq 1</xref> for the general <inline-formula id="pcbi.1007663.e021"><alternatives><graphic id="pcbi.1007663.e021g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e021" xlink:type="simple"/><mml:math display="inline" id="M21"><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>.</p>
<p>First we estimated the weights (we define <italic>w</italic><sub>c</sub> and <inline-formula id="pcbi.1007663.e022"><alternatives><graphic id="pcbi.1007663.e022g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e022" xlink:type="simple"/><mml:math display="inline" id="M22"><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>) between the genetic variances (<inline-formula id="pcbi.1007663.e023"><alternatives><graphic id="pcbi.1007663.e023g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e023" xlink:type="simple"/><mml:math display="inline" id="M23"><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>c</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1007663.e024"><alternatives><graphic id="pcbi.1007663.e024g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e024" xlink:type="simple"/><mml:math display="inline" id="M24"><mml:msubsup><mml:mi>σ</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula>) by the following algorithm.
<list list-type="order">
<list-item><p>Setting initial parameters for <italic>w</italic><sub>c</sub> and <inline-formula id="pcbi.1007663.e025"><alternatives><graphic id="pcbi.1007663.e025g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e025" xlink:type="simple"/><mml:math display="inline" id="M25"><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>:
<disp-formula id="pcbi.1007663.e026"><alternatives><graphic id="pcbi.1007663.e026g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e026" xlink:type="simple"/><mml:math display="block" id="M26"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(5)</label></disp-formula></p></list-item>
<list-item><p>Computing the following <italic>n</italic> × <italic>n</italic> matrix <bold>K</bold><sub>s</sub>:
<disp-formula id="pcbi.1007663.e027"><alternatives><graphic id="pcbi.1007663.e027g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e027" xlink:type="simple"/><mml:math display="block" id="M27"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:mtext>s</mml:mtext></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:msubsup><mml:mi mathvariant="bold">Z</mml:mi><mml:mrow><mml:mtext>c</mml:mtext></mml:mrow><mml:mtext>T</mml:mtext></mml:msubsup><mml:msub><mml:mi>w</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msubsup><mml:mi mathvariant="bold">Z</mml:mi><mml:mrow><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mtext>T</mml:mtext></mml:msubsup><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(6)</label></disp-formula></p></list-item>
<list-item><p>Solving the following single-kernel linear mixed model (LMM) by using EMMA (efficient mixed model association) or GEMMA (genome-wide efficient mixed model association) [<xref ref-type="bibr" rid="pcbi.1007663.ref040">40</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref041">41</xref>].
<disp-formula id="pcbi.1007663.e028"><alternatives><graphic id="pcbi.1007663.e028g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e028" xlink:type="simple"/><mml:math display="block" id="M28"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="bold">y</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">X</mml:mi><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:mtext>s</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="bold-italic">ϵ</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(7)</label></disp-formula>
where
<disp-formula id="pcbi.1007663.e029"><alternatives><graphic id="pcbi.1007663.e029g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e029" xlink:type="simple"/><mml:math display="block" id="M29"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:mtext>s</mml:mtext></mml:msub><mml:mo>∼</mml:mo><mml:mtext>MVN</mml:mtext><mml:mo>(</mml:mo><mml:mn mathvariant="bold">0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:mtext>s</mml:mtext></mml:msub><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(8)</label></disp-formula></p></list-item>
<list-item><p>Computing the full log likelihood (<italic>l</italic><sub>F</sub>) or the restricted log likelihood (<italic>l</italic><sub>R</sub>) of <xref ref-type="disp-formula" rid="pcbi.1007663.e028">Eq 7</xref> by using estimated parameters; <inline-formula id="pcbi.1007663.e030"><alternatives><graphic id="pcbi.1007663.e030g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e030" xlink:type="simple"/><mml:math display="inline" id="M30"><mml:mover accent="true"><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></alternatives></inline-formula>, <inline-formula id="pcbi.1007663.e031"><alternatives><graphic id="pcbi.1007663.e031g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e031" xlink:type="simple"/><mml:math display="inline" id="M31"><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1007663.e032"><alternatives><graphic id="pcbi.1007663.e032g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e032" xlink:type="simple"/><mml:math display="inline" id="M32"><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>e</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula>:
<disp-formula id="pcbi.1007663.e033"><alternatives><graphic id="pcbi.1007663.e033g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e033" xlink:type="simple"/><mml:math display="block" id="M33"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>l</mml:mi><mml:mtext>F</mml:mtext></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="bold">y</mml:mi><mml:mo>;</mml:mo><mml:mover accent="true"><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext></mml:msub><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mi>δ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>[</mml:mo><mml:mo>−</mml:mo><mml:mi>n</mml:mi><mml:mspace width="2pt"/><mml:mtext>log</mml:mtext><mml:mo>(</mml:mo><mml:mn>2</mml:mn><mml:mi>π</mml:mi><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>−</mml:mo><mml:mtext>log</mml:mtext><mml:mspace width="2pt"/><mml:mrow><mml:mo>|</mml:mo><mml:mover accent="true"><mml:mi mathvariant="bold">H</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>|</mml:mo></mml:mrow><mml:mo/></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:mo/><mml:mo>−</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:mfrac><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="bold">y</mml:mi><mml:mo>−</mml:mo><mml:mi mathvariant="bold">X</mml:mi><mml:mover accent="true"><mml:mi 
mathvariant="bold-italic">β</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mspace width="4.pt"/><mml:mtext>T</mml:mtext></mml:mrow></mml:msup><mml:mspace width="4.pt"/><mml:msup><mml:mover accent="true"><mml:mi mathvariant="bold">H</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="bold">y</mml:mi><mml:mo>−</mml:mo><mml:mi mathvariant="bold">X</mml:mi><mml:mover accent="true"><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>)</mml:mo></mml:mrow><mml:mo>]</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd><mml:mrow/></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(9)</label></disp-formula>
<disp-formula id="pcbi.1007663.e034"><alternatives><graphic id="pcbi.1007663.e034g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e034" xlink:type="simple"/><mml:math display="block" id="M34"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>l</mml:mi><mml:mtext>R</mml:mtext></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="bold">y</mml:mi><mml:mo>;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext></mml:msub><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mi>δ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>l</mml:mi><mml:mtext>F</mml:mtext></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi mathvariant="bold">y</mml:mi><mml:mo>;</mml:mo><mml:mover accent="true"><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext></mml:msub><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mi>δ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:mo>+</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo>[</mml:mo><mml:mi>p</mml:mi><mml:mspace width="2pt"/><mml:mtext>log</mml:mtext><mml:mspace width="2pt"/><mml:mo>(</mml:mo><mml:mn>2</mml:mn><mml:mi>π</mml:mi><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>)</mml:mo><mml:mo>+</mml:mo><mml:mtext>log</mml:mtext><mml:mo>|</mml:mo><mml:msup><mml:mi mathvariant="bold">X</mml:mi><mml:mtext>T</mml:mtext></mml:msup><mml:mi 
mathvariant="bold">X</mml:mi><mml:mo>|</mml:mo><mml:mo/></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:mo/><mml:mo>−</mml:mo><mml:mtext>log</mml:mtext><mml:mo>|</mml:mo><mml:msup><mml:mi mathvariant="bold">X</mml:mi><mml:mtext>T</mml:mtext></mml:msup><mml:msup><mml:mover accent="true"><mml:mi mathvariant="bold">H</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mi mathvariant="bold">X</mml:mi><mml:mo>|</mml:mo><mml:mo>]</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(10)</label></disp-formula>
Here <inline-formula id="pcbi.1007663.e035"><alternatives><graphic id="pcbi.1007663.e035g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e035" xlink:type="simple"/><mml:math display="inline" id="M35"><mml:mover accent="true"><mml:mi mathvariant="bold">H</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></alternatives></inline-formula> is
<disp-formula id="pcbi.1007663.e036"><alternatives><graphic id="pcbi.1007663.e036g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e036" xlink:type="simple"/><mml:math display="block" id="M36"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="bold">H</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mfrac><mml:mover accent="true"><mml:mi mathvariant="bold">V</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:mfrac><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="bold">K</mml:mi><mml:mtext>s</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:mover accent="true"><mml:mi>δ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:msub><mml:mi mathvariant="bold">I</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(11)</label></disp-formula>
where <inline-formula id="pcbi.1007663.e037"><alternatives><graphic id="pcbi.1007663.e037g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e037" xlink:type="simple"/><mml:math display="inline" id="M37"><mml:mover accent="true"><mml:mi mathvariant="bold">V</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></alternatives></inline-formula> is a phenotypic variance-covariance matrix and <inline-formula id="pcbi.1007663.e038"><alternatives><graphic id="pcbi.1007663.e038g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e038" xlink:type="simple"/><mml:math display="inline" id="M38"><mml:mrow><mml:mover accent="true"><mml:mi>δ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>e</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>/</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></alternatives></inline-formula>.</p></list-item>
<list-item><p>Optimizing <italic>w</italic><sub>c</sub> and <inline-formula id="pcbi.1007663.e039"><alternatives><graphic id="pcbi.1007663.e039g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e039" xlink:type="simple"/><mml:math display="inline" id="M39"><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula> by maximizing the full/restricted log likelihood using the L-BFGS optimization method, repeating steps 2–4 [<xref ref-type="bibr" rid="pcbi.1007663.ref042">42</xref>].</p></list-item>
</list></p>
<p>After estimating the weights <italic>w</italic><sub>c</sub> and <inline-formula id="pcbi.1007663.e040"><alternatives><graphic id="pcbi.1007663.e040g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e040" xlink:type="simple"/><mml:math display="inline" id="M40"><mml:msub><mml:mi>w</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>, we estimated the variance components (<inline-formula id="pcbi.1007663.e041"><alternatives><graphic id="pcbi.1007663.e041g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e041" xlink:type="simple"/><mml:math display="inline" id="M41"><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1007663.e042"><alternatives><graphic id="pcbi.1007663.e042g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e042" xlink:type="simple"/><mml:math display="inline" id="M42"><mml:msubsup><mml:mi>σ</mml:mi><mml:mtext>e</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula>) of the model Eqs <xref ref-type="disp-formula" rid="pcbi.1007663.e028">7</xref> and <xref ref-type="disp-formula" rid="pcbi.1007663.e029">8</xref> by EMMA/GEMMA using <inline-formula id="pcbi.1007663.e043"><alternatives><graphic id="pcbi.1007663.e043g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e043" xlink:type="simple"/><mml:math display="inline" id="M43"><mml:msub><mml:mover accent="true"><mml:mi>w</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>c</mml:mtext></mml:msub></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1007663.e044"><alternatives><graphic id="pcbi.1007663.e044g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e044" xlink:type="simple"/><mml:math 
display="inline" id="M44"><mml:msub><mml:mover accent="true"><mml:mi>w</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub></mml:math></alternatives></inline-formula>. Then we obtained <inline-formula id="pcbi.1007663.e045"><alternatives><graphic id="pcbi.1007663.e045g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e045" xlink:type="simple"/><mml:math display="inline" id="M45"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>c</mml:mtext><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>w</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>c</mml:mtext></mml:msub><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1007663.e046"><alternatives><graphic id="pcbi.1007663.e046g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e046" xlink:type="simple"/><mml:math display="inline" id="M46"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>w</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:msub><mml:msubsup><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mtext>s</mml:mtext><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></alternatives></inline-formula>.</p>
<p>Our fitting method, as described above, is a two-step approach, which first estimates the weights of genetic variances, and then estimates the variance components of the model shown in Eqs <xref ref-type="disp-formula" rid="pcbi.1007663.e028">7</xref> and <xref ref-type="disp-formula" rid="pcbi.1007663.e029">8</xref> by EMMA/GEMMA with the estimated weights. On the other hand, some fitting methods that directly estimate the variance components for <xref ref-type="disp-formula" rid="pcbi.1007663.e001">Eq 1</xref> via AIREML (average information REML) [<xref ref-type="bibr" rid="pcbi.1007663.ref043">43</xref>] have also been proposed and implemented in some packages/software [<xref ref-type="bibr" rid="pcbi.1007663.ref044">44</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref045">45</xref>]. The advantage of our two-step approach compared with the direct estimation approach via AIREML is that the search space of the weights is limited to the interval [0, 1], so that convergence is relatively well assured [<xref ref-type="bibr" rid="pcbi.1007663.ref046">46</xref>] even when the heritability is very low or very high.</p>
</sec>
<sec id="sec006">
<title>Likelihood ratio test for GWAS</title>
<p>To test the significance of each SNP-set, we performed the LR test of whether <inline-formula id="pcbi.1007663.e047"><alternatives><graphic id="pcbi.1007663.e047g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e047" xlink:type="simple"/><mml:math display="inline" id="M47"><mml:mrow><mml:msubsup><mml:mi>σ</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> or not. As a null hypothesis, the following model, which does not include the term of SNP-set effects, was assumed. 
<disp-formula id="pcbi.1007663.e048"><alternatives><graphic id="pcbi.1007663.e048g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e048" xlink:type="simple"/><mml:math display="block" id="M48"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="bold">y</mml:mi><mml:mo>=</mml:mo><mml:mi mathvariant="bold">X</mml:mi><mml:mi mathvariant="bold-italic">β</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">Z</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:msub><mml:mi mathvariant="bold">u</mml:mi><mml:mtext>c</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="bold-italic">ϵ</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(12)</label></disp-formula>
In contrast, as an alternative hypothesis model, the multi-kernel linear mixed model (MKLMM) of <xref ref-type="disp-formula" rid="pcbi.1007663.e001">Eq 1</xref> was assumed. Therefore, we computed the following deviance after the estimation of variance components for each SNP-set.
<disp-formula id="pcbi.1007663.e049"><alternatives><graphic id="pcbi.1007663.e049g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e049" xlink:type="simple"/><mml:math display="block" id="M49"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>×</mml:mo><mml:mo>(</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>l</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>R</mml:mtext><mml:mo>,</mml:mo><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>l</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>R</mml:mtext><mml:mo>,</mml:mo><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(13)</label></disp-formula>
where <inline-formula id="pcbi.1007663.e050"><alternatives><graphic id="pcbi.1007663.e050g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e050" xlink:type="simple"/><mml:math display="inline" id="M50"><mml:msub><mml:mover accent="true"><mml:mi>l</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>R</mml:mtext><mml:mo>,</mml:mo><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:math></alternatives></inline-formula> is the maximum of the restricted log likelihood for the model of <xref ref-type="disp-formula" rid="pcbi.1007663.e001">Eq 1</xref> and <inline-formula id="pcbi.1007663.e051"><alternatives><graphic id="pcbi.1007663.e051g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e051" xlink:type="simple"/><mml:math display="inline" id="M51"><mml:msub><mml:mover accent="true"><mml:mi>l</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>R</mml:mtext><mml:mo>,</mml:mo><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:math></alternatives></inline-formula> is the maximum of the restricted log likelihood for the model of <xref ref-type="disp-formula" rid="pcbi.1007663.e048">Eq 12</xref>.</p>
<p>Finally, we tested the significance of <inline-formula id="pcbi.1007663.e052"><alternatives><graphic id="pcbi.1007663.e052g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e052" xlink:type="simple"/><mml:math display="inline" id="M52"><mml:msubsup><mml:mi>σ</mml:mi><mml:msub><mml:mtext>r</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mn>2</mml:mn></mml:msubsup></mml:math></alternatives></inline-formula> and calculated the <italic>p</italic>-value by assuming that the deviance in <xref ref-type="disp-formula" rid="pcbi.1007663.e049">Eq 13</xref> followed the mixture of two chi-square distributions with different degrees of freedom [<xref ref-type="bibr" rid="pcbi.1007663.ref047">47</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref048">48</xref>].
<disp-formula id="pcbi.1007663.e053"><alternatives><graphic id="pcbi.1007663.e053g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e053" xlink:type="simple"/><mml:math display="block" id="M53"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi>D</mml:mi><mml:mo>∼</mml:mo><mml:msub><mml:mi>π</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:msubsup><mml:mi>χ</mml:mi><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>+</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>−</mml:mo><mml:msub><mml:mi>π</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:msubsup><mml:mi>χ</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(14)</label></disp-formula>
where <italic>π</italic><sub>0</sub> is the mixture parameter and here we used <italic>π</italic><sub>0</sub> = 1/2.</p>
</sec>
</sec>
<sec id="sec007">
<title>Materials and simulations</title>
<sec id="sec008">
<title>Genotype data</title>
<p>In this study, 414 accessions of <italic>Oryza sativa</italic> subsp. <italic>indica</italic> were collected from “the 3,000 rice genomes project” (<xref ref-type="supplementary-material" rid="pcbi.1007663.s002">S1 Table</xref>) [<xref ref-type="bibr" rid="pcbi.1007663.ref007">7</xref>]. We used a marker genotype consisting of core SNPs defined by the Rice SNP-Seek Database as “404k CoreSNP Dataset”. Missing genotypes were imputed using Beagle version 5.0 [<xref ref-type="bibr" rid="pcbi.1007663.ref049">49</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref050">50</xref>]. We analyzed only bi-allelic sites over all accessions with a MAF ≥ 0.025 by using VCFtools version 0.1.15 [<xref ref-type="bibr" rid="pcbi.1007663.ref051">51</xref>]. In the following analysis, genotypes are represented as -1 (homozygous of the reference allele), 1 (homozygous of the alternative allele) or 0 (heterozygous of the reference and alternative alleles). As a result of this data processing, marker genotypes with 112,630 SNPs were used for the following simulation study.</p>
</sec>
<sec id="sec009">
<title>Estimation of haplotype block</title>
<p>To perform haplotype-based GWAS by regarding each haplotype block as a SNP-set, haplotype blocks were estimated from marker genotype data by using PLINK 1.9 [<xref ref-type="bibr" rid="pcbi.1007663.ref052">52</xref>–<xref ref-type="bibr" rid="pcbi.1007663.ref054">54</xref>]. As a result of estimation, we obtained 15,275 haplotype blocks consisting of 78,237 SNPs.</p>
</sec>
<sec id="sec010">
<title>Simulation of phenotype data</title>
<p>We considered two scenarios to validate our novel haplotype-based GWAS approach. In both models, phenotypic values were simulated as follows.
<disp-formula id="pcbi.1007663.e054"><alternatives><graphic id="pcbi.1007663.e054g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e054" xlink:type="simple"/><mml:math display="block" id="M54"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="bold">y</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="bold">X</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">X</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold">X</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:mi mathvariant="bold">Z</mml:mi><mml:mi mathvariant="bold">u</mml:mi><mml:mo>+</mml:mo><mml:mi mathvariant="bold">e</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(15)</label></disp-formula>
where <bold>y</bold> is the vector of simulated phenotypic values of 414 accessions, <bold>X</bold><sub>1</sub>, <bold>X</bold><sub>2</sub> and <bold>X</bold><sub>3</sub> correspond to three quantitative trait nucleotides (QTNs) scored as -1, 0 or 1 (hereinafter, referred to as “QTN1”, “QTN2” and “QTN3” respectively), <italic>β</italic><sub>1</sub>, <italic>β</italic><sub>2</sub> and <italic>β</italic><sub>3</sub> are scalars representing the effects of the three QTNs, <bold>u</bold> is the vector of polygenetic effects, and <bold>e</bold> is the vector of the residuals.</p>
<p>Here, QTN1 and QTN2 were randomly selected from all genome-wide SNPs to satisfy that they belonged to the same haplotype block that harbored more than 4 SNPs. QTN3 was randomly selected from all the SNPs. We assumed that the effects of QTN1 and QTN2 had a variance 4 times greater than that of the effects of QTN3 to mainly check the detection power for the haplotype block. More details about the other terms are described in <xref ref-type="supplementary-material" rid="pcbi.1007663.s001">S1 Appendix</xref>.</p>
<p>The difference between two scenarios is based on the directions of the two QTN effects <italic>β</italic><sub>1</sub> and <italic>β</italic><sub>2</sub>. Scenario 1 assumed that the directions of two effects were identical. That is,
<disp-formula id="pcbi.1007663.e055"><alternatives><graphic id="pcbi.1007663.e055g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e055" xlink:type="simple"/><mml:math display="block" id="M55"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable><mml:mtr><mml:mtd columnalign="left"><mml:msub><mml:mi>β</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>ρ</mml:mi><mml:mn>12</mml:mn></mml:msub><mml:mo>≥</mml:mo><mml:mn>0</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd columnalign="left"><mml:mrow><mml:mo>−</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>ρ</mml:mi><mml:mn>12</mml:mn></mml:msub><mml:mo>&lt;</mml:mo><mml:mn>0</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo/><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(16)</label></disp-formula>
where <italic>ρ</italic><sub>12</sub> is Pearson’s correlation coefficient between <bold>X</bold><sub>1</sub> and <bold>X</bold><sub>2</sub>. We refer to this scenario as “coupling”.</p>
<p>Conversely, scenario 2 assumed that the directions of the two effects were opposite. That is,
<disp-formula id="pcbi.1007663.e056"><alternatives><graphic id="pcbi.1007663.e056g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e056" xlink:type="simple"/><mml:math display="block" id="M56"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>β</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable><mml:mtr><mml:mtd columnalign="left"><mml:mrow><mml:mo>−</mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>ρ</mml:mi><mml:mn>12</mml:mn></mml:msub><mml:mo>≥</mml:mo><mml:mn>0</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd columnalign="left"><mml:msub><mml:mi>β</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>ρ</mml:mi><mml:mn>12</mml:mn></mml:msub><mml:mo>&lt;</mml:mo><mml:mn>0</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo/><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(17)</label></disp-formula></p>
<p>We refer to scenario 2 as “repulsion”.</p>
</sec>
</sec>
<sec id="sec011">
<title>Evaluation of RAINBOW</title>
<sec id="sec012">
<title>Comparison of four methods</title>
<p>To validate our novel approach, we compared the following four methods: a single-SNP GWAS [<xref ref-type="bibr" rid="pcbi.1007663.ref055">55</xref>], a haplotype-based GWAS introduced by Yano <italic>et al</italic>. (hereinafter, referred to as “HGF”) [<xref ref-type="bibr" rid="pcbi.1007663.ref028">28</xref>], the SKAT [<xref ref-type="bibr" rid="pcbi.1007663.ref015">15</xref>] as a SNP-set approach, and our novel approach, RAINBOW. For all methods, to account for the population structure, the two eigen vectors (which correspond to the top two eigen values) of the additive genetic relationship matrix were included in the model as fixed effects. The details of these four methods are described in <xref ref-type="supplementary-material" rid="pcbi.1007663.s001">S1 Appendix</xref>.</p>
</sec>
<sec id="sec013">
<title>Evaluation of the simulation results</title>
<p>The value of −log<sub>10</sub>(<italic>p</italic>) of each marker or haplotype block was calculated by the four GWAS methods 100 times for the two simulated scenarios, coupling and repulsion. In this study, the following summary statistics were used to evaluate the simulation results.</p>
<p>−log<sub>10</sub>(<italic>p</italic>) <bold>and</bold> −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>). The first summary statistic is −log<sub>10</sub>(<italic>p</italic>) of each causal SNP or haplotype block itself. For haplotype-based GWAS methods, HGF, SKAT and RAINBOW, the significance of <italic>β</italic><sub>1</sub> and <italic>β</italic><sub>2</sub> was represented by −log<sub>10</sub>(<italic>p</italic>) of the causal haplotype block to which <bold>X</bold><sub>1</sub> and <bold>X</bold><sub>2</sub> belong. In the single-SNP GWAS method, the −log<sub>10</sub>(<italic>p</italic>) of <italic>β</italic><sub>1</sub> and <italic>β</italic><sub>2</sub> were calculated separately, even though these SNPs were in the same haplotype. To compare the single-SNP GWAS method with the haplotype-based GWAS methods, the −log<sub>10</sub>(<italic>p</italic>) values were averaged over <italic>β</italic><sub>1</sub> and <italic>β</italic><sub>2</sub>.</p>
<p>As some of these methods showed the results of inflated −log<sub>10</sub>(<italic>p</italic>), we defined the following summary statistic to evaluate the degree of inflation.
<disp-formula id="pcbi.1007663.e057"><alternatives><graphic id="pcbi.1007663.e057g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e057" xlink:type="simple"/><mml:math display="block" id="M57"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>f</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>L</mml:mi></mml:mfrac><mml:munderover><mml:mo>∑</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>L</mml:mi></mml:munderover><mml:mo>(</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mtext>log</mml:mtext><mml:mn>10</mml:mn></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mo>,</mml:mo><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(18)</label></disp-formula>
where <italic>p</italic><sub><italic>false</italic>,<italic>l</italic></sub> is the <italic>l</italic><sup>th</sup> <italic>p</italic>-value among the false positives arranged in increasing order. In this study, <italic>L</italic> was set to 10. Then we adjusted −log<sub>10</sub>(<italic>p</italic>) of the causal by using the inflator (<xref ref-type="disp-formula" rid="pcbi.1007663.e057">Eq 18</xref>) as follows.
<disp-formula id="pcbi.1007663.e058"><alternatives><graphic id="pcbi.1007663.e058g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007663.e058" xlink:type="simple"/><mml:math display="block" id="M58"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mo>−</mml:mo><mml:msub><mml:mtext>log</mml:mtext><mml:mn>10</mml:mn></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>−</mml:mo><mml:msub><mml:mtext>log</mml:mtext><mml:mn>10</mml:mn></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>p</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>−</mml:mo><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>f</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(19)</label></disp-formula>
where <italic>p</italic><sub><italic>a</italic></sub> is the <italic>p</italic>-value adjusted by the inflator.</p>
<p>Here, we calculated each summary statistic in two ways. The first method is to calculate each summary statistic by directly using −log<sub>10</sub>(<italic>p</italic>) of each causal SNP / haplotype block. The other method is to calculate the summary statistics by regarding multiple SNPs or haplotype blocks within the extent of the LD as one set. In this study, we defined SNPs or haplotype blocks that satisfy the condition that they are within 300 kb from the focused SNP or haplotype block and the condition that their square of the correlation coefficients with the focused SNP or haplotype block are 0.35 or more as one set considering the LD. The highest value of −log<sub>10</sub>(<italic>p</italic>) in the LD region was assumed to represent the values of the SNPs or haplotype blocks within the extent of the LD.</p>
</sec>
<sec id="sec014">
<title>Recall, precision and F-measure</title>
<p>We calculated the recall, precision and F-measure means as other summary statistics to evaluate the GWAS results. These summary statistics were calculated from the numbers of SNPs or haplotype blocks that were true positives, false positives, false negatives and true negatives. Here, we regarded a SNP or haplotype block as “positive” when that SNP or haplotype block exceeded the threshold. In this study, the value of −log<sub>10</sub>(<italic>p</italic>) so that the FDR (false discovery rate) was 0.01 was set as the threshold by using the Benjamini-Hochberg method [<xref ref-type="bibr" rid="pcbi.1007663.ref056">56</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref057">57</xref>]. In addition, these three summary statistics, recall, precision and F-measure, were calculated by assuming that the highest value of −log<sub>10</sub>(<italic>p</italic>) in the LD region represented the values of the SNPs or haplotype blocks within the extent of the LD.</p>
<p>Therefore, recall represents the proportion of causals detected by GWAS. In contrast, precision represents the ratio of the detected SNPs or haplotype blocks that were causals. Finally, F-measure was calculated as the harmonic mean of the recall and the precision, which evaluates the GWAS results comprehensively. The greater these three summary statistics, the better the results of GWAS are. Here we simply took the average of each summary statistic from all the 100 simulation results.</p>
</sec>
<sec id="sec015">
<title>AUC for regions around causals</title>
<p>We calculated the mean of the AUC (area under the curve) for regions around the causals as a summary statistic. AUC refers to the area under the ROC (receiver operating characteristic) curve obtained by plotting the false positive rate on the horizontal axis and the true positive rate on the vertical axis when the threshold is varied. In this study, the AUC was calculated for the SNPs or haplotype blocks near the causal SNP / haplotype block (QTN1 and QTN2). In other words, the non-causal markers that had a strong LD with the causal SNP / haplotype block were regarded as false positives under this summary statistic. Therefore, this summary statistic indicates the extent to which the causal itself can be detected by GWAS without relying on the LD. Here, when taking the average of the AUCs obtained from the simulation results, two methods were used, either using all the 100 results or only using the results whose QTN1 and QTN2 were “detected”. Here, QTN1 and QTN2 were regarded as “detected” if −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>) ≥ 1.5 for each method.</p>
</sec>
</sec>
<sec id="sec016">
<title>Availability of data and material</title>
<p>RAINBOW was implemented as an R package named “RAINBOWR”, which offers the single-SNP GWAS method [<xref ref-type="bibr" rid="pcbi.1007663.ref041">41</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref055">55</xref>] and a novel SNP-set method that includes faster computation for the linear kernel [<xref ref-type="bibr" rid="pcbi.1007663.ref024">24</xref>]. A stable version of RAINBOWR is available from the CRAN (Comprehensive R Archive Network), <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/RAINBOWR/index.html" xlink:type="simple">https://cran.r-project.org/web/packages/RAINBOWR/index.html</ext-link>. The latest version of RAINBOWR is also available from the “KosukeHamazaki/RAINBOWR” repository in the GitHub, <ext-link ext-link-type="uri" xlink:href="https://github.com/KosukeHamazaki/RAINBOWR" xlink:type="simple">https://github.com/KosukeHamazaki/RAINBOWR</ext-link>. Source codes for the R package RAINBOWR are deposited in <xref ref-type="supplementary-material" rid="pcbi.1007663.s003">S1 File</xref>. The datasets generated and analyzed during the current study and their source codes are also available from the “KosukeHamazaki/HGRAINBOW” repository in the GitHub, <ext-link ext-link-type="uri" xlink:href="https://github.com/KosukeHamazaki/HGRAINBOW" xlink:type="simple">https://github.com/KosukeHamazaki/HGRAINBOW</ext-link>.</p>
</sec>
</sec>
<sec id="sec017" sec-type="results">
<title>Results</title>
<sec id="sec018">
<title>The detection power of four methods</title>
<p>The detection power of the four methods was evaluated by the value of −log<sub>10</sub>(<italic>p</italic>) and −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>) of QTN1 and QTN2 for the two models, coupling and repulsion (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>). RAINBOW outperformed the other methods when the significance was evaluated by the causal itself (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1a, 1c, 1e and 1g</xref>). However, when the significance was evaluated by the highest values of SNPs or haplotypes within the extent of the LD, other methods, e.g., HGF (k = 2, k-medoids method), showed a greater detection power than RAINBOW (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1b</xref>). When the detection power was evaluated by taking the extent of inflation into account, RAINBOW showed as great a power as HGF (k = 2, 3) even if the significance was evaluated by the unit of the LD block (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1d</xref>). Moreover, although the detection power of all the GWAS methods for the repulsion scenario was less than that for the coupling scenario, the tendency for RAINBOW to outperform the other methods was clearer for the repulsion scenario than the coupling scenario (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>). Finally, as compared with the other haplotype-based GWAS methods, RAINBOW showed smaller variation among iterations, indicating that the causal variants can be stably detected (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>).</p>
<fig id="pcbi.1007663.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007663.g001</object-id>
<label>Fig 1</label>
<caption>
<title>The detection power of each GWAS method.</title>
<p>Boxplot of the detection power evaluated by −log<sub>10</sub>(<italic>p</italic>) and −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>). <bold>a</bold>-<bold>d</bold>: The results for the “coupling” scenario. <bold>e</bold>-<bold>h</bold>: The results for the “repulsion” scenario. <bold>a</bold>,<bold>b</bold>,<bold>e</bold>,<bold>f</bold>: The results evaluated by −log<sub>10</sub>(<italic>p</italic>) with the scale on the vertical axis aligned in these four figures. <bold>c</bold>,<bold>d</bold>,<bold>g</bold>,<bold>h</bold>: The results evaluated by −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>) with the scale on the vertical axis aligned in these four figures. <bold>a</bold>,<bold>c</bold>,<bold>e</bold>,<bold>g</bold>: The results evaluated by the unit of the causal SNP or haplotype block itself. <bold>b</bold>,<bold>d</bold>,<bold>f</bold>,<bold>h</bold>: The results evaluated by the unit of the regions within the extent of LD. The abbreviation of each method is as follows. <bold>R</bold>: RAINBOW. <bold>SS</bold>: Single-SNP GWAS. <bold>H2k</bold>-<bold>H4p</bold>: HGF methods. The numbers in the method names correspond to the numbers of the groups they assume. The last letters of the methods are “k” or “p”. “k” corresponds to the k-medoids method and “p” corresponds to UPGMA method for the grouping method. <bold>SK</bold>: SKAT.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.g001" xlink:type="simple"/>
</fig>
<p>The detection power for QTN3 was also evaluated. The single-SNP GWAS method showed a greater power than RAINBOW when evaluated by −log<sub>10</sub>(<italic>p</italic>) (a,b,e,f in <xref ref-type="supplementary-material" rid="pcbi.1007663.s005">S2 Fig</xref>). However, if the detection power was evaluated by −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>), RAINBOW showed as great a power as single-SNP GWAS (c,d,g,h in <xref ref-type="supplementary-material" rid="pcbi.1007663.s005">S2 Fig</xref>). Contrary to the results for QTN1 and QTN2, the detection power of all the GWAS methods for the repulsion scenario was greater than for the coupling scenario (<xref ref-type="supplementary-material" rid="pcbi.1007663.s005">S2 Fig</xref>).</p>
</sec>
<sec id="sec019">
<title>Recall, precision and F-measure</title>
<p>The characteristics of each GWAS method were evaluated by the recall, precision and F-measure means (<xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>). For the mean of recall, the HGF methods and SKAT showed higher values than RAINBOW and single-SNP GWAS for both scenarios (<xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>). However, the haplotype-based GWAS methods other than RAINBOW showed low precision. That is, these methods may cause too many false positives. In contrast, RAINBOW and single-SNP GWAS showed higher precision than the remainders, and RAINBOW showed the highest precision among all the scenarios. From the results for the three summary statistics, RAINBOW also showed the highest value for F-measure among the methods. In particular, for the repulsion scenario, the recall of RAINBOW was also higher than that of single-SNP GWAS, which resulted in the large difference of F-measure between these two methods (<xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2b</xref>). A similar tendency was also confirmed when changing the criterion of how to determine the threshold for the Bonferroni’s correction [<xref ref-type="bibr" rid="pcbi.1007663.ref058">58</xref>] for the significance level <italic>α</italic> = 0.01 (<xref ref-type="supplementary-material" rid="pcbi.1007663.s006">S3 Fig</xref>).</p>
<fig id="pcbi.1007663.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007663.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Recall, precision and F-measure of each GWAS method.</title>
<p>Bar plot of the mean of each summary statistic for 100 simulation results. The red bars show the results for recall, the green bars show the results for precision, and the blue bars show the results for F-measure. <bold>a</bold>: Results for the coupling scenario. <bold>b</bold>: Results for the repulsion scenario. The abbreviations of each method are the same as those of <xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.g002" xlink:type="simple"/>
</fig>
<p>To compare the three summary statistics of the two scenarios in more detail, these values for each QTN were also calculated (<xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). For both scenarios, RAINBOW showed the highest recall for QTN1 and QTN2 among the methods (a, b in <xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). In particular, RAINBOW outperformed the other methods in all summary statistics for QTN1 and QTN2 for the repulsion scenario. However, it showed lower recall for QTN3 than the other methods (c, d in <xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). In particular, the recall of RAINBOW for QTN3 was 0 for the coupling scenario (c in <xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). In addition, the three summary statistics of QTN3 for the repulsion scenario were greater than those for the coupling scenario in almost all the methods (c, d in <xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). Regarding these results, a similar trend was confirmed even when changing the criterion of how to determine the threshold for the Bonferroni’s correction for the significance level <italic>α</italic> = 0.01 (<xref ref-type="supplementary-material" rid="pcbi.1007663.s008">S5 Fig</xref>).</p>
</sec>
<sec id="sec020">
<title>AUC for regions around causals</title>
<p>To evaluate how the causal itself can be detected by GWAS without relying on the LD, the AUC means for regions around the causals (QTN1 and QTN2) were compared (<xref ref-type="fig" rid="pcbi.1007663.g003">Fig 3</xref>). The mean of AUC was almost the same when using all simulation results or using only the cases in which QTN1 and QTN2 were detected, although the value of the latter was slightly larger than that of the former in some methods. The results show that RAINBOW outperformed the other methods in both models (<xref ref-type="fig" rid="pcbi.1007663.g003">Fig 3</xref>). Especially, the AUC mean of the single-SNP GWAS method in the repulsion scenario was much smaller than that in the coupling scenario, while RAINBOW was able to maintain a high AUC even in the repulsion scenario (<xref ref-type="fig" rid="pcbi.1007663.g003">Fig 3b</xref>).</p>
<fig id="pcbi.1007663.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007663.g003</object-id>
<label>Fig 3</label>
<caption>
<title>AUC for regions around causals.</title>
<p>Bar plot of the mean of AUC for regions around causals. This summary statistic indicates the extent to which the causal itself can be detected by GWAS without relying on the LD. The red bars show the results for the means of 100 simulation results and the blue bars show the results for the means of the simulation results whose QTN1 and QTN2 were detected. <bold>a</bold>: Results for the coupling scenario. <bold>b</bold>: Results for the repulsion scenario. The abbreviations of each method are the same as those of <xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.g003" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec021">
<title>Examples in the repulsion scenario</title>
<p>Of the 100 simulations for the repulsion scenario, there were 7 cases in which QTN1 and QTN2 were detected only by RAINBOW. These cases were selected to satisfy three conditions that −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic>,R</sub>) ≥ 1.5, −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic>,O</sub>) ≤ 1.2 and the recall for QTN1 and QTN2 equals 1. Here, <italic>p</italic><sub><italic>a</italic>,R</sub> represents the adjusted <italic>p</italic>-value of RAINBOW and <italic>p</italic><sub><italic>a</italic>,O</sub> represents the adjusted <italic>p</italic>-value of all the other methods. Although the same analysis was done for the other methods, no method satisfied the three conditions described above. One example of these cases (iteration 48) was shown by comparing the four GWAS methods: RAINBOW, single-SNP GWAS, HGF (the number of groups is 2, the grouping method is UPGMA) and SKAT (<xref ref-type="fig" rid="pcbi.1007663.g004">Fig 4</xref>). The Manhattan plot shows that RAINBOW succeeded in detecting the causal haplotype block (of QTN1 and QTN2) that was not detected by the other methods. Although both QTN1 and QTN2 were also detected by the single-SNP method in one case (iteration 85), the same trend as the results for iteration 48 was seen for the remaining five results (<xref ref-type="supplementary-material" rid="pcbi.1007663.s009">S6 Fig</xref>).</p>
<fig id="pcbi.1007663.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007663.g004</object-id>
<label>Fig 4</label>
<caption>
<title>An example of GWAS results for the repulsion scenario.</title>
<p>Manhattan plots of 4 GWAS methods (RAINBOW, Single-SNP GWAS, HGF (the number of groups is 2, the grouping method is UPGMA), and SKAT) for one simulation result of the Repulsion model. The black horizontal dashed lines represent the thresholds determined by the Benjamini-Hochberg method (FDR = 0.01) for each result of the Repulsion model. The red vertical dashed lines show the positions of QTN1, QTN2, and the purple ones show the position of QTN3. The red points show −log<sub>10</sub>(<italic>p</italic>) of causal SNPs or haplotypes including QTN1 and QTN2, and the purple ones show −log<sub>10</sub>(<italic>p</italic>) of QTN3 or haplotypes including QTN3.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.g004" xlink:type="simple"/>
</fig>
</sec>
</sec>
<sec id="sec022" sec-type="conclusions">
<title>Discussion</title>
<p>As shown in the Results section, when −log<sub>10</sub>(<italic>p</italic>) was evaluated by the LD block unit for the coupling scenario, RAINBOW did not necessarily outperform the other methods. However, if we considered the inflation level of each result and evaluated the results with the −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>), RAINBOW showed as great a detection power as other methods (<xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1d</xref>), which means RAINBOW succeeded in controlling false positives compared with other haplotype-based GWAS methods. This can also be seen from the fact that the precision of RAINBOW was much higher than the other GWAS methods including single-SNP GWAS (<xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>).</p>
<p>Moreover, −log<sub>10</sub>(<italic>p</italic>) of RAINBOW was the highest when evaluated by that of the causal SNP/haplotype block itself, which implies that RAINBOW can detect causal haplotype blocks themselves without relying on the LD beyond the scope of the haplotype block. This can also be confirmed by the results that showed that the AUC for the regions around the causal was larger in RAINBOW than in any other methods (<xref ref-type="fig" rid="pcbi.1007663.g003">Fig 3</xref>).</p>
<p>In addition, for the repulsion scenario, RAINBOW succeeded in detecting causal haplotype blocks that were not able to be detected by any other methods including single-SNP GWAS (<xref ref-type="fig" rid="pcbi.1007663.g004">Fig 4</xref>). This result affected other results that RAINBOW outperformed the other methods especially when evaluated by the detection power, recall, precision and F-measure in the repulsion scenario. This fact suggests that RAINBOW is good for detecting the causal haplotype block with multiple causal variants. For example, RAINBOW can be applied to the detection of genes that have more than one variant. Therefore, for future analysis, RAINBOW can be used for gene-set GWAS (which regards one gene as one SNP-set) by using gene annotation information.</p>
<p>The only drawback of RAINBOW is that the detection power for the causal with small effects (QTN3) was not so high (c,d in <xref ref-type="supplementary-material" rid="pcbi.1007663.s007">S4 Fig</xref>). The drawback may be related to the fact that RAINBOW succeeded in detecting QTN1 and QTN2 well. In other words, RAINBOW cannot account for the loci of large effects well when testing other loci, and the loci of relatively small effects may be concealed by these loci of large effects. This drawback, however, can be easily resolved by using methods that condition the loci of large effects, such as composite interval mapping for QTL analysis [<xref ref-type="bibr" rid="pcbi.1007663.ref059">59</xref>, <xref ref-type="bibr" rid="pcbi.1007663.ref060">60</xref>] or a multi-locus mixed model for GWAS [<xref ref-type="bibr" rid="pcbi.1007663.ref061">61</xref>]. For future analysis, we will implement this function to condition the loci of large effects when testing other loci of small effects.</p>
</sec>
<sec id="sec023">
<title>Supporting information</title>
<supplementary-material id="pcbi.1007663.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s001" xlink:type="simple">
<label>S1 Appendix</label>
<caption>
<title>Supplementary Note for additional RAINBOW methods.</title>
<p>A faster computational method for the linear kernel and effective testing method for dominance and epistatic effects are mainly described.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s002" mimetype="text/csv" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s002" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Supplementary table for accession information used in this study.</title>
<p>(CSV)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s003" mimetype="application/x-rar-compressed" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s003" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Source codes for the R package RAINBOWR.</title>
<p>Includes the source code and license files for the R package RAINBOWR. Please see the “Readme.md” file to get started with RAINBOW.</p>
<p>(RAR)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s004" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s004" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Supplementary figure for the flow chart of the simulation framework in this study.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s005" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s005" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>Supplementary figure for −log<sub>10</sub>(<italic>p</italic>) and −log<sub>10</sub>(<italic>p</italic><sub><italic>a</italic></sub>) of QTN3 for each method.</title>
<p>How to view this figure (including legends and abbreviations) is the same as that of <xref ref-type="fig" rid="pcbi.1007663.g001">Fig 1</xref>.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s006" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s006" xlink:type="simple">
<label>S3 Fig</label>
<caption>
<title>Supplementary figure for recall, precision and F-measure determined by the threshold criterion of Bonferroni correction whose significance level equals 0.01.</title>
<p>How to view this figure (including legends and abbreviations) is the same as that of <xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s007" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s007" xlink:type="simple">
<label>S4 Fig</label>
<caption>
<title>Supplementary figure for recall, precision and F-measure of each QTN.</title>
<p>How to view this figure (including legends and abbreviations) is the same as that of <xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s008" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s008" xlink:type="simple">
<label>S5 Fig</label>
<caption>
<title>Supplementary figure for recall, precision and F-measure of each QTN determined by the threshold criterion of Bonferroni correction whose significance level equals 0.01.</title>
<p>How to view this figure (including legends and abbreviations) is the same as that of <xref ref-type="fig" rid="pcbi.1007663.g002">Fig 2</xref>.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007663.s009" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s009" xlink:type="simple">
<label>S6 Fig</label>
<caption>
<title>Supplementary figures (6 pages) for the examples of the cases where only RAINBOW succeeded in detecting causals, for the repulsion scenario.</title>
<p>How to view this figure (including legends and abbreviations) is the same as that of <xref ref-type="fig" rid="pcbi.1007663.g004">Fig 4</xref>.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We are grateful to Dr. Ryokei Tanaka and Dr. Shiori Yabe for fruitful discussions, Dr. Motoyuki Ishimori and Mr. Goshi Sasaki for debugging the package, and Mr. Ryusuke Hamazaki for naming the package, RAINBOW.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pcbi.1007663.ref001">
<label>1</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Metzker</surname> <given-names>ML</given-names></name>. <article-title>Sequencing technologies — the next generation</article-title>. <source>Nat Rev Genet</source>. <year>2010</year>;<volume>11</volume>(<issue>1</issue>):<fpage>31</fpage>–<lpage>46</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrg2626" xlink:type="simple">10.1038/nrg2626</ext-link></comment> <object-id pub-id-type="pmid">19997069</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref002">
<label>2</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Koboldt</surname> <given-names>DC</given-names></name>, <name name-style="western"><surname>Steinberg</surname> <given-names>KM</given-names></name>, <name name-style="western"><surname>Larson</surname> <given-names>DE</given-names></name>, <name name-style="western"><surname>Wilson</surname> <given-names>RK</given-names></name>, <name name-style="western"><surname>Mardis</surname> <given-names>ER</given-names></name>. <article-title>The next-generation sequencing revolution and its impact on genomics</article-title>. <source>Cell</source>. <year>2013</year>;<volume>155</volume>(<issue>1</issue>):<fpage>27</fpage>–<lpage>38</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cell.2013.09.006" xlink:type="simple">10.1016/j.cell.2013.09.006</ext-link></comment> <object-id pub-id-type="pmid">24074859</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref003">
<label>3</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Ott</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Leal</surname> <given-names>SM</given-names></name>. <article-title>Genetic linkage analysis in the age of whole-genome sequencing</article-title>. <source>Nat Rev Genet</source>. <year>2015</year>;<volume>16</volume>(<issue>5</issue>):<fpage>275</fpage>–<lpage>284</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrg3908" xlink:type="simple">10.1038/nrg3908</ext-link></comment> <object-id pub-id-type="pmid">25824869</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref004">
<label>4</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Atwell</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Huang</surname> <given-names>YS</given-names></name>, <name name-style="western"><surname>Vilhjálmsson</surname> <given-names>BJ</given-names></name>, <name name-style="western"><surname>Willems</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Horton</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>Genome-wide association study of 107 phenotypes in Arabidopsis thaliana inbred lines</article-title>. <source>Nature</source>. <year>2010</year>;<volume>465</volume>(<issue>7298</issue>):<fpage>627</fpage>–<lpage>631</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature08800" xlink:type="simple">10.1038/nature08800</ext-link></comment> <object-id pub-id-type="pmid">20336072</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref005">
<label>5</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Huang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Wei</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Sang</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Zhao</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Feng</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Zhao</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>Genome-wide association studies of 14 agronomic traits in rice landraces</article-title>. <source>Nat Genet</source>. <year>2010</year>;<volume>42</volume>(<issue>11</issue>):<fpage>961</fpage>–<lpage>967</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng.695" xlink:type="simple">10.1038/ng.695</ext-link></comment> <object-id pub-id-type="pmid">20972439</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref006">
<label>6</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Korte</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Farlow</surname> <given-names>A</given-names></name>. <article-title>The advantages and limitations of trait analysis with GWAS: a review</article-title>. <source>Plant Methods</source>. <year>2013</year>;<volume>9</volume>(<issue>1</issue>):<fpage>29</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1746-4811-9-29" xlink:type="simple">10.1186/1746-4811-9-29</ext-link></comment> <object-id pub-id-type="pmid">23876160</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref007">
<label>7</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Li</surname> <given-names>JY</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Zeigler</surname> <given-names>RS</given-names></name>. <article-title>The 3,000 rice genomes project: New opportunities and challenges for future rice research</article-title>. <source>GigaScience</source>. <year>2014</year>;<volume>3</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>3</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/2047-217X-3-8" xlink:type="simple">10.1186/2047-217X-3-8</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref008">
<label>8</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wang</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Mauleon</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Hu</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Chebotarov</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Tai</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>Z</given-names></name>, <etal>et al</etal>. <article-title>Genomic variation in 3,010 diverse accessions of Asian cultivated rice</article-title>. <source>Nature</source>. <year>2018</year>;<volume>557</volume>(<issue>7703</issue>):<fpage>43</fpage>–<lpage>49</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41586-018-0063-9" xlink:type="simple">10.1038/s41586-018-0063-9</ext-link></comment> <object-id pub-id-type="pmid">29695866</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref009">
<label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Alexandrov</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Tai</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Mansueto</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Palis</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Fuentes</surname> <given-names>RR</given-names></name>, <etal>et al</etal>. <article-title>SNP-Seek database of SNPs derived from 3000 rice genomes</article-title>. <source>Nucleic Acids Res</source>. <year>2015</year>;<volume>43</volume>(<issue>D1</issue>):<fpage>D1023</fpage>–<lpage>D1027</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gku1039" xlink:type="simple">10.1093/nar/gku1039</ext-link></comment> <object-id pub-id-type="pmid">25429973</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref010">
<label>10</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Mansueto</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Fuentes</surname> <given-names>RR</given-names></name>, <name name-style="western"><surname>Chebotarov</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Borja</surname> <given-names>FN</given-names></name>, <name name-style="western"><surname>Detras</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Abriol-Santos</surname> <given-names>JM</given-names></name>, <etal>et al</etal>. <article-title>SNP-Seek II: A resource for allele mining and analysis of big genomic data in Oryza sativa</article-title>. <source>Curr Plant Biol</source>. <year>2016</year>;<volume>7-8</volume>:<fpage>16</fpage>–<lpage>25</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cpb.2016.12.003" xlink:type="simple">10.1016/j.cpb.2016.12.003</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref011">
<label>11</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Mansueto</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Fuentes</surname> <given-names>RR</given-names></name>, <name name-style="western"><surname>Borja</surname> <given-names>FN</given-names></name>, <name name-style="western"><surname>Detras</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Abriol-Santos</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Chebotarov</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Rice SNP-seek database update: New SNPs, indels, and queries</article-title>. <source>Nucleic Acids Res</source>. <year>2017</year>;<volume>45</volume>(<issue>D1</issue>):<fpage>D1075</fpage>–<lpage>D1081</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkw1135" xlink:type="simple">10.1093/nar/gkw1135</ext-link></comment> <object-id pub-id-type="pmid">27899667</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref012">
<label>12</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Misra</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Badoni</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Anacleto</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Graner</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Alexandrov</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Sreenivasulu</surname> <given-names>N</given-names></name>. <article-title>Whole genome sequencing-based association study to unravel genetic architecture of cooked grain width and length traits in rice</article-title>. <source>Nat Sci Reports</source>. <year>2017</year>;<volume>7</volume>(<issue>1</issue>):<fpage>12478</fpage>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref013">
<label>13</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Dickson</surname> <given-names>SP</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Krantz</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Hakonarson</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Goldstein</surname> <given-names>DB</given-names></name>. <article-title>Rare Variants Create Synthetic Genome-Wide Associations</article-title>. <source>PLoS Biol</source>. <year>2010</year>;<volume>8</volume>(<issue>1</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pbio.1000294" xlink:type="simple">10.1371/journal.pbio.1000294</ext-link></comment> <object-id pub-id-type="pmid">20126254</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref014">
<label>14</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Stram</surname> <given-names>D</given-names></name>. <source>Design, Analysis, and Interpretation of Genome-Wide Association Scans</source>. <publisher-loc>Heidelberg, New York</publisher-loc>: <publisher-name>Springer Science+Business Media</publisher-name>; <year>2014</year>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref015">
<label>15</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wu</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Cai</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Boehnke</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>X</given-names></name>. <article-title>Rare-variant association testing for sequencing data with the sequence kernel association test</article-title>. <source>Am J Hum Genet</source>. <year>2011</year>;<volume>89</volume>(<issue>1</issue>):<fpage>82</fpage>–<lpage>93</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2011.05.029" xlink:type="simple">10.1016/j.ajhg.2011.05.029</ext-link></comment> <object-id pub-id-type="pmid">21737059</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref016">
<label>16</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Liu</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Ghosh</surname> <given-names>D</given-names></name>. <article-title>Semiparametric regression of multidimensional genetic pathway data: Least-squares kernel machines and linear mixed models</article-title>. <source>Biometrics</source>. <year>2007</year>;<volume>63</volume>(<issue>4</issue>):<fpage>1079</fpage>–<lpage>1088</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1541-0420.2007.00799.x" xlink:type="simple">10.1111/j.1541-0420.2007.00799.x</ext-link></comment> <object-id pub-id-type="pmid">18078480</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref017">
<label>17</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Liu</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Ghosh</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>X</given-names></name>. <article-title>Estimation and testing for the effect of a genetic pathway on a disease outcome using logistic kernel machine regression via logistic mixed models</article-title>. <source>BMC Bioinformatics</source>. <year>2008</year>;<volume>9</volume>:<fpage>1</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-9-292" xlink:type="simple">10.1186/1471-2105-9-292</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref018">
<label>18</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Sha</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>S</given-names></name>. <article-title>Detecting Association of Rare and Common Variants by Testing an Optimally Weighted Combination of Variants</article-title>. <source>Genet Epidemiol</source>. <year>2012</year>;<volume>36</volume>(<issue>6</issue>):<fpage>561</fpage>–<lpage>571</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/gepi.21649" xlink:type="simple">10.1002/gepi.21649</ext-link></comment> <object-id pub-id-type="pmid">22714994</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref019">
<label>19</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Ionita-Laza</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Makarov</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Buxbaum</surname> <given-names>JD</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>X</given-names></name>. <article-title>Sequence kernel association tests for the combined effect of rare and common variants</article-title>. <source>Am J Hum Genet</source>. <year>2013</year>;<volume>92</volume>(<issue>6</issue>):<fpage>841</fpage>–<lpage>853</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2013.04.015" xlink:type="simple">10.1016/j.ajhg.2013.04.015</ext-link></comment> <object-id pub-id-type="pmid">23684009</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref020">
<label>20</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Schifano</surname> <given-names>ED</given-names></name>, <name name-style="western"><surname>Epstein</surname> <given-names>MP</given-names></name>, <name name-style="western"><surname>Bielak</surname> <given-names>LF</given-names></name>, <name name-style="western"><surname>Jhun</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Kardia</surname> <given-names>SLR</given-names></name>, <name name-style="western"><surname>Peyser</surname> <given-names>PA</given-names></name>, <etal>et al</etal>. <article-title>SNP Set Association Analysis for Familial Data</article-title>. <source>Genet Epidemiol</source>. <year>2012</year>;<volume>36</volume>(<issue>8</issue>):<fpage>797</fpage>–<lpage>810</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/gepi.21676" xlink:type="simple">10.1002/gepi.21676</ext-link></comment> <object-id pub-id-type="pmid">22968922</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref021">
<label>21</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Chen</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Meigs</surname> <given-names>JB</given-names></name>, <name name-style="western"><surname>Dupuis</surname> <given-names>J</given-names></name>. <article-title>Sequence Kernel Association Test for Quantitative Traits in Family Samples</article-title>. <source>Genet Epidemiol</source>. <year>2013</year>;<volume>37</volume>(<issue>2</issue>):<fpage>196</fpage>–<lpage>204</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/gepi.21703" xlink:type="simple">10.1002/gepi.21703</ext-link></comment> <object-id pub-id-type="pmid">23280576</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref022">
<label>22</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Oualkacha</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Dastani</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Cingolani</surname> <given-names>PE</given-names></name>, <name name-style="western"><surname>Spector</surname> <given-names>TD</given-names></name>, <name name-style="western"><surname>Hammond</surname> <given-names>CJ</given-names></name>, <etal>et al</etal>. <article-title>Adjusted Sequence Kernel Association Test for Rare Variants Controlling for Cryptic and Family Relatedness</article-title>. <source>Genet Epidemiol</source>. <year>2013</year>;<volume>37</volume>(<issue>4</issue>):<fpage>366</fpage>–<lpage>376</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/gepi.21725" xlink:type="simple">10.1002/gepi.21725</ext-link></comment> <object-id pub-id-type="pmid">23529756</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref023">
<label>23</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Rao</surname> <given-names>CR</given-names></name>. <article-title>Large sample tests of statistical hypotheses concerning several parameters with applications to problems of estimation</article-title>. <source>Math Proc Cambridge Philos Soc</source>. <year>1948</year>;<volume>44</volume>(<issue>1</issue>):<fpage>50</fpage>–<lpage>57</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1017/S0305004100023987" xlink:type="simple">10.1017/S0305004100023987</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref024">
<label>24</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Lippert</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Xiang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Horta</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Widmer</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Kadie</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Heckerman</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>Greater power and computational efficiency for kernel-based association testing of sets of genetic variants</article-title>. <source>Bioinformatics</source>. <year>2014</year>;<volume>30</volume>(<issue>22</issue>):<fpage>3206</fpage>–<lpage>3214</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btu504" xlink:type="simple">10.1093/bioinformatics/btu504</ext-link></comment> <object-id pub-id-type="pmid">25075117</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref025">
<label>25</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Neyman</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Pearson</surname> <given-names>ES</given-names></name>. <article-title>On the Use and Interpretation of Certain Test Criteria for Purposes of Statistical Inference</article-title>. <source>Biometrika</source>. <year>1928</year>;<volume>20A</volume>(<issue>1-2</issue>):<fpage>175</fpage>–<lpage>240</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/biomet/20A.1-2.175" xlink:type="simple">10.1093/biomet/20A.1-2.175</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref026">
<label>26</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wilks</surname> <given-names>SS</given-names></name>. <article-title>The Large-Sample Distribution of the Likelihood Ratio for Testing Composite Hypotheses</article-title>. <source>Ann Math Stat</source>. <year>1938</year>;<volume>9</volume>(<issue>1</issue>):<fpage>60</fpage>–<lpage>62</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1214/aoms/1177732360" xlink:type="simple">10.1214/aoms/1177732360</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref027">
<label>27</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Listgarten</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lippert</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Kang</surname> <given-names>EY</given-names></name>, <name name-style="western"><surname>Xiang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Kadie</surname> <given-names>CM</given-names></name>, <name name-style="western"><surname>Heckerman</surname> <given-names>D</given-names></name>. <article-title>A powerful and efficient set test for genetic markers that handles confounders</article-title>. <source>Bioinformatics</source>. <year>2013</year>;<volume>29</volume>(<issue>12</issue>):<fpage>1526</fpage>–<lpage>1533</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btt177" xlink:type="simple">10.1093/bioinformatics/btt177</ext-link></comment> <object-id pub-id-type="pmid">23599503</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref028">
<label>28</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Yano</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Yamamoto</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Aya</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Takeuchi</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Lo</surname> <given-names>PC</given-names></name>, <name name-style="western"><surname>Hu</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>Genome-wide association study using whole-genome sequencing rapidly identifies new genes influencing agronomic traits in rice</article-title>. <source>Nat Genet</source>. <year>2016</year>;<volume>48</volume>(<issue>8</issue>):<fpage>927</fpage>–<lpage>934</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng.3596" xlink:type="simple">10.1038/ng.3596</ext-link></comment> <object-id pub-id-type="pmid">27322545</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref029">
<label>29</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Druet</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Georges</surname> <given-names>M</given-names></name>. <article-title>A hidden Markov model combining linkage and linkage disequilibrium information for haplotype reconstruction and quantitative trait locus fine mapping</article-title>. <source>Genetics</source>. <year>2010</year>;<volume>184</volume>(<issue>3</issue>):<fpage>789</fpage>–<lpage>798</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1534/genetics.109.108431" xlink:type="simple">10.1534/genetics.109.108431</ext-link></comment> <object-id pub-id-type="pmid">20008575</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref030">
<label>30</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zhang</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Guillaume</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Sartelet</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Charlier</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Georges</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Farnir</surname> <given-names>F</given-names></name>, <etal>et al</etal>. <article-title>Ancestral haplotype-based association mapping with generalized linear mixed models accounting for stratification</article-title>. <source>Bioinformatics</source>. <year>2012</year>;<volume>28</volume>(<issue>19</issue>):<fpage>2467</fpage>–<lpage>2473</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/bts348" xlink:type="simple">10.1093/bioinformatics/bts348</ext-link></comment> <object-id pub-id-type="pmid">22711794</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref031">
<label>31</label>
<mixed-citation publication-type="book" xlink:type="simple">
<collab>R Core Team</collab>. <source>R: A Language and Environment for Statistical Computing</source>; <year>2019</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/" xlink:type="simple">https://www.R-project.org/</ext-link>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref032">
<label>32</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Wickham</surname> <given-names>H</given-names></name>. <source>ggplot2: Elegant Graphics for Data Analysis</source>. <publisher-name>Springer-Verlag</publisher-name> <publisher-loc>New York</publisher-loc>; <year>2016</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://ggplot2.tidyverse.org" xlink:type="simple">https://ggplot2.tidyverse.org</ext-link>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref033">
<label>33</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Eddelbuettel</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>François</surname> <given-names>R</given-names></name>. <article-title>Rcpp: Seamless R and C++ Integration</article-title>. <source>Journal of Statistical Software</source>. <year>2011</year>;<volume>40</volume>(<issue>8</issue>):<fpage>1</fpage>–<lpage>18</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.18637/jss.v040.i08" xlink:type="simple">10.18637/jss.v040.i08</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref034">
<label>34</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Eddelbuettel</surname> <given-names>D</given-names></name>. <source>Seamless R and C++ Integration with Rcpp</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Springer</publisher-name>; <year>2013</year>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref035">
<label>35</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Eddelbuettel</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Balamuta</surname> <given-names>JJ</given-names></name>. <article-title>Extending <italic>R</italic> with <italic>C++</italic>: A Brief Introduction to <italic>Rcpp</italic></article-title>. <source>PeerJ Preprints</source>. <year>2017</year>;<volume>5</volume>:<fpage>e3188v1</fpage>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref036">
<label>36</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Bates</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Eddelbuettel</surname> <given-names>D</given-names></name>. <article-title>Fast and Elegant Numerical Linear Algebra Using the RcppEigen Package</article-title>. <source>Journal of Statistical Software</source>. <year>2013</year>;<volume>52</volume>(<issue>5</issue>):<fpage>1</fpage>–<lpage>24</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.18637/jss.v052.i05" xlink:type="simple">10.18637/jss.v052.i05</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref037">
<label>37</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Endelman</surname> <given-names>JB</given-names></name>, <name name-style="western"><surname>Jannink</surname> <given-names>JL</given-names></name>. <article-title>Shrinkage Estimation of the Realized Relationship Matrix</article-title>. <source>G3 (Bethesda)</source>. <year>2012</year>;<volume>2</volume>(<issue>11</issue>):<fpage>1405</fpage>–<lpage>1413</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1534/g3.112.004259" xlink:type="simple">10.1534/g3.112.004259</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref038">
<label>38</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Edgeworth</surname> <given-names>FY</given-names></name>. <article-title>On the Probable Errors of Frequency-Constants (Contd.)</article-title>. <source>J R Stat Soc</source>. <year>1908</year>;<volume>71</volume>(<issue>3</issue>):<fpage>499</fpage>–<lpage>512</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2307/2339293" xlink:type="simple">10.2307/2339293</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref039">
<label>39</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Patterson</surname> <given-names>HD</given-names></name>, <name name-style="western"><surname>Thompson</surname> <given-names>R</given-names></name>. <article-title>Recovery of inter-block information when block sizes are unequal</article-title>. <source>Biometrika</source>. <year>1971</year>;<volume>58</volume>(<issue>3</issue>):<fpage>545</fpage>–<lpage>554</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/biomet/58.3.545" xlink:type="simple">10.1093/biomet/58.3.545</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref040">
<label>40</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Kang</surname> <given-names>HM</given-names></name>, <name name-style="western"><surname>Zaitlen</surname> <given-names>NA</given-names></name>, <name name-style="western"><surname>Wade</surname> <given-names>CM</given-names></name>, <name name-style="western"><surname>Kirby</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Heckerman</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Daly</surname> <given-names>MJ</given-names></name>, <etal>et al</etal>. <article-title>Efficient Control of Population Structure in Model Organism Association Mapping</article-title>. <source>Genetics</source>. <year>2008</year>;<volume>178</volume>(<issue>3</issue>):<fpage>1709</fpage>–<lpage>1723</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1534/genetics.107.080101" xlink:type="simple">10.1534/genetics.107.080101</ext-link></comment> <object-id pub-id-type="pmid">18385116</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref041">
<label>41</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zhou</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Stephens</surname> <given-names>M</given-names></name>. <article-title>Genome-wide efficient mixed-model analysis for association studies</article-title>. <source>Nat Genet</source>. <year>2012</year>;<volume>44</volume>(<issue>7</issue>):<fpage>821</fpage>–<lpage>824</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng.2310" xlink:type="simple">10.1038/ng.2310</ext-link></comment> <object-id pub-id-type="pmid">22706312</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref042">
<label>42</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Byrd</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Lu</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Nocedal</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Zhu</surname> <given-names>C</given-names></name>. <article-title>A Limited Memory Algorithm for Bound Constrained Optimization</article-title>. <source>SIAM Journal on Scientific Computing</source>. <year>1995</year>;<volume>16</volume>(<issue>5</issue>):<fpage>1190</fpage>–<lpage>1208</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1137/0916069" xlink:type="simple">10.1137/0916069</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref043">
<label>43</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Gilmour</surname> <given-names>AR</given-names></name>, <name name-style="western"><surname>Thompson</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Cullis</surname> <given-names>BR</given-names></name>. <article-title>Average Information REML: An Efficient Algorithm for Variance Parameter Estimation in Linear Mixed Models</article-title>. <source>Biometrics</source>. <year>1995</year>;<volume>51</volume>(<issue>4</issue>):<fpage>1440</fpage>–<lpage>1450</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2307/2533274" xlink:type="simple">10.2307/2533274</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref044">
<label>44</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Yang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>SH</given-names></name>, <name name-style="western"><surname>Goddard</surname> <given-names>ME</given-names></name>, <name name-style="western"><surname>Visscher</surname> <given-names>PM</given-names></name>. <article-title>GCTA: A tool for genome-wide complex trait analysis</article-title>. <source>Am J Hum Genet</source>. <year>2011</year>;<volume>88</volume>(<issue>1</issue>):<fpage>76</fpage>–<lpage>82</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2010.11.011" xlink:type="simple">10.1016/j.ajhg.2010.11.011</ext-link></comment> <object-id pub-id-type="pmid">21167468</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref045">
<label>45</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Covarrubias-Pazaran</surname> <given-names>G</given-names></name>. <article-title>Genome-Assisted prediction of quantitative traits using the r package sommer</article-title>. <source>PLoS One</source>. <year>2016</year>;<volume>11</volume>(<issue>6</issue>):<fpage>1</fpage>–<lpage>15</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0156744" xlink:type="simple">10.1371/journal.pone.0156744</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref046">
<label>46</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Wang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Do</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Woznica</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Kalousis</surname> <given-names>A</given-names></name>. <chapter-title>Metric learning with multiple kernels</chapter-title>. <source>Adv Neural Inf Process Syst</source>. <year>2011</year>; p. <fpage>1170</fpage>–<lpage>1178</lpage>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref047">
<label>47</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Self</surname> <given-names>SG</given-names></name>, <name name-style="western"><surname>Liang</surname> <given-names>KY</given-names></name>. <article-title>Asymptotic properties of maximum likelihood estimators and likelihood ratio tests under nonstandard conditions</article-title>. <source>J Am Stat Assoc</source>. <year>1987</year>;<volume>82</volume>(<issue>398</issue>):<fpage>605</fpage>–<lpage>610</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/01621459.1987.10478472" xlink:type="simple">10.1080/01621459.1987.10478472</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref048">
<label>48</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Stram</surname> <given-names>DO</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>JW</given-names></name>. <article-title>Variance Components Testing in the Longitudinal Mixed Effects Model</article-title>. <source>Biometrics</source>. <year>1994</year>;<volume>50</volume>(<issue>4</issue>):<fpage>1171</fpage>–<lpage>1177</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2307/2533455" xlink:type="simple">10.2307/2533455</ext-link></comment> <object-id pub-id-type="pmid">7786999</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref049">
<label>49</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Browning</surname> <given-names>SR</given-names></name>, <name name-style="western"><surname>Browning</surname> <given-names>BL</given-names></name>. <article-title>Rapid and Accurate Haplotype Phasing and Missing-Data Inference for Whole-Genome Association Studies By Use of Localized Haplotype Clustering</article-title>. <source>Am J Hum Genet</source>. <year>2007</year>;<volume>81</volume>(<issue>5</issue>):<fpage>1084</fpage>–<lpage>1097</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1086/521987" xlink:type="simple">10.1086/521987</ext-link></comment> <object-id pub-id-type="pmid">17924348</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref050">
<label>50</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Browning</surname> <given-names>BL</given-names></name>, <name name-style="western"><surname>Zhou</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Browning</surname> <given-names>SR</given-names></name>. <article-title>A One-Penny Imputed Genome from Next-Generation Reference Panels</article-title>. <source>Am J Hum Genet</source>. <year>2018</year>;<volume>103</volume>(<issue>3</issue>):<fpage>338</fpage>–<lpage>348</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2018.07.015" xlink:type="simple">10.1016/j.ajhg.2018.07.015</ext-link></comment> <object-id pub-id-type="pmid">30100085</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref051">
<label>51</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Danecek</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Auton</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Abecasis</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Albers</surname> <given-names>CA</given-names></name>, <name name-style="western"><surname>Banks</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>DePristo</surname> <given-names>MA</given-names></name>, <etal>et al</etal>. <article-title>The variant call format and VCFtools</article-title>. <source>Bioinformatics</source>. <year>2011</year>;<volume>27</volume>(<issue>15</issue>):<fpage>2156</fpage>–<lpage>2158</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btr330" xlink:type="simple">10.1093/bioinformatics/btr330</ext-link></comment> <object-id pub-id-type="pmid">21653522</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref052">
<label>52</label>
<mixed-citation publication-type="other" xlink:type="simple">Purcell S, Chang C. PLINK 1.9; 2018. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.cog-genomics.org/plink/1.9/" xlink:type="simple">https://www.cog-genomics.org/plink/1.9/</ext-link>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref053">
<label>53</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Gaunt</surname> <given-names>TR</given-names></name>, <name name-style="western"><surname>Rodríguez</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Day</surname> <given-names>INM</given-names></name>. <article-title>Cubic exact solutions for the estimation of pairwise haplotype frequencies: Implications for linkage disequilibrium analyses and a web tool ‘CubeX’</article-title>. <source>BMC Bioinformatics</source>. <year>2007</year>;<volume>8</volume>:<fpage>1</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-8-428" xlink:type="simple">10.1186/1471-2105-8-428</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref054">
<label>54</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Taliun</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Gamper</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Pattaro</surname> <given-names>C</given-names></name>. <article-title>Efficient haplotype block recognition of very long and dense genetic sequences</article-title>. <source>BMC Bioinformatics</source>. <year>2014</year>;<volume>15</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>18</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-15-10" xlink:type="simple">10.1186/1471-2105-15-10</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref055">
<label>55</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Yu</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Pressoir</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Briggs</surname> <given-names>WH</given-names></name>, <name name-style="western"><surname>Vroh Bi</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Yamasaki</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Doebley</surname> <given-names>JF</given-names></name>, <etal>et al</etal>. <article-title>A unified mixed-model method for association mapping that accounts for multiple levels of relatedness</article-title>. <source>Nat Genet</source>. <year>2006</year>;<volume>38</volume>(<issue>2</issue>):<fpage>203</fpage>–<lpage>208</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng1702" xlink:type="simple">10.1038/ng1702</ext-link></comment> <object-id pub-id-type="pmid">16380716</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref056">
<label>56</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Benjamini</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Hochberg</surname> <given-names>Y</given-names></name>. <article-title>Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing</article-title>. <source>J R Stat Soc Ser B</source>. <year>1995</year>;<volume>57</volume>(<issue>1</issue>):<fpage>289</fpage>–<lpage>300</lpage>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref057">
<label>57</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Storey</surname> <given-names>JD</given-names></name>, <name name-style="western"><surname>Tibshirani</surname> <given-names>R</given-names></name>. <article-title>Statistical significance for genomewide studies</article-title>. <source>Proc Natl Acad Sci</source>. <year>2003</year>;<volume>100</volume>(<issue>16</issue>):<fpage>9440</fpage>–<lpage>9445</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1530509100" xlink:type="simple">10.1073/pnas.1530509100</ext-link></comment> <object-id pub-id-type="pmid">12883005</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref058">
<label>58</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Bland</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Altman</surname> <given-names>DG</given-names></name>. <article-title>Multiple significance tests: The Bonferroni correction</article-title>. <source>BMJ</source>. <year>1995</year>;<volume>310</volume>:<fpage>170</fpage>.</mixed-citation>
</ref>
<ref id="pcbi.1007663.ref059">
<label>59</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zeng</surname> <given-names>ZB</given-names></name>. <article-title>Theoretical basis for separation of multiple linked gene effects in mapping quantitative trait loci</article-title>. <source>Proc Natl Acad Sci</source>. <year>1993</year>;<volume>90</volume>(<issue>23</issue>):<fpage>10972</fpage>–<lpage>10976</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.90.23.10972" xlink:type="simple">10.1073/pnas.90.23.10972</ext-link></comment> <object-id pub-id-type="pmid">8248199</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref060">
<label>60</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zeng</surname> <given-names>ZB</given-names></name>. <article-title>Precision Mapping of Quantitative Trait Loci</article-title>. <source>Genetics</source>. <year>1994</year>;<volume>136</volume>:<fpage>1457</fpage>–<lpage>1468</lpage>. <object-id pub-id-type="pmid">8013918</object-id></mixed-citation>
</ref>
<ref id="pcbi.1007663.ref061">
<label>61</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Segura</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Vilhjálmsson</surname> <given-names>BJ</given-names></name>, <name name-style="western"><surname>Platt</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Korte</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Seren</surname> <given-names>Ü</given-names></name>, <name name-style="western"><surname>Long</surname> <given-names>Q</given-names></name>, <etal>et al</etal>. <article-title>An efficient multi-locus mixed-model approach for genome-wide association studies in structured populations</article-title>. <source>Nat Genet</source>. <year>2012</year>;<volume>44</volume>(<issue>7</issue>):<fpage>825</fpage>–<lpage>830</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng.2314" xlink:type="simple">10.1038/ng.2314</ext-link></comment> <object-id pub-id-type="pmid">22706313</object-id></mixed-citation>
</ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pcbi.1007663.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007663.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007663" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">11 Nov 2019</named-content>
</p>
<p>Dear Dr Iwata,</p>
<p>Thank you very much for submitting your manuscript 'RAINBOW: Haplotype-based genome-wide association study using a novel SNP-set method' for review by PLOS Computational Biology. Your manuscript has been fully evaluated by the PLOS Computational Biology editorial team and in this case also by independent peer reviewers. The reviewers appreciated the attention to an important problem, but raised some substantial concerns about the manuscript as it currently stands. While your manuscript cannot be accepted in its present form, we are willing to consider a revised version in which the issues raised by the reviewers have been adequately addressed. We cannot, of course, promise publication at that time.</p>
<p>Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>Your revisions should address the specific points made by each reviewer. Please return the revised version within the next 60 days. If you anticipate any delay in its return, we ask that you let us know the expected resubmission date by email at <email xlink:type="simple">ploscompbiol@plos.org</email>. Revised manuscripts received beyond 60 days may require evaluation and peer review similar to that applied to newly submitted manuscripts.</p>
<p>In addition, when you are ready to resubmit, please be prepared to provide the following:</p>
<p>(1) A detailed list of your responses to the review comments and the changes you have made in the manuscript. We require a file of this nature before your manuscript is passed back to the editors.</p>
<p>(2) A copy of your manuscript with the changes highlighted (encouraged). We encourage authors, if possible, to show clearly where changes have been made to their manuscript, e.g. by highlighting text.</p>
<p>(3) A striking still image to accompany your article (optional). If the image is judged to be suitable by the editors, it may be featured on our website and might be chosen as the issue image for that month. These square, high-quality images should be accompanied by a short caption. Please note as well that there should be no copyright restrictions on the use of the image, so that it can be published under the Open-Access license and be subject only to appropriate attribution.</p>
<p>Before you resubmit your manuscript, please consult our Submission Checklist to ensure your manuscript is formatted correctly for PLOS Computational Biology: <ext-link ext-link-type="uri" xlink:href="http://www.ploscompbiol.org/static/checklist.action" xlink:type="simple">http://www.ploscompbiol.org/static/checklist.action</ext-link>. Some key points to remember are:</p>
<p>- Figures uploaded separately as TIFF or EPS files (if you wish, your figures may remain in your main manuscript file in addition).</p>
<p>- Supporting Information uploaded as separate files, titled Dataset, Figure, Table, Text, Protocol, Audio, or Video.</p>
<p>- Funding information in the 'Financial Disclosure' box in the online system.</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com" xlink:type="simple">https://pacev2.apexcovantage.com</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email us at <email xlink:type="simple">figures@plos.org</email>.</p>
<p>To enhance the reproducibility of your results, we recommend that you deposit your laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. For instructions see <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods" xlink:type="simple">here</ext-link>. </p>
<p>We are sorry that we cannot be more positive about your manuscript at this stage, but if you have any concerns or questions, please do not hesitate to contact us.</p>
<p>Sincerely,</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>A link appears below if there are any accompanying review attachments. If you believe any reviews to be missing, please contact <email xlink:type="simple">ploscompbiol@plos.org</email> immediately:</p>
<p>[LINK]</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: This is a very interesting piece of research that attempts to improve the detection of rare variants by conventional SNP-based GWAS models via haplotypes. The proposed method performs as well as the conventional methods for controlling false positives; however, it is shown to outperform the other models in detecting causal induced variants that were not identified with the conventional models. Also, one of the advantages of this proposed model is that it does not rely on LD when causal variants are also genotyped.</p>
<p>In general, the Materials and Methods, Results and Discussion sections are well written; however, the abstract and introduction sections need some improvements, especially to better describe the scope and implications of the results of the proposed method.</p>
<p>Here a few minor points.</p>
<p>Page 2, lines 6-9. sequencing data is the "3000 rice genomes project" [].</p>
<p>such public data, the conventional GWAS</p>
<p>Page 2, line 29. in false</p>
<p>as in the world collection of rice germplasm used in this</p>
<p>drawback: a weighting</p>
<p>Line 39. which is a computationally</p>
<p>method since it requires</p>
<p>Page 3, lines 47-49. Please rephrase.</p>
<p>Derivations of the equations and model development are OK.</p>
<p>Adding a diagram for explaining the proposed simulation scheme would help to understand better the results.</p>
<p>Page 8, line 217. data an material</p>
<p>Discussion was well conducted. Perhaps a conclusions section would be desirable if that is allowed in the journal format.</p>
<p>Reviewer #2: Please see attachment for review.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: Yes: Diego Jarquin</p>
<p>Reviewer #2: No</p>
<supplementary-material id="pcbi.1007663.s010" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s010" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Review.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pcbi.1007663.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007663.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pcbi.1007663" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pcbi.1007663.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">9 Dec 2019</named-content>
</p>
<supplementary-material id="pcbi.1007663.s011" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007663.s011" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response_to_Reviewers.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pcbi.1007663.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007663.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007663" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">18 Jan 2020</named-content>
</p>
<p>Dear Dr. Iwata,</p>
<p>We are pleased to inform you that your manuscript 'RAINBOW: Haplotype-based genome-wide association study using a novel SNP-set method' has been provisionally accepted for publication in PLOS Computational Biology.</p>
<p>Before your manuscript can be formally accepted you will need to complete some formatting changes, which you will receive in a follow up email. A member of our team will be in touch within two working days with a set of requests.</p>
<p>Please note that your manuscript will not be scheduled for publication until you have made the required changes, so a swift response is appreciated.</p>
<p>IMPORTANT: The editorial review process is now complete. PLOS will only permit corrections to spelling, formatting or significant scientific errors from this point onwards. Requests for major changes, or any which affect the scientific understanding of your work, will cause delays to the publication date of your manuscript.</p>
<p>Should you, your institution's press office or the journal office choose to press release your paper, you will automatically be opted out of early publication. We ask that you notify us now if you or your institution is planning to press release the article. All press must be co-ordinated with PLOS.</p>
<p>Thank you again for supporting Open Access publishing; we are looking forward to publishing your work in PLOS Computational Biology. </p>
<p>Best regards,</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>***********************************************************</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: I have no further comments on the version of the manuscript. All my questions were correctly addressed by the authors.</p>
<p>Reviewer #2: The authors have substantially improved their writing.</p>
<p>Their contribution has tried to address GWAS - an important but difficult problem. The method requires estimating variance components of models from a multi-step approach, which includes estimating weights to scale the estimated variance components from a model with a single random effect.</p>
<p>I am not convinced such an approach is optimal to estimation of variance components directly but acknowledge that the contribution and results are worthy of dissemination. A weakness of the method is that it is not easily extensible (e.g. if there are three random effects then the algorithm needs modification) - perhaps something that the authors may like to think about in future developments of their software.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pcbi.1007663.r004" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007663.r004</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007663" document-id-type="doi" document-type="article" id="rel-obj004" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">6 Feb 2020</named-content>
</p>
<p>PCOMPBIOL-D-19-01767R1 </p>
<p>RAINBOW: Haplotype-based genome-wide association study using a novel SNP-set method</p>
<p>Dear Dr Iwata,</p>
<p>I am pleased to inform you that your manuscript has been formally accepted for publication in PLOS Computational Biology. Your manuscript is now with our production department and you will be notified of the publication date in due course.</p>
<p>The corresponding author will soon be receiving a typeset proof for review, to ensure errors have not been introduced during production. Please review the PDF proof of your manuscript carefully, as this is the last chance to correct any errors. Please note that major changes, or those which affect the scientific understanding of the work, will likely cause delays to the publication date of your manuscript. </p>
<p>Soon after your final files are uploaded, unless you have opted out, the early version of your manuscript will be published online. The date of the early version will be your article's publication date. The final article will be published to the same URL, and all versions of the paper will be accessible to readers.</p>
<p>Thank you again for supporting PLOS Computational Biology and open-access publishing. We are looking forward to publishing your work! </p>
<p>With kind regards,</p>
<p>Sarah Hammond</p>
<p>PLOS Computational Biology | Carlyle House, Carlyle Road, Cambridge CB4 3DN | United Kingdom <email xlink:type="simple">ploscompbiol@plos.org</email> | Phone +44 (0) 1223-442824 | <ext-link ext-link-type="uri" xlink:href="http://ploscompbiol.org" xlink:type="simple">ploscompbiol.org</ext-link> | @PLOSCompBiol</p>
</body>
</sub-article>
</article>