<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="en">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">plos</journal-id>
      <journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
      <journal-id journal-id-type="pmc">ploscomp</journal-id>
      <journal-title-group>
        <journal-title>PLoS Computational Biology</journal-title>
      </journal-title-group>
      <issn pub-type="ppub">1553-734X</issn>
      <issn pub-type="epub">1553-7358</issn>
      <publisher>
        <publisher-name>Public Library of Science</publisher-name>
        <publisher-loc>San Francisco, USA</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">PCOMPBIOL-D-12-01452</article-id>
      <article-id pub-id-type="doi">10.1371/journal.pcbi.1002828</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Education</subject>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Biology</subject>
          <subj-group>
            <subject>Computational biology</subject>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline">
          <subject>Computational Biology</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Chapter 10: Mining Genome-Wide Genetic Markers</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Xiang</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Shunping</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Zhaojun</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Wei</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">
            <sup>3</sup>
          </xref>
          <xref ref-type="corresp" rid="cor1">
            <sup>*</sup>
          </xref>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <addr-line>Department of Electrical Engineering and Computer Science, Case Western Reserve University, Ohio, United States of America</addr-line>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <addr-line>Department of Computer Science, University of North Carolina at Chapel Hill, North Carolina, United States of America</addr-line>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <addr-line>Department of Computer Science, University of California at Los Angeles, California, United States of America</addr-line>
      </aff>
      <contrib-group>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>Lewitter</surname>
            <given-names>Fran</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit1"/>
        </contrib>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>Kann</surname>
            <given-names>Maricel</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit2"/>
        </contrib>
      </contrib-group>
      <aff id="edit1">
        <addr-line>Whitehead Institute, United States of America</addr-line>
      </aff>
      <aff id="edit2">
        <addr-line>University of Maryland, Baltimore County, United States of America</addr-line>
      </aff>
      <author-notes>
        <corresp id="cor1">* E-mail: <email xlink:type="simple">weiwang@cs.ucla.edu</email></corresp>
        <fn fn-type="conflict">
          <p>The authors have declared that no competing interests exist.</p>
        </fn>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2012</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>12</month>
        <year>2012</year>
      </pub-date>
      <volume>8</volume>
      <issue>12</issue>
      <elocation-id>e1002828</elocation-id>
      <permissions>
        <copyright-year>2012</copyright-year>
        <copyright-holder>Zhang et al</copyright-holder>
        <license xlink:type="simple">
          <license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
        </license>
      </permissions>
      <abstract>
        <p>Genome-wide association study (GWAS) aims to discover genetic factors underlying phenotypic traits. The large number of genetic factors poses both computational and statistical challenges. Various computational approaches have been developed for large-scale GWAS. In this chapter, we will discuss several widely used computational approaches in GWAS. The following topics will be covered: (1) An introduction to the background of GWAS. (2) The existing computational approaches that are widely used in GWAS. This will cover single-locus, epistasis detection, and machine learning methods that have been recently developed in the biology, statistics, and computer science communities. This part will be the main focus of this chapter. (3) The limitations of current approaches and future directions.</p>
      </abstract>
      <funding-group>
        <funding-statement>This work was supported by the following grants: NSF IIS-1162369, NSF IIS-0812464, NIH GM076468 and NIH MH090338. The funders had no role in the preparation of the manuscript.</funding-statement>
      </funding-group>
      <counts>
        <page-count count="9"/>
      </counts>
    </article-meta>
  </front>
  <body>
    <boxed-text id="pcbi-1002828-box001" position="float">
      <sec id="s1a1">
        <title>What to Learn in This Chapter</title>
        <list list-type="bullet">
          <list-item>
            <p>The background of Genome-wide association study (GWAS).</p>
          </list-item>
          <list-item>
            <p>The existing computational approaches that are widely used in GWAS. This will cover single-locus, epistasis detection, and machine learning methods.</p>
          </list-item>
          <list-item>
            <p>The limitations of current approaches and future directions.</p>
          </list-item>
        </list>
      </sec>
    </boxed-text>
    <sec id="s1">
      <title/>
      <disp-quote>
        <p>This article is part of the “Translational Bioinformatics” collection for <italic>PLOS Computational Biology</italic>.</p>
      </disp-quote>
    </sec>
    <sec id="s2">
      <title>1. Introduction</title>
      <p>With the advancement of genotyping technology, genome-wide high-density single nucleotide polymorphisms (SNPs) of human and other organisms are now available <xref ref-type="bibr" rid="pcbi.1002828-Churchill1">[1]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-The1">[2]</xref>. The goal of genome-wide association studies (GWAS) is to seek strong associations between phenotype and genetic variations in a population that represent (genomically proximal) causal genetic effects. As the most abundant source of genetic variation, millions of SNPs have been genotyped across the entire genome. Analyzing such a large number of markers poses great challenges to traditional computational and statistical methods. In this chapter, we introduce the basic concept of genome-wide association study, and discuss recently developed methods for GWAS.</p>
      <p>Genome-wide association study is an interdisciplinary problem spanning biology, statistics and computer science <xref ref-type="bibr" rid="pcbi.1002828-Saxena1">[3]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Scuteri1">[4]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-The2">[5]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Weedon1">[6]</xref>. In this section, we will first provide a brief introduction to the necessary biological background. We will then formalize the problem and discuss both traditional and recently developed methods for genome-wide analysis of associations.</p>
      <p>A human genome contains over 3 billion DNA base pairs. There are four possible nucleotides at each base in the DNA: adenine (A), guanine (G), thymine (T), and cytosine (C). In some locations in the genome, a genetic variation may be found which involves two or more nucleotides across different individuals. These genetic variations are known as <italic>single-nucleotide polymorphisms</italic> (SNPs), i.e., a variation of a single nucleotide in the DNA sequence. In most cases, there are two possible nucleotides for a variant. We denote the more frequent one as “0”, and the less frequent one as “1”. For bases on autosomal chromosomes, there are two parallel nucleotides, which leads to three possible combinations, “00”, “01” and “11”. These genotype combinations are known as “major homozygous site”, “heterozygous site” and “minor homozygous site” respectively. These genetic variations contribute to the phenotypic differences among the individuals. (A phenotype is the composite of an organism's observable characteristics or traits.) Genome-wide association study (GWAS) aims to find strong associations between SNPs and phenotypes across a set of individuals.</p>
      <p>More formally, let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e001" xlink:type="simple"/></inline-formula> be the set of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e002" xlink:type="simple"/></inline-formula> SNPs for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e003" xlink:type="simple"/></inline-formula> individuals in the study, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e004" xlink:type="simple"/></inline-formula> be the phenotype of interest. The goal of GWAS is to find SNPs (markers) in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e005" xlink:type="simple"/></inline-formula>, that are highly associated with <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e006" xlink:type="simple"/></inline-formula>. There are several challenging issues that need to be addressed when developing an analytic method for GWAS <xref ref-type="bibr" rid="pcbi.1002828-Hirschhorn1">[7]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-McCarthy1">[8]</xref>.</p>
      <p><bold>Scalability</bold> Most GWAS datasets consist of a large number of SNPs. Therefore the algorithms for GWAS need to be highly scalable. For example, for a typical human GWAS, the dataset may contain up to millions of SNPs and involve thousands of individuals. Inefficient methods may consume a large amount of computational resources and time to find highly associated SNPs.</p>
      <p><bold>Missing markers</bold> Even with the current dense genotyping technique, many genetic variants are still not genotyped. Current methods usually assume genetic linkage to enhance the power. Imputation, which tries to impute the unknown markers by using existing SNPs databases, is another popular approach to handle missing markers. The well known related projects include the International HapMap project <xref ref-type="bibr" rid="pcbi.1002828-Thorisson1">[9]</xref> and the 1000 Genomes Project <xref ref-type="bibr" rid="pcbi.1002828-The3">[10]</xref>.</p>
      <p><bold>Complex traits</bold> One approach in GWAS is to test the association between the trait and each marker in a genome, which is successful in detecting a single gene related disease. However, this approach may have problems in finding markers associated with complex traits. This is because complex traits are affected by multiple genes, and each gene may only have a weak association with the phenotype. Such markers with low marginal effects are hard to detect by the single-locus methods.</p>
      <p>In the remainder of the chapter, we will first discuss the single-locus methods. We will then study epistasis detection (multi-locus) approaches which are designed for association studies of complex traits. For epistasis detection, we will mainly focus on exact two-locus association mapping methods.</p>
    </sec>
    <sec id="s3">
      <title>2. Single-Locus Association Mapping</title>
      <p>With the rapid development of high-throughput genotyping technology, millions of SNPs are now available for genome-wide association studies. Single-locus association test is a traditional way for association studies. Specifically, for each SNP, a statistical test is performed to evaluate the association between the SNP and the phenotype. A variety of tests can be applied depending on the data types. The phenotype involved in a study can be case-control (binary), quantitative (continuous), or categorical. We categorize the statistical tests based on what kind of phenotypes they can be applied to.</p>
      <sec id="s3a">
        <title>2.1 Problem Formalization</title>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e007" xlink:type="simple"/></inline-formula> be a set of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e008" xlink:type="simple"/></inline-formula> SNPs for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e009" xlink:type="simple"/></inline-formula> individuals and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e010" xlink:type="simple"/></inline-formula> <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e011" xlink:type="simple"/></inline-formula>. We use 0, 1, 2 to represent the homozygous major allele, heterozygous allele, and homozygous minor allele respectively. Thus we have that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e012" xlink:type="simple"/></inline-formula> (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e013" xlink:type="simple"/></inline-formula>). Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e014" xlink:type="simple"/></inline-formula> be the phenotype. Note that the values that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e015" xlink:type="simple"/></inline-formula> can take depend on its type.</p>
      </sec>
      <sec id="s3b">
        <title>2.2 Case-Control Phenotype</title>
        <p>In a case-control study, the phenotype can be represented as a binary variable with 0 representing controls and 1 representing cases.</p>
        <p>A contingency table records the frequencies of different events. <xref ref-type="table" rid="pcbi-1002828-t001">Table 1</xref> is an example contingency table. For a SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e016" xlink:type="simple"/></inline-formula> and a phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e017" xlink:type="simple"/></inline-formula>, we use <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e018" xlink:type="simple"/></inline-formula> to denote the number of individuals whose <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e019" xlink:type="simple"/></inline-formula> equals <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e020" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e021" xlink:type="simple"/></inline-formula> equals <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e022" xlink:type="simple"/></inline-formula>. Also, we have <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e023" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e024" xlink:type="simple"/></inline-formula>. The total number of individuals <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e025" xlink:type="simple"/></inline-formula>.</p>
        <table-wrap id="pcbi-1002828-t001" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.t001</object-id>
          <label>Table 1</label>
          <caption>
            <title>Contingency table for a single SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e026" xlink:type="simple"/></inline-formula> and a phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e027" xlink:type="simple"/></inline-formula>.</title>
          </caption>
          <alternatives>
            <graphic id="pcbi-1002828-t001-1" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.t001" xlink:type="simple"/>
            <table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" rowspan="1" colspan="1"/>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e028" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e029" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e030" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">Totals</td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e031" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e032" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e033" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e034" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e035" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e036" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e037" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e038" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e039" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e040" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <bold>Totals</bold>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e041" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e042" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e043" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e044" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
              </tbody>
            </table>
          </alternatives>
        </table-wrap>
        <p>Many tests can be used to assess the significance of the association between a single SNP and a binary phenotype. The test statistics are usually based on the contingency table. The null hypothesis is that there is no association between the rows and columns of the contingency table.</p>
        <sec id="s3b1">
          <title>2.2.1 Pearson's <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e045" xlink:type="simple"/></inline-formula> test</title>
          <p>Pearson's <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e046" xlink:type="simple"/></inline-formula> test can be used to test a null hypothesis stating that the frequency distribution of certain events observed in a sample is consistent with a particular theoretical distribution <xref ref-type="bibr" rid="pcbi.1002828-Balding1">[11]</xref>.</p>
          <p>The value of the test statistic is<disp-formula id="pcbi.1002828.e047"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e047" xlink:type="simple"/></disp-formula>where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e048" xlink:type="simple"/></inline-formula>. The number of degrees of freedom is 2.</p>
        </sec>
        <sec id="s3b2">
          <title>2.2.2 G-test</title>
          <p>G-test is an approximation of the log-likelihood ratio. The test statistic is<disp-formula id="pcbi.1002828.e049"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e049" xlink:type="simple"/></disp-formula>where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e050" xlink:type="simple"/></inline-formula>.</p>
          <p>The null hypothesis is that the observed frequencies result from random sampling from a distribution with the given expected frequencies. The distribution of G is approximately that of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e051" xlink:type="simple"/></inline-formula>, with the same number of degrees of freedom as in the corresponding <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e052" xlink:type="simple"/></inline-formula> test. When applied to a reasonably large sample, the G-test and the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e053" xlink:type="simple"/></inline-formula> test will lead to the same conclusions.</p>
        </sec>
        <sec id="s3b3">
          <title>2.2.3 Fisher exact test</title>
          <p>When the sample size is small, the Fisher exact test is useful to determine the significance of the association. The p-value of the test is the probability of the contingency table given the fixed margins. The probability of obtaining such values in <xref ref-type="table" rid="pcbi-1002828-t001">Table 1</xref> is given by the hypergeometric distribution:<disp-formula id="pcbi.1002828.e054"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e054" xlink:type="simple"/></disp-formula>Most modern statistical packages can calculate the significance of Fisher tests. The actual computation performed by the existing software packages may be different from the exact formulation given above because of the numerical difficulties. A simple, somewhat better computational approach relies on a gamma function or log-gamma function. How to accurately compute hypergeometric and binomial probabilities remains an active research area.</p>
        </sec>
        <sec id="s3b4">
          <title>2.2.4 Cochran-Armitage test</title>
          <p>For complex traits, contributions to disease risk from SNPs are widely considered to be roughly additive. In other words, the heterozygous alleles will have an intermediate risk between two homozygous alleles. Cochran-Armitage test can be used in this case <xref ref-type="bibr" rid="pcbi.1002828-Samani1">[12]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-The2">[5]</xref>. Let the test statistic of U be the following:<disp-formula id="pcbi.1002828.e055"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e055" xlink:type="simple"/></disp-formula>After substitution, we get<disp-formula id="pcbi.1002828.e056"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e056" xlink:type="simple"/></disp-formula>The variance of U under the null hypothesis can be computed as<disp-formula id="pcbi.1002828.e057"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e057" xlink:type="simple"/></disp-formula>Notice that for a large sample size <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e058" xlink:type="simple"/></inline-formula>, we have <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e059" xlink:type="simple"/></inline-formula>, hence <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e060" xlink:type="simple"/></inline-formula>.</p>
        </sec>
        <sec id="s3b5">
          <title>2.2.5 Summary</title>
          <p>There is no overall winner of the introduced tests. Cochran-Armitage test may not be the best if the risks deviate from the additive model. Meanwhile, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e061" xlink:type="simple"/></inline-formula> test, G-test, and Fisher exact test can handle the full range of risks, but they will unavoidably lose some power in the detection of additive ones. Different tests may be applied to the same data to detect different effects.</p>
        </sec>
      </sec>
      <sec id="s3c">
        <title>2.3 Quantitative Phenotype</title>
        <p>In addition to case-control phenotypes, many complex traits are quantitative. This type of study is also often referred to as the quantitative trait locus (QTL) analysis. The standard tools for testing the association between a single marker and a continuous outcome are analysis of variance (ANOVA) and linear regression.</p>
        <sec id="s3c1">
          <title>2.3.1 One-way ANOVA</title>
          <p>The F-test in one-way analysis of variance is used to assess whether the expected values of a quantitative variable within several pre-defined groups differ from each other.</p>
          <p>For each SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e062" xlink:type="simple"/></inline-formula>, we can divide all the individuals into three groups according to their genotypes. Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e063" xlink:type="simple"/></inline-formula> be a subset of phenotypes of which the individuals have the genotypes equal to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e064" xlink:type="simple"/></inline-formula>. We represent the number of phenotypes in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e065" xlink:type="simple"/></inline-formula> as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e066" xlink:type="simple"/></inline-formula>, and we have <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e067" xlink:type="simple"/></inline-formula>. Notice that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e068" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e069" xlink:type="simple"/></inline-formula></p>
          <p>The total sum of squares (SST) can be divided into two parts, the between-group sum of squares (SSB) and the within-group sum of squares (SSW):<disp-formula id="pcbi.1002828.e070"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e070" xlink:type="simple"/></disp-formula><disp-formula id="pcbi.1002828.e071"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e071" xlink:type="simple"/></disp-formula>where<disp-formula id="pcbi.1002828.e072"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e072" xlink:type="simple"/></disp-formula> The formula of F-test statistic is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e073" xlink:type="simple"/></inline-formula>, and F follows the F-distribution with 2 and S-3 degrees of freedom under the null hypothesis, i.e., <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e074" xlink:type="simple"/></inline-formula>.</p>
        </sec>
        <sec id="s3c2">
          <title>2.3.2 Linear regression</title>
          <p>In the linear regression model, a least-squares regression line is fit between the phenotype values and the genotype values <xref ref-type="bibr" rid="pcbi.1002828-Balding1">[11]</xref>. For simplicity, we denote the genotypes of a single SNP to be <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e075" xlink:type="simple"/></inline-formula>. Based on the data <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e076" xlink:type="simple"/></inline-formula>, we need to fit a line in the form of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e077" xlink:type="simple"/></inline-formula>.</p>
          <p>We have the sums of squares as follows:<disp-formula id="pcbi.1002828.e078"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e078" xlink:type="simple"/></disp-formula>where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e079" xlink:type="simple"/></inline-formula></p>
          <p>To achieve least squares, the estimator of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e080" xlink:type="simple"/></inline-formula> is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e081" xlink:type="simple"/></inline-formula>. To evaluate the significance of the obtained model, a hypothesis testing for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e082" xlink:type="simple"/></inline-formula> is then applied.</p>
        </sec>
      </sec>
      <sec id="s3d">
        <title>2.4 Multiple Testing Problem</title>
        <p>In a typical GWAS, the test needs to be performed many times. We should pay attention to a statistical issue known as the multiple testing problem. In the remainder of this section, we will discuss the multiple testing problem and how to effectively control error rate in GWAS.</p>
        <p>The type 1 error rate is the probability that a null hypothesis is rejected when it is actually true. In other words, it is the chance of observing a positive (significant) result when there is no real effect. If a test is performed multiple times, the overall type 1 error rate will increase. This is called the multiple testing problem.</p>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e083" xlink:type="simple"/></inline-formula> be the type 1 error rate for a statistical test. If the test is performed <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e084" xlink:type="simple"/></inline-formula> times, the experimental-wise error rate <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e085" xlink:type="simple"/></inline-formula> is given by<disp-formula id="pcbi.1002828.e086"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e086" xlink:type="simple"/></disp-formula>For example, if <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e087" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e088" xlink:type="simple"/></inline-formula>, then <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e089" xlink:type="simple"/></inline-formula>. In this case, the chance of getting at least one false positive is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e090" xlink:type="simple"/></inline-formula>.</p>
        <p>Because of the multiple testing problem, the test result may not be truly significant even if its p-value is less than a significance level <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e091" xlink:type="simple"/></inline-formula>. To solve this problem, the nominal p-value needs to be corrected/adjusted.</p>
      </sec>
      <sec id="s3e">
        <title>2.5 Family-Wise Error Rate Control</title>
        <p>For the single-locus test, we denote the p-value for an association test of a SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e092" xlink:type="simple"/></inline-formula> and a phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e093" xlink:type="simple"/></inline-formula> to be <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e094" xlink:type="simple"/></inline-formula>, and the corrected p-value to be <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e095" xlink:type="simple"/></inline-formula>. Family-wise error rate (FWER), or the experiment-wise error rate, is the probability of at least one false association. We use <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e096" xlink:type="simple"/></inline-formula> to denote family-wise error rate, and it is given by<disp-formula id="pcbi.1002828.e097"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e097" xlink:type="simple"/></disp-formula>where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e098" xlink:type="simple"/></inline-formula> is the total number of tests and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e099" xlink:type="simple"/></inline-formula> is the hypothesis that all the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e100" xlink:type="simple"/></inline-formula> are true.</p>
        <p>Many methods can be used to control FWER. Bonferroni correction is a commonly used method, in which p-values need to be enlarged to account for the number of comparisons being performed. Permutation test <xref ref-type="bibr" rid="pcbi.1002828-Westfall1">[13]</xref> is also widely used to correct for multiple testing in GWAS.</p>
        <sec id="s3e1">
          <title>2.5.1 Bonferroni correction</title>
          <p>In Bonferroni correction, the p-value of a test is multiplied by the number of tests in the multiple comparison.<disp-formula id="pcbi.1002828.e101"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e101" xlink:type="simple"/></disp-formula>Here the number of tests is the number of SNPs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e102" xlink:type="simple"/></inline-formula> in a study. Bonferroni correction is a single-step procedure, in which each of the p-values is independently corrected.</p>
        </sec>
        <sec id="s3e2">
          <title>2.5.2 Permutation tests</title>
          <p>In the permutation test, data are reshuffled. For each permutation, p-values for all the tests are re-calculated, and the minimal p-value is retained. After <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e103" xlink:type="simple"/></inline-formula> permutations, we get a total of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e104" xlink:type="simple"/></inline-formula> minimal p-values. The corrected p-value is given by the proportion of minimal p-values that are less than the original p-value.</p>
          <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e105" xlink:type="simple"/></inline-formula> be the set of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e106" xlink:type="simple"/></inline-formula> permutations. For each permutation <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e107" xlink:type="simple"/></inline-formula>, the minimal p-value <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e108" xlink:type="simple"/></inline-formula> is given by<disp-formula id="pcbi.1002828.e109"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e109" xlink:type="simple"/></disp-formula>Then we have the corrected p-value<disp-formula id="pcbi.1002828.e110"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e110" xlink:type="simple"/></disp-formula>The permutation method takes advantage of the correlation structure between SNPs. It is less stringent than Bonferroni correction.</p>
        </sec>
      </sec>
      <sec id="s3f">
        <title>2.6 False Discovery Rate Control</title>
        <p>False discovery rate (FDR) controls the expected proportion of type 1 errors among all significant hypotheses. It is less conservative than the family-wise error rate. For example, if 100 observed results are claimed to be significant, and the FDR is 0.1, then 10 of the results are expected to be false discoveries.</p>
        <p>One way to control the FDR is as follows <xref ref-type="bibr" rid="pcbi.1002828-Benjamini1">[14]</xref>. The p-values of SNPs and the phenotype are ranked from smallest to largest. We denote the ordered p-values to be <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e111" xlink:type="simple"/></inline-formula>. Starting from the largest p-value to the smallest, the original p-value is multiplied by the total number of SNPs and divided by its rank. For the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e112" xlink:type="simple"/></inline-formula> p-value <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e113" xlink:type="simple"/></inline-formula>, its corrected p-value <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e114" xlink:type="simple"/></inline-formula> is given by<disp-formula id="pcbi.1002828.e115"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e115" xlink:type="simple"/></disp-formula>In this section, we have discussed commonly used methods in single-locus studies, the multiple testing problem and how to control error rate in GWAS. In the next section, we will introduce methods used for two-locus association studies. We will focus on one class of work that finds exact solutions when searching for SNP-SNP interactions in GWAS.</p>
      </sec>
    </sec>
    <sec id="s4">
      <title>3. Exact Methods for Two-Locus Association Study</title>
      <p>The vast number of SNPs has posed a great computational challenge to genome-wide association studies. In order to understand the underlying biological mechanisms of complex phenotypes, one needs to consider the joint effect of multiple SNPs simultaneously. Although the idea of studying the association between phenotype and multiple SNPs is straightforward, the implementation is nontrivial. For a study with a total of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e116" xlink:type="simple"/></inline-formula> SNPs, in order to find the association between <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e117" xlink:type="simple"/></inline-formula> SNPs and the phenotype, a brute-force approach is to exhaustively enumerate all <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e118" xlink:type="simple"/></inline-formula> possible SNP combinations and evaluate their associations with the phenotype. The computational burden imposed by this enormous search space often makes the complete genome-wide association study intractable. Moreover, although the permutation test has been considered the gold standard method for multiple testing correction, it will dramatically increase the computational burden because the process needs to be performed for all permuted data.</p>
      <p>In this section, we will focus on the recently developed exact methods for two-locus epistasis detection. Different from the single-locus approach, the goal of two-locus epistasis detection is to identify interacting SNP-pairs that have strong association with the phenotype. FastANOVA <xref ref-type="bibr" rid="pcbi.1002828-Zhang1">[15]</xref> is an algorithm for two-locus ANOVA (analysis of variance) test on quantitative traits and FastChi <xref ref-type="bibr" rid="pcbi.1002828-Zhang2">[16]</xref> for two-locus chi-square test on case-control phenotypes. COE <xref ref-type="bibr" rid="pcbi.1002828-Zhang3">[17]</xref> is a general method that can be applied in a wide range of tests. TEAM <xref ref-type="bibr" rid="pcbi.1002828-Zhang4">[18]</xref> is designed for studies involving a large number of individuals such as human studies. In this section, we will discuss these algorithms, and their strengths and limitations.</p>
      <sec id="s4a">
        <title>3.1 The FastANOVA Algorithm</title>
        <p>FastANOVA utilizes an upper bound of the two-locus ANOVA test to prune the search space. The upper bound is expressed as the sum of two terms. The first term is based on the single-SNP ANOVA test. The second term is based on the genotype of the SNP-pair and is independent of permutations. This property allows SNP-pairs to be indexed in a 2D array based on the genotype relationship between SNPs. Since the number of entries in the 2D array is bounded by the number of individuals in the study, many SNP-pairs share a common entry. Moreover, it can be shown that all SNP-pairs indexed by the same entry have exactly the same upper bound. Therefore, we can compute the upper bound for a group of SNP-pairs together. Another important property is that the indexing structure only needs to be built once and can be reused for all permuted data. Utilizing the upper bound and the indexing structure, FastANOVA only needs to perform the ANOVA test on a small number of candidate SNP-pairs without the risk of missing any significant pair. We discuss the algorithm in further detail in the following.</p>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e119" xlink:type="simple"/></inline-formula> be the set of SNPs of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e120" xlink:type="simple"/></inline-formula> individuals (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e121" xlink:type="simple"/></inline-formula>) and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e122" xlink:type="simple"/></inline-formula> be the quantitative phenotype of interest, where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e123" xlink:type="simple"/></inline-formula> (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e124" xlink:type="simple"/></inline-formula>) is the phenotype value of individual <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e125" xlink:type="simple"/></inline-formula>.</p>
        <p>For any SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e126" xlink:type="simple"/></inline-formula> (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e127" xlink:type="simple"/></inline-formula>), we represent the F-statistic from the ANOVA test of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e128" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e129" xlink:type="simple"/></inline-formula> as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e130" xlink:type="simple"/></inline-formula>. For any SNP-pair <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e131" xlink:type="simple"/></inline-formula>, we represent the F-statistic from the ANOVA test of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e132" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e133" xlink:type="simple"/></inline-formula> as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e134" xlink:type="simple"/></inline-formula>.</p>
        <p>The basic idea of ANOVA test is to partition the total sum of squared deviations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e135" xlink:type="simple"/></inline-formula> into between-group sum of squared deviations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e136" xlink:type="simple"/></inline-formula> and within-group sum of squared deviations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e137" xlink:type="simple"/></inline-formula>:<disp-formula id="pcbi.1002828.e138"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e138" xlink:type="simple"/></disp-formula>In our application of the two-locus association study, <xref ref-type="table" rid="pcbi-1002828-t002">Table 2</xref> and <xref ref-type="table" rid="pcbi-1002828-t003">Table 3</xref> show the possible groupings of phenotype values by the genotypes of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e139" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e140" xlink:type="simple"/></inline-formula> respectively.</p>
        <table-wrap id="pcbi-1002828-t002" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.t002</object-id>
          <label>Table 2</label>
          <caption>
            <title>Grouping of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e141" xlink:type="simple"/></inline-formula> by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e142" xlink:type="simple"/></inline-formula>.</title>
          </caption>
          <alternatives>
            <graphic id="pcbi-1002828-t002-2" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.t002" xlink:type="simple"/>
            <table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e143" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e144" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e145" xlink:type="simple"/></inline-formula></td>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e146" xlink:type="simple"/></inline-formula></td>
                </tr>
              </tbody>
            </table>
          </alternatives>
        </table-wrap>
        <table-wrap id="pcbi-1002828-t003" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.t003</object-id>
          <label>Table 3</label>
          <caption>
            <title>Grouping of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e147" xlink:type="simple"/></inline-formula> by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e148" xlink:type="simple"/></inline-formula>.</title>
          </caption>
          <alternatives>
            <graphic id="pcbi-1002828-t003-3" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.t003" xlink:type="simple"/>
            <table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" rowspan="1" colspan="1"/>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e149" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e150" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e151" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e152" xlink:type="simple"/></inline-formula></td>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e153" xlink:type="simple"/></inline-formula></td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e154" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e155" xlink:type="simple"/></inline-formula></td>
                  <td align="left" rowspan="1" colspan="1">group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e156" xlink:type="simple"/></inline-formula></td>
                </tr>
              </tbody>
            </table>
          </alternatives>
        </table-wrap>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e157" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e158" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e159" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e160" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e161" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e162" xlink:type="simple"/></inline-formula> represent the groups as indicated in <xref ref-type="table" rid="pcbi-1002828-t002">Table 2</xref> and <xref ref-type="table" rid="pcbi-1002828-t003">Table 3</xref>. We use <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e163" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e164" xlink:type="simple"/></inline-formula> to distinct the one locus (i.e., single-SNP) and two locus (i.e., SNP-pair) analyses. 
Specifically, we have<disp-formula id="pcbi.1002828.e165"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e165" xlink:type="simple"/></disp-formula><disp-formula id="pcbi.1002828.e166"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e166" xlink:type="simple"/></disp-formula>The F-statistics for ANOVA tests on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e167" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e168" xlink:type="simple"/></inline-formula> are:<disp-formula id="pcbi.1002828.e169"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e169" xlink:type="simple"/><label>(1.1)</label></disp-formula><disp-formula id="pcbi.1002828.e170"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e170" xlink:type="simple"/><label>(1.2)</label></disp-formula>where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e171" xlink:type="simple"/></inline-formula> in <xref ref-type="disp-formula" rid="pcbi.1002828.e170">Equation (1.2)</xref> is the number of groups that the genotype of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e172" xlink:type="simple"/></inline-formula> partitions the individuals into. 
Possible values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e173" xlink:type="simple"/></inline-formula> are 3 or 4, assuming all SNPs are distinct: If none of groups <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e174" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e175" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e176" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e177" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e178" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e179" xlink:type="simple"/></inline-formula> is empty, then <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e180" xlink:type="simple"/></inline-formula>. If one of them is empty, then <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e181" xlink:type="simple"/></inline-formula>.</p>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e182" xlink:type="simple"/></inline-formula> be the sum of all phenotype values. The total sum of squared deviations does not depend on the groupings of individuals:<disp-formula id="pcbi.1002828.e183"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e183" xlink:type="simple"/></disp-formula>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e184" xlink:type="simple"/></inline-formula> be the sum of phenotype values in a specific group, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e185" xlink:type="simple"/></inline-formula> be the number of individuals in that group. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e186" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e187" xlink:type="simple"/></inline-formula> can be calculated as follows:<disp-formula id="pcbi.1002828.e188"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e188" xlink:type="simple"/></disp-formula><disp-formula id="pcbi.1002828.e189"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e189" xlink:type="simple"/></disp-formula>Note that for any group of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e190" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e191" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e192" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e193" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic 
xlink:href="info:doi/10.1371/journal.pcbi.1002828.e194" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e195" xlink:type="simple"/></inline-formula>, if <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e196" xlink:type="simple"/></inline-formula>, then <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e197" xlink:type="simple"/></inline-formula> is defined to be 0.</p>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e198" xlink:type="simple"/></inline-formula> be the phenotype values in group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e199" xlink:type="simple"/></inline-formula>. Without loss of generality, assume that these phenotype values are arranged in ascending order, i.e.,<disp-formula id="pcbi.1002828.e200"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e200" xlink:type="simple"/></disp-formula>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e201" xlink:type="simple"/></inline-formula> be the phenotype values in group <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e202" xlink:type="simple"/></inline-formula>. Without loss of generality, assume that these phenotype values are arranged in ascending order, i.e.,<disp-formula id="pcbi.1002828.e203"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e203" xlink:type="simple"/></disp-formula>We have the overall upper bound on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e204" xlink:type="simple"/></inline-formula>:</p>
        <p><bold>Theorem 1</bold> <italic>(Upper bound of</italic> <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e205" xlink:type="simple"/></inline-formula><italic>)</italic><disp-formula id="pcbi.1002828.e206"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e206" xlink:type="simple"/></disp-formula></p>
        <p>The notations in the bound can be found in <xref ref-type="table" rid="pcbi-1002828-t004">Table 4</xref>. The upper bound in Theorem 1 is tight. The tightness of the bound is obvious from the derivation of the upper bound, since there exists some genotype of SNP-pair <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e207" xlink:type="simple"/></inline-formula> that makes the equality hold.</p>
        <table-wrap id="pcbi-1002828-t004" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.t004</object-id>
          <label>Table 4</label>
          <caption>
            <title>Notations for the bounds.</title>
          </caption>
          <alternatives>
            <graphic id="pcbi-1002828-t004-4" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.t004" xlink:type="simple"/>
            <table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" rowspan="1" colspan="1">Symbols</td>
                  <td align="left" rowspan="1" colspan="1">Formulas</td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e208" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e209" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e210" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e211" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e212" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e213" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e214" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e215" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e216" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e217" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e218" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e219" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
              </tbody>
            </table>
          </alternatives>
        </table-wrap>
        <p>We now discuss how to apply the upper bound in Theorem 1 in detail. The set of all SNP-pairs is partitioned into non-overlapping groups such that the upper bound can be readily applied to each group. For every <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e220" xlink:type="simple"/></inline-formula> <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e221" xlink:type="simple"/></inline-formula>, let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e222" xlink:type="simple"/></inline-formula> be the set of SNP-pairs<disp-formula id="pcbi.1002828.e223"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e223" xlink:type="simple"/></disp-formula>For all SNP-pairs in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e224" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e225" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e226" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e227" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e228" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e229" xlink:type="simple"/></inline-formula> are constants. 
Moreover, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e230" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e231" xlink:type="simple"/></inline-formula> are determined by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e232" xlink:type="simple"/></inline-formula>, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e233" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e234" xlink:type="simple"/></inline-formula> are determined by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e235" xlink:type="simple"/></inline-formula>. Therefore, in the upper bound, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e236" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e237" xlink:type="simple"/></inline-formula> are the only variables that depend on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e238" xlink:type="simple"/></inline-formula> and may vary for different SNP-pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e239" xlink:type="simple"/></inline-formula> in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e240" xlink:type="simple"/></inline-formula>.</p>
        <p>Note that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e241" xlink:type="simple"/></inline-formula> is the number of 1's in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e242" xlink:type="simple"/></inline-formula> when <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e243" xlink:type="simple"/></inline-formula> takes value 1, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e244" xlink:type="simple"/></inline-formula> is the number of 1's in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e245" xlink:type="simple"/></inline-formula> when <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e246" xlink:type="simple"/></inline-formula> takes value 0. It is easy to prove that switching <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e247" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e248" xlink:type="simple"/></inline-formula> does not change the F-statistic value and the correctness of the upper bound. This is also true if we switch <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e249" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e250" xlink:type="simple"/></inline-formula>. 
Therefore, without loss of generality, we can always assume that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e251" xlink:type="simple"/></inline-formula> is the smaller one between the number of 1's and number of 0's in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e252" xlink:type="simple"/></inline-formula> when <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e253" xlink:type="simple"/></inline-formula> takes value 1, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e254" xlink:type="simple"/></inline-formula> is the smaller one between the number of 1's and number of 0's in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e255" xlink:type="simple"/></inline-formula> when <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e256" xlink:type="simple"/></inline-formula> takes value 0.</p>
        <p>If there are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e257" xlink:type="simple"/></inline-formula> 1's and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e258" xlink:type="simple"/></inline-formula> 0's in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e259" xlink:type="simple"/></inline-formula>, then for any <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e260" xlink:type="simple"/></inline-formula>, the possible values that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e261" xlink:type="simple"/></inline-formula> can take are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e262" xlink:type="simple"/></inline-formula>. The possible values that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e263" xlink:type="simple"/></inline-formula> can take are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e264" xlink:type="simple"/></inline-formula>.</p>
        <p>To efficiently retrieve the candidates, the SNP-pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e265" xlink:type="simple"/></inline-formula> in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e266" xlink:type="simple"/></inline-formula> are grouped by their <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e267" xlink:type="simple"/></inline-formula> values and indexed in a 2D array, referred to as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e268" xlink:type="simple"/></inline-formula>.</p>
        <p>Suppose that there are 32 individuals, and the genotype of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e269" xlink:type="simple"/></inline-formula> consists of half 0's and half 1's. Thus for the SNP-pairs in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e270" xlink:type="simple"/></inline-formula>, the possible values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e271" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e272" xlink:type="simple"/></inline-formula> are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e273" xlink:type="simple"/></inline-formula>. <xref ref-type="fig" rid="pcbi-1002828-g001">Figure 1</xref> shows the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e274" xlink:type="simple"/></inline-formula> array, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e275" xlink:type="simple"/></inline-formula>, whose entries represent the possible values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e276" xlink:type="simple"/></inline-formula> for the SNP-pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e277" xlink:type="simple"/></inline-formula>. The entries in the same column have the same <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e278" xlink:type="simple"/></inline-formula> value. The entries in the same row have the same <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e279" xlink:type="simple"/></inline-formula> value. The <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e280" xlink:type="simple"/></inline-formula> value of each column is noted beneath each column. 
The <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e281" xlink:type="simple"/></inline-formula> value of each row is noted to the left of each row. Each entry of the array is a pointer to the SNP-pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e282" xlink:type="simple"/></inline-formula> having the corresponding <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e283" xlink:type="simple"/></inline-formula> values.</p>
        <fig id="pcbi-1002828-g001" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.g001</object-id>
          <label>Figure 1</label>
          <caption>
            <title>The index array <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e284" xlink:type="simple"/></inline-formula> for efficient retrieval of the candidate SNP-pairs.</title>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.g001" position="float" xlink:type="simple"/>
        </fig>
        <p>For any SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e285" xlink:type="simple"/></inline-formula>, the maximum number of the entries in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e286" xlink:type="simple"/></inline-formula> is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e287" xlink:type="simple"/></inline-formula>. The proof of this property is straightforward and omitted here. In order to find candidate SNP-pairs, we scan all entries in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e288" xlink:type="simple"/></inline-formula> to calculate their upper bounds. Since the SNP-pairs indexed by the same entry share the same <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e289" xlink:type="simple"/></inline-formula> value, they have the same upper bound. In this way, we can calculate the upper bound for a group of SNP-pairs together. Note that for typical genome-wide association studies, the number of individuals <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e290" xlink:type="simple"/></inline-formula> is much smaller than the number of SNPs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e291" xlink:type="simple"/></inline-formula>. Therefore, the additional cost for accessing <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e292" xlink:type="simple"/></inline-formula> is minimal compared to performing ANOVA tests for all pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e293" xlink:type="simple"/></inline-formula>.</p>
        <p>For multiple tests, the permutation procedure is often used in genetic analysis for controlling the family-wise error rate. For genome-wide association studies, permutation is less commonly used because it often entails prohibitively long computation times. Our FastANOVA algorithm makes the permutation procedure feasible in genome-wide association studies.</p>
        <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e294" xlink:type="simple"/></inline-formula> be the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e295" xlink:type="simple"/></inline-formula> permutations of the phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e296" xlink:type="simple"/></inline-formula>. Following the idea discussed above, the upper bound in Theorem 1 can be easily incorporated in the algorithm to handle the permutations. For every SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e297" xlink:type="simple"/></inline-formula>, the indexing structure <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e298" xlink:type="simple"/></inline-formula> is independent of the permuted phenotypes in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e299" xlink:type="simple"/></inline-formula>. The correctness of this property relies on the fact that, for any <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e300" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e301" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e302" xlink:type="simple"/></inline-formula> only depend on the genotype of the SNP-pair and thus remain constant for different phenotype permutations. Therefore, for each <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e303" xlink:type="simple"/></inline-formula>, once we build <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e304" xlink:type="simple"/></inline-formula>, it can be reused in all permutations.</p>
      </sec>
      <sec id="s4b">
        <title>3.2 The FastChi Algorithm</title>
        <p>As our initial attempt to develop scalable algorithms for genome-wide association studies, FastANOVA is specifically designed for the ANOVA test on quantitative phenotypes. Another category of phenotypes is generated in case-control studies, where the phenotypes are binary variables representing disease/non-disease individuals. The chi-square test is one of the most commonly used statistics in binary phenotype association studies. We can extend the principles in FastANOVA for efficient two-locus chi-square tests. The general idea of FastChi is similar to that of FastANOVA, i.e., re-formulating the chi-square test statistic to establish an upper bound of the two-locus chi-square test, and indexing the SNP-pairs according to their genotypes in order to effectively prune the search space and reuse redundant computations. Here we briefly introduce the FastChi algorithm.</p>
        <p>For SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e305" xlink:type="simple"/></inline-formula>, we represent the chi-square test value of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e306" xlink:type="simple"/></inline-formula> and the binary phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e307" xlink:type="simple"/></inline-formula> as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e308" xlink:type="simple"/></inline-formula>. For any SNP-pair <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e309" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e310" xlink:type="simple"/></inline-formula>, we use <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e311" xlink:type="simple"/></inline-formula> to represent the chi-square test value for the combined effect of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e312" xlink:type="simple"/></inline-formula> with <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e313" xlink:type="simple"/></inline-formula>. 
Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e314" xlink:type="simple"/></inline-formula> represent the following events respectively: <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e315" xlink:type="simple"/></inline-formula>; <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e316" xlink:type="simple"/></inline-formula>; <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e317" xlink:type="simple"/></inline-formula>; <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e318" xlink:type="simple"/></inline-formula>. Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e319" xlink:type="simple"/></inline-formula> denote the observed value of an event. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e320" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e321" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e322" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e323" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e324" xlink:type="simple"/></inline-formula>, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e325" xlink:type="simple"/></inline-formula> represent the formulas shown in <xref ref-type="table" rid="pcbi-1002828-t005">Table 5</xref>. We have the upper bound of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e326" xlink:type="simple"/></inline-formula> stated in Theorem 2.</p>
        <table-wrap id="pcbi-1002828-t005" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.t005</object-id>
          <label>Table 5</label>
          <caption>
            <title>Notations used in the derivation of the upper bound for two-locus Chi-square test.</title>
          </caption>
          <alternatives>
            <graphic id="pcbi-1002828-t005-5" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.t005" xlink:type="simple"/>
            <table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" rowspan="1" colspan="1">Symbols</td>
                  <td align="left" rowspan="1" colspan="1">Formulas</td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e327" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e328" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e329" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e330" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e331" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e332" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e333" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e334" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e335" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e336" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
                <tr>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e337" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                  <td align="left" rowspan="1" colspan="1">
                    <inline-formula>
                      <inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e338" xlink:type="simple"/>
                    </inline-formula>
                  </td>
                </tr>
              </tbody>
            </table>
          </alternatives>
        </table-wrap>
        <p><bold>Theorem 2</bold> <italic>(Upper bound of </italic><inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e339" xlink:type="simple"/></inline-formula><italic>)</italic><disp-formula id="pcbi.1002828.e340"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e340" xlink:type="simple"/></disp-formula></p>
        <p>For a given phenotype <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e341" xlink:type="simple"/></inline-formula> and SNP <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e342" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e343" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e344" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e345" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e346" xlink:type="simple"/></inline-formula>, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e347" xlink:type="simple"/></inline-formula> are constants. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e348" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e349" xlink:type="simple"/></inline-formula> are the only variables that depend on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e350" xlink:type="simple"/></inline-formula> and may vary for different SNP-pairs <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e351" xlink:type="simple"/></inline-formula>. (Recall that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e352" xlink:type="simple"/></inline-formula>.) 
Thus for a given <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e353" xlink:type="simple"/></inline-formula>, we can treat equation <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e354" xlink:type="simple"/></inline-formula> as a <italic>straight line</italic> in the 2-D space of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e355" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e356" xlink:type="simple"/></inline-formula>. The ones whose <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e357" xlink:type="simple"/></inline-formula> values fall below the line can be pruned without any further test.</p>
        <p>Suppose that there are 32 individuals, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e358" xlink:type="simple"/></inline-formula> contains half 0's and half 1's. For the SNP-pairs in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e359" xlink:type="simple"/></inline-formula>, the possible values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e360" xlink:type="simple"/></inline-formula> (and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e361" xlink:type="simple"/></inline-formula>) are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e362" xlink:type="simple"/></inline-formula>. <xref ref-type="fig" rid="pcbi-1002828-g002">Figure 2</xref> shows the 2-D space of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e363" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e364" xlink:type="simple"/></inline-formula>. The blue stars represent the values that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e365" xlink:type="simple"/></inline-formula> can take. The line <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e366" xlink:type="simple"/></inline-formula> is plotted in the figure. Only the SNP-pairs whose <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e367" xlink:type="simple"/></inline-formula> values are in the shaded region are subject to the two-locus Chi-square test.</p>
        <fig id="pcbi-1002828-g002" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002828.g002</object-id>
          <label>Figure 2</label>
          <caption>
            <title>Pruning SNP-pairs in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e368" xlink:type="simple"/></inline-formula> using the upper bound.</title>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002828.g002" position="float" xlink:type="simple"/>
        </fig>
        <p>Similar to FastANOVA, in FastChi, we can index the SNP-pairs in <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e369" xlink:type="simple"/></inline-formula> according to their genotype relationships, i.e., by the values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e370" xlink:type="simple"/></inline-formula>. Experimental results demonstrate that FastChi is an order of magnitude faster than the brute force alternative.</p>
      </sec>
      <sec id="s4c">
        <title>3.3 The COE Algorithm</title>
        <p>Both FastANOVA and FastChi rework the formulas of the ANOVA test and the chi-square test to estimate an upper bound of the test value for SNP pairs. These upper bounds are used to identify candidate SNP pairs that may have strong epistatic effects. Repetitive computation in a permutation test is also identified and performed only once; the results are stored for use by all permutations. These two strategies lead to substantial speedup, especially for large permutation tests, without compromising the accuracy of the test. These approaches are guaranteed to find the optimal solutions. However, a common drawback of these methods is that they are designed for specific tests, i.e., the chi-square test and the ANOVA test. The upper bounds used in these methods do not work for other statistical tests, which are also routinely used by researchers. In addition, new statistics for epistasis detection are continually emerging in the literature. Therefore, it is desirable to develop a general model that supports a variety of statistical tests.</p>
        <p>The COE algorithm takes advantage of convex optimization. It can be shown that a wide range of statistical tests, such as the chi-square test, the likelihood ratio test (also known as the G-test), and entropy-based tests, are all convex functions of the observed frequencies in contingency tables. Since the maximum value of a convex function is attained at the vertices of its convex domain, by placing constraints on the observed frequencies in the contingency tables, we can determine the domain of the convex function and get its maximum value. This maximum value is used as the upper bound on the test statistics to filter out insignificant SNP-pairs. COE is applicable to all tests that are convex.</p>
      </sec>
      <sec id="s4d">
        <title>3.4 The TEAM Algorithm</title>
        <p>The methods we have discussed so far provide promising alternatives for GWAS. However, there are two major drawbacks that limit their applicability. First, they are designed for relatively small sample sizes and only consider homozygous markers (i.e., each SNP can be represented as a <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e371" xlink:type="simple"/></inline-formula> binary variable). In human studies, the sample size is usually large and most SNPs contain heterozygous genotypes and are coded using <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e372" xlink:type="simple"/></inline-formula>. These factors make previous methods intractable. Second, although the family-wise error rate (FWER) and the false discovery rate (FDR) are both widely used for error control, previous methods are designed only to control the FWER. From a computational point of view, the difference between controlling the FWER and controlling the FDR is that, to estimate the FWER, only the maximum two-locus test value is needed for each permutation. To estimate the FDR, on the other hand, all two-locus test values must be computed for each permutation.</p>
        <p>To address these limitations, TEAM is proposed for efficient epistasis detection in human GWAS. TEAM has several advantages over previous methods. It supports both homozygous and heterozygous data. By exhaustively computing all two-locus test values in the permutation test, it enables both FWER and FDR control. It is applicable to all statistics based on the contingency table, whereas previous methods are either designed for specific tests or require the test statistics to satisfy certain properties. Experimental results demonstrate that TEAM is more efficient than existing methods for large sample studies.</p>
        <p>TEAM incorporates the permutation test for proper error controlling. The key idea is to incrementally update the contingency tables of two-locus tests. We show that only four of the eighteen observed frequencies in the contingency table need to be updated to compute the test value. In the algorithm, we build a minimum spanning tree <xref ref-type="bibr" rid="pcbi.1002828-Cormen1">[19]</xref> on the SNPs. The nodes of the tree are SNPs. Each edge represents the genotype difference between the two connected SNPs. This tree structure can be utilized to speed up the updating process for the contingency tables. A majority of the individuals are pruned and only a small portion are scanned to update the contingency tables. This is advantageous in human study, which usually involves thousands of individuals. Extensive experimental results demonstrate the efficiency of the TEAM algorithm.</p>
        <p>As a summary of the exact two-locus algorithms, FastANOVA and FastChi are designed for specific tests and binary genotype data. The COE algorithm is a more general method that can be applied to all convex tests. The TEAM algorithm is more suitable for large sample human GWAS.</p>
      </sec>
    </sec>
    <sec id="s5">
      <title>4. Multifactor Dimensionality Reduction</title>
      <p>Multifactor dimensionality reduction (MDR) <xref ref-type="bibr" rid="pcbi.1002828-Ritchie1">[20]</xref> is a data mining method to identify interactions among discrete variables for binary outcomes. It can be used to detect high-order gene-gene and gene-environment interactions in case-control studies. By pooling multi-locus SNPs into two groups, one classified as high-risk and the other classified as low-risk, MDR effectively reduces the predictors from <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e373" xlink:type="simple"/></inline-formula> dimensions to one dimension. Then, the one-dimensional variable is evaluated through cross-validation. The steps are repeated for all other <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e374" xlink:type="simple"/></inline-formula> factor combinations, and the factor model which has the lowest prediction error is chosen as the ‘best’ <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e375" xlink:type="simple"/></inline-formula> factor model. Its detailed steps are as follows:</p>
      <list list-type="bullet">
        <list-item>
          <p>Divide the set of factors into 10 equal subsets.</p>
        </list-item>
        <list-item>
          <p>Select a set of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e376" xlink:type="simple"/></inline-formula> factors from the pool of all factors in the training set.</p>
        </list-item>
        <list-item>
          <p>Create a contingency table for these <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e377" xlink:type="simple"/></inline-formula> factors by counting the number of cases and controls in each combination.</p>
        </list-item>
        <list-item>
          <p>Compute the case-control ratio in each combination. Label a combination as “high-risk” if its ratio is greater than a certain threshold; otherwise, label it as “low-risk”.</p>
        </list-item>
        <list-item>
          <p>Use the labels to classify individuals. Compute the misclassification rate.</p>
        </list-item>
        <list-item>
          <p>Repeat previous steps for all combinations of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e378" xlink:type="simple"/></inline-formula> factors across 10 training and testing subsets.</p>
        </list-item>
        <list-item>
          <p>Choose the model whose average misclassification rate is minimized and cross-validation consistency is maximized as the “best” model.</p>
        </list-item>
      </list>
      <p>MDR designs a constructive induction method that combines two or more SNPs before testing for association. The power of the MDR approach is that it can be combined with other methodologies including the ones described in this chapter.</p>
    </sec>
    <sec id="s6">
      <title>5. Logistic Regression</title>
      <p>Logistic regression is a statistical method for predicting binary and categorical outcomes. It is widely used in GWAS <xref ref-type="bibr" rid="pcbi.1002828-Cordell1">[21]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Wason1">[22]</xref>. The basic idea is to use linear regression to model the probability of the occurrence of a specific outcome. Logistic regression is applicable to both single-locus and multi-locus association studies and can incorporate covariates and other factors in the model.</p>
      <p>Let <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e379" xlink:type="simple"/></inline-formula> be a binary variable representing disease status (diseased versus non-diseased), and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e380" xlink:type="simple"/></inline-formula> be a SNP. The conditional probability of having the disease given a SNP is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e381" xlink:type="simple"/></inline-formula>. We define the logit function to convert the range of the probability from <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e382" xlink:type="simple"/></inline-formula> to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e383" xlink:type="simple"/></inline-formula>:<disp-formula id="pcbi.1002828.e384"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e384" xlink:type="simple"/></disp-formula>The logit can be considered as a latent continuous variable that will be fit to a linear predictor function:<disp-formula id="pcbi.1002828.e385"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e385" xlink:type="simple"/></disp-formula>To cope with multiple SNP loci and potential covariates, we can modify the above model. 
For example, in the following model the logit is fit with predictors of SNPs (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e386" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e387" xlink:type="simple"/></inline-formula>) and covariates (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e388" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e389" xlink:type="simple"/></inline-formula>):<disp-formula id="pcbi.1002828.e390"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e390" xlink:type="simple"/></disp-formula>Although logistic regression can handle complicated models, it may be computationally demanding when the number of predictors is large <xref ref-type="bibr" rid="pcbi.1002828-Yang1">[23]</xref>.</p>
    </sec>
    <sec id="s7">
      <title>6. Summary</title>
      <p>The potential of genome-wide association studies for the identification of genetic variants that underlie phenotypic variations is well recognized. The availability of large SNP data generated by high-throughput genotyping methods poses great computational and statistical challenges. In this chapter, we have discussed several computational approaches to detect associations between genetic markers and the phenotypes. For further readings, the readers are encouraged to refer to <xref ref-type="bibr" rid="pcbi.1002828-Balding1">[11]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Hirschhorn1">[7]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Hoh1">[24]</xref>, <xref ref-type="bibr" rid="pcbi.1002828-Musani1">[25]</xref> for discussions about current progress and challenges in large-scale genetic association studies.</p>
    </sec>
    <sec id="s8">
      <title>7. Exercises</title>
      <p><bold>Question 1:</bold> The table below contains binary genotype and case-control phenotype data from ten individuals. Give the contingency table and use <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e391" xlink:type="simple"/></inline-formula> test to compute the association test score.<disp-formula id="pcbi.1002828.e392"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e392" xlink:type="simple"/></disp-formula></p>
      <p><bold>Question 2:</bold> Assuming that we have the following SNP and phenotype data, is the SNP significantly associated with the phenotype? Here, we represent each SNP site as the number of minor alleles on that locus, so 0 and 2 are for major and minor homozygous sites, respectively, and 1 is for the heterozygous sites. We also assume that minor alleles contribute to the phenotype and the effect is additive. In other words, the effect from a minor homozygous site should be twice as large as that from a heterozygous site. You may use any test methods introduced in the chapter. How about permutation tests?<disp-formula id="pcbi.1002828.e393"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e393" xlink:type="simple"/></disp-formula></p>
      <p><bold>Question 3:</bold> Categorize the following methods in the table. The methods are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002828.e394" xlink:type="simple"/></inline-formula> test, G-test, ANOVA, Student's T-test, Pearson's correlation, linear regression, logistic regression.<disp-formula id="pcbi.1002828.e395"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002828.e395" xlink:type="simple"/></disp-formula></p>
      <p><bold>Question 4:</bold> Why is it important to study multiple-locus association? What are the challenges?</p>
      <p>Answers to the Exercises can be found in <xref ref-type="supplementary-material" rid="pcbi.1002828.s001">Text S1</xref>.</p>
      <boxed-text id="pcbi-1002828-box002" position="float">
        <sec id="s8a1">
          <title>Further Reading</title>
          <list list-type="bullet">
            <list-item>
              <p>Cantor RM, Lange K, Sinsheimer JS (2008) Prioritizing GWAS results: a review of statistical methods and recommendations for their application. Nat Rev Genet 9(11): 855–867.</p>
            </list-item>
            <list-item>
              <p>Cordell HJ (2009) Detecting gene-gene interactions that underlie human diseases. Nat Rev Genet 10(6): 392–404.</p>
            </list-item>
            <list-item>
              <p>Manolio TA, Collins FS, Cox NJ, Goldstein DB, Hindorff LA, et al. (2009) Finding the missing heritability of complex diseases. Nature 461(7265): 747–753.</p>
            </list-item>
            <list-item>
              <p>Moore JH, Williams SM (2009) Epistasis and its implications for personal genetics. Am J Hum Genet 85(3): 309–320.</p>
            </list-item>
            <list-item>
              <p>Phillips PC (2010) Epistasis - the essential role of gene interactions in the structure and evolution of genetic systems. Am J Hum Genet 86(1): 6–22.</p>
            </list-item>
            <list-item>
              <p>Wang K, Li M, Hakonarson H (2010) Analysing biological pathways in genome-wide association studies. Nat Rev Genet 11: 843–854.</p>
            </list-item>
          </list>
        </sec>
      </boxed-text>
    </sec>
    <sec id="s9">
      <title>Supporting Information</title>
      <supplementary-material id="pcbi.1002828.s001" mimetype="application/pdf" xlink:href="info:doi/10.1371/journal.pcbi.1002828.s001" position="float" xlink:type="simple">
        <label>Text S1</label>
        <caption>
          <p>Answers to Exercises</p>
          <p>(PDF)</p>
        </caption>
      </supplementary-material>
    </sec>
  </body>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="pcbi.1002828-Churchill1">
        <label>1</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Churchill</surname><given-names>GA</given-names></name>, <name name-style="western"><surname>Airey</surname><given-names>DC</given-names></name>, <name name-style="western"><surname>Allayee</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Angel</surname><given-names>JM</given-names></name>, <name name-style="western"><surname>Attie</surname><given-names>AD</given-names></name>, <etal>et al</etal>. (<year>2004</year>) <article-title>The collaborative cross, a community resource for the genetic analysis of complex traits</article-title>. <source>Nat Genet</source> <volume>36</volume>: <fpage>1133</fpage>–<lpage>1137</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-The1">
        <label>2</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><collab xlink:type="simple">The International HapMap Consortium</collab> (<year>2003</year>) <article-title>The international hapmap project</article-title>. <source>Nature</source> <volume>426</volume><supplement>(6968)</supplement>: <fpage>789</fpage>–<lpage>796</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Saxena1">
        <label>3</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Saxena</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Voight</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Lyssenko</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Burtt</surname><given-names>N</given-names></name>, <name name-style="western"><surname>de Bakker</surname><given-names>P</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>Genome-wide association analysis identifies loci for type 2 diabetes and triglyceride levels</article-title>. <source>Science</source> <volume>316</volume>: <fpage>1331</fpage>–<lpage>1336</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Scuteri1">
        <label>4</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Scuteri</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Sanna</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Chen</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Uda</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Albai</surname><given-names>G</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>Genome-wide association scan shows genetic variants in the FTO gene are associated with obesity-related traits</article-title>. <source>PLoS Genet</source> <volume>3</volume><supplement>(7)</supplement>: <fpage>e115</fpage> <comment>doi:<ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pgen.0030115" xlink:type="simple">10.1371/journal.pgen.0030115</ext-link></comment>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-The2">
        <label>5</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><collab xlink:type="simple">The Wellcome Trust Case Control Consortium</collab> (<year>2007</year>) <article-title>Genome-wide association study of 14,000 cases of seven common diseases and 3,000 shared controls</article-title>. <source>Nature</source> <volume>447</volume>: <fpage>661</fpage>–<lpage>678</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Weedon1">
        <label>6</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Weedon</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Lettre</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Freathy</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Lindgren</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Voight</surname><given-names>B</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>A common variant of HMGA2 is associated with adult and childhood height in the general population</article-title>. <source>Nat Genet</source> <volume>39</volume>: <fpage>1245</fpage>–<lpage>1250</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Hirschhorn1">
        <label>7</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hirschhorn</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Daly</surname><given-names>M</given-names></name> (<year>2005</year>) <article-title>Genome-wide association studies for common diseases and complex traits</article-title>. <source>Nat Rev Genet</source> <volume>6</volume>: <fpage>95</fpage>–<lpage>108</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-McCarthy1">
        <label>8</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McCarthy</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Abecasis</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Cardon</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Goldstein</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Little</surname><given-names>J</given-names></name>, <etal>et al</etal>. (<year>2008</year>) <article-title>Genome-wide association studies for complex traits: consensus, uncertainty and challenges</article-title>. <source>Nat Rev Genet</source> <volume>9</volume><supplement>(5)</supplement>: <fpage>356</fpage>–<lpage>369</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Thorisson1">
        <label>9</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Thorisson</surname><given-names>GA</given-names></name>, <name name-style="western"><surname>Smith</surname><given-names>AV</given-names></name>, <name name-style="western"><surname>Krishnan</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Stein</surname><given-names>LD</given-names></name> (<year>2005</year>) <article-title>The international hapmap project web site</article-title>. <source>Genome Res</source> <volume>15</volume>: <fpage>1592</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-The3">
        <label>10</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><collab xlink:type="simple">The 1000 Genomes Project Consortium</collab> (<year>2010</year>) <article-title>A map of human genome variation from population-scale sequencing</article-title>. <source>Nature</source> <volume>467</volume>: <fpage>1061</fpage>–<lpage>1073</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Balding1">
        <label>11</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Balding</surname><given-names>DJ</given-names></name> (<year>2006</year>) <article-title>A tutorial on statistical methods for population association studies</article-title>. <source>Nat Rev Genet</source> <volume>7</volume><supplement>(10)</supplement>: <fpage>781</fpage>–<lpage>791</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Samani1">
        <label>12</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Samani</surname><given-names>NJ</given-names></name>, <name name-style="western"><surname>Erdmann</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Hall</surname><given-names>AS</given-names></name>, <name name-style="western"><surname>Hengstenberg</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Mangino</surname><given-names>M</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>Genomewide association analysis of coronary artery disease</article-title>. <source>N Engl J Med</source> <volume>357</volume>: <fpage>443</fpage>–<lpage>453</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Westfall1">
        <label>13</label>
        <mixed-citation publication-type="other" xlink:type="simple">Westfall PH, Young SS (1993) Resampling-based multiple testing. Wiley: New York.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Benjamini1">
        <label>14</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Benjamini</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Hochberg</surname><given-names>Y</given-names></name> (<year>1995</year>) <article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing</article-title>. <source>J R Stat Soc Series B Stat Methodol</source> <volume>57</volume><supplement>(1)</supplement>: <fpage>289</fpage>–<lpage>300</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Zhang1">
        <label>15</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Zou</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Wang</surname><given-names>W</given-names></name> (<year>2008</year>) <article-title>FastANOVA: an efficient algorithm for genome-wide association study</article-title>. <source>KDD</source> <volume>2008</volume>: <fpage>821</fpage>–<lpage>829</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Zhang2">
        <label>16</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Zou</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Wang</surname><given-names>W</given-names></name> (<year>2009</year>) <article-title>FastChi: an efficient algorithm for analyzing gene-gene interactions</article-title>. <source>PSB</source> <volume>2009</volume>: <fpage>528</fpage>–<lpage>539</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Zhang3">
        <label>17</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Pan</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Xie</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Zou</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Wang</surname><given-names>W</given-names></name> (<year>2010</year>) <article-title>COE: a general approach for efficient genome-wide two-locus epistatic test in disease association study</article-title>. <source>J Comput Biol</source> <volume>17</volume><supplement>(3)</supplement>: <fpage>401</fpage>–<lpage>415</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Zhang4">
        <label>18</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Huang</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Zou</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Wang</surname><given-names>W</given-names></name> (<year>2010</year>) <article-title>TEAM: Efficient two-locus epistasis tests in human genome-wide association study</article-title>. <source>Bioinformatics</source> <volume>26</volume><supplement>(12)</supplement>: <fpage>217</fpage>–<lpage>227</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Cormen1">
        <label>19</label>
        <mixed-citation publication-type="other" xlink:type="simple">Cormen TH, Leiserson CE, Rivest RL, Stein C (2001) Introduction to algorithms. MIT Press and McGraw-Hill.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Ritchie1">
        <label>20</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ritchie</surname><given-names>MD</given-names></name>, <name name-style="western"><surname>Hahn</surname><given-names>LW</given-names></name>, <name name-style="western"><surname>Roodi</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Bailey</surname><given-names>LR</given-names></name>, <name name-style="western"><surname>Dupont</surname><given-names>WD</given-names></name>, <etal>et al</etal>. (<year>2001</year>) <article-title>Multifactor-dimensionality reduction reveals high-order interactions among estrogen-metabolism genes in sporadic breast cancer</article-title>. <source>Am J Hum Genet</source> <volume>69</volume>: <fpage>138</fpage>–<lpage>147</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Cordell1">
        <label>21</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cordell</surname><given-names>HJ</given-names></name> (<year>2002</year>) <article-title>Epistasis: what it means, what it doesn't mean, and statistical methods to detect it in humans</article-title>. <source>Hum Mol Genet</source> <volume>11</volume>: <fpage>2463</fpage>–<lpage>2468</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Wason1">
        <label>22</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wason</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Dudbridge</surname><given-names>F</given-names></name> (<year>2010</year>) <article-title>Comparison of multimarker logistic regression models, with application to a genomewide scan of schizophrenia</article-title>. <source>BMC Genet</source> <volume>11</volume>: <fpage>80</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Yang1">
        <label>23</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yang</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Wan</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Yang</surname><given-names>Q</given-names></name>, <name name-style="western"><surname>Xue</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Tang</surname><given-names>N</given-names></name>, <etal>et al</etal>. (<year>2011</year>) <article-title>A hidden two-locus disease association pattern in genome-wide association studies</article-title>. <source>BMC Bioinformatics</source> <volume>12</volume>: <fpage>156</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Hoh1">
        <label>24</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hoh</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Ott</surname><given-names>J</given-names></name> (<year>2003</year>) <article-title>Mathematical multi-locus approaches to localizing complex human trait genes</article-title>. <source>Nat Rev Genet</source> <volume>4</volume>: <fpage>701</fpage>–<lpage>709</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002828-Musani1">
        <label>25</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Musani</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Shriner</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Liu</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Feng</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Coffey</surname><given-names>C</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>Detection of gene×gene interactions in genome-wide association studies of human population data</article-title>. <source>Hum Hered</source> <volume>63</volume><supplement>(2)</supplement>: <fpage>67</fpage>–<lpage>84</lpage>.</mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>