<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="nlm-ta">PLoS Genet</journal-id>
<journal-id journal-id-type="pmc">plosgen</journal-id><journal-title-group>
<journal-title>PLoS Genetics</journal-title></journal-title-group>
<issn pub-type="ppub">1553-7390</issn>
<issn pub-type="epub">1553-7404</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, USA</publisher-loc></publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PGENETICS-D-13-03484</article-id>
<article-id pub-id-type="doi">10.1371/journal.pgen.1004377</article-id>
<article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="Discipline-v2"><subject>Biology and life sciences</subject><subj-group><subject>Evolutionary biology</subject><subj-group><subject>Population genetics</subject></subj-group></subj-group><subj-group><subject>Genetics</subject></subj-group></subj-group></article-categories>
<title-group>
<article-title>Insights into the Genetic Structure and Diversity of 38 South Asian Indians from Deep Whole-Genome Sequencing</article-title>
<alt-title alt-title-type="running-head">Whole-Genome Sequencing of South Asian Indians</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple"><name name-style="western"><surname>Wong</surname><given-names>Lai-Ping</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple"><name name-style="western"><surname>Lai</surname><given-names>Jason Kuan-Han</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Saw</surname><given-names>Woei-Yuh</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Ong</surname><given-names>Rick Twee-Hee</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Cheng</surname><given-names>Anthony Youzhi</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Pillai</surname><given-names>Nisha Esakimuthu</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Liu</surname><given-names>Xuanyao</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Xu</surname><given-names>Wenting</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Chen</surname><given-names>Peng</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Foo</surname><given-names>Jia-Nee</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Tan</surname><given-names>Linda Wei-Lin</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Koo</surname><given-names>Seok-Hwee</given-names></name><xref ref-type="aff" rid="aff4"><sup>4</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Soong</surname><given-names>Richie</given-names></name><xref ref-type="aff" rid="aff5"><sup>5</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Wenk</surname><given-names>Markus Rene</given-names></name><xref ref-type="aff" rid="aff6"><sup>6</sup></xref><xref ref-type="aff" rid="aff7"><sup>7</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Lim</surname><given-names>Wei-Yen</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Khor</surname><given-names>Chiea-Chuen</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff3"><sup>3</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Little</surname><given-names>Peter</given-names></name><xref ref-type="aff" rid="aff8"><sup>8</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Chia</surname><given-names>Kee-Seng</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Teo</surname><given-names>Yik-Ying</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="aff" rid="aff8"><sup>8</sup></xref><xref ref-type="aff" rid="aff9"><sup>9</sup></xref><xref ref-type="corresp" rid="cor1"><sup>*</sup></xref></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><addr-line>Saw Swee Hock School of Public Health, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff2"><label>2</label><addr-line>NUS Graduate School for Integrative Science and Engineering, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff3"><label>3</label><addr-line>Genome Institute of Singapore, Agency for Science, Technology and Research, Singapore</addr-line></aff>
<aff id="aff4"><label>4</label><addr-line>Pharmacogenetics Laboratory, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff5"><label>5</label><addr-line>Cancer Science Institute of Singapore, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff6"><label>6</label><addr-line>Department of Biochemistry, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff7"><label>7</label><addr-line>Department of Biological Sciences, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff8"><label>8</label><addr-line>Life Sciences Institute, National University of Singapore, Singapore</addr-line></aff>
<aff id="aff9"><label>9</label><addr-line>Department of Statistics and Applied Probability, National University of Singapore, Singapore</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple"><name name-style="western"><surname>Williams</surname><given-names>Scott M.</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/></contrib>
</contrib-group>
<aff id="edit1"><addr-line>Dartmouth College, United States of America</addr-line></aff>
<author-notes>
<corresp id="cor1">* E-mail: <email xlink:type="simple">statyy@nus.edu.sg</email></corresp>
<fn fn-type="conflict"><p>The authors have declared that no competing interests exist.</p></fn>
<fn fn-type="con"><p>Conceived and designed the experiments: YYT KSC PL CCK RS MRW. Performed the experiments: LWLT WX WYL SHK. Analyzed the data: LPW JKHL WYS AYC NEP XL PC RTHO JNF. Wrote the paper: YYT LPW JKHL.</p></fn>
</author-notes>
<pub-date pub-type="collection"><month>5</month><year>2014</year></pub-date>
<pub-date pub-type="epub"><day>15</day><month>5</month><year>2014</year></pub-date>
<volume>10</volume>
<issue>5</issue>
<elocation-id>e1004377</elocation-id>
<history>
<date date-type="received"><day>19</day><month>12</month><year>2013</year></date>
<date date-type="accepted"><day>28</day><month>3</month><year>2014</year></date>
</history>
<permissions>
<copyright-year>2014</copyright-year>
<copyright-holder>Wong et al</copyright-holder><license xlink:type="simple"><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license></permissions>
<abstract>
<p>South Asia possesses a significant amount of genetic diversity due to considerable intergroup differences in culture and language. There have been numerous reports on the genetic structure of Asian Indians, although these have mostly relied on genotyping microarrays or targeted sequencing of the mitochondria and Y chromosomes. Asian Indians in Singapore are primarily descendants of immigrants from Dravidian-language–speaking states in south India, and 38 individuals from the general population underwent deep whole-genome sequencing with a target coverage of 30X as part of the Singapore Sequencing Indian Project (SSIP). The genetic structure and diversity of these samples were compared against samples from the Singapore Sequencing Malay Project and populations in Phase 1 of the 1000 Genomes Project (1 KGP). SSIP samples exhibited greater intra-population genetic diversity and possessed a higher heterozygous-to-homozygous genotype ratio than other Asian populations. When compared against a panel of well-defined Asian Indians, the genetic makeup of the SSIP samples was closely related to South Indians. However, even though the SSIP samples clustered distinctly from the Europeans in the global population structure analysis with autosomal SNPs, eight samples were assigned to mitochondrial haplogroups that were predominantly present in Europeans and possessed higher European admixture than the remaining samples. An analysis of the relative relatedness between SSIP and two archaic hominins (Denisovan, Neanderthal) identified higher ancient admixture in East Asian populations than in SSIP. The data resource for these samples is publicly available and is expected to serve as a valuable complement to the South Asian samples in Phase 3 of 1 KGP.</p>
</abstract>
<abstract abstract-type="summary"><title>Author Summary</title>
<p>Indians of South Asia have long been a population of interest to a wide audience, due to their unique diversity. We have deep-sequenced 38 individuals of Indian descent residing in Singapore (SSIP) in an effort to illustrate their diversity from a whole-genome standpoint. Indeed, among Asians in our population panel, SSIP was most diverse, followed by the Malays in Singapore (SSMP). Their diversity is further observed in the population's chromosome Y haplogroup and mitochondria haplogroup profiles; individuals with European-dominant haplogroups had a greater proportion of European admixture. Among variants (single nucleotide polymorphism and small insertions/deletions) discovered in SSIP, 21.69% were novel with respect to previous sequencing projects. In addition, some 14 loss-of-function variants (LOFs) were associated with cancer, Type II diabetes, and cholesterol levels. Finally, a D statistic test with ancient hominids concurred that there was greater gene flow to East Asians than to South Asians.</p>
</abstract>
<funding-group><funding-statement>We acknowledge the support from the National Research Foundation (NRF-RF-2010-05) and the Singapore Biomedical Research Council (05/1/21/19/425 and 11/1/21/18/678). We also acknowledge the support of the Yong Loo Lin School of Medicine, the National University Health System, the Life Sciences Institute and the Office of Deputy President (Research and Technology) from the National University of Singapore. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement></funding-group><counts><page-count count="15"/></counts></article-meta>
</front>
<body><sec id="s1">
<title>Introduction</title>
<p>Next-generation sequencing (NGS) technologies have enabled an entire genome to be sequenced in a cost-effective manner <xref ref-type="bibr" rid="pgen.1004377-Mardis1">[1]</xref>, and this has allowed multiple individuals from a population to be surveyed in order to catalogue genetic variants that are present in the population. By adopting an unbiased approach to survey the whole genome, NGS presents a more comprehensive catalogue of different classes of genetic variants with a single assay: from changes that affect only a single base in the genome (single nucleotide polymorphisms, SNPs), to small-size additions and omissions (insertion-deletions, indels); and to larger contiguous changes in the genome that affect either the number of copies of a stretch of genome or differences in the genomic structure (structural variants, SVs). Prior to the advent of NGS, the knowledge of these different classes of variants at the population level had been derived primarily from surveys using pre-designed microarrays, the majority of which focused on SNP variations between populations.</p>
<p>The 1000 Genomes Project (1 KGP) intends to survey more than 2,500 individuals from at least 20 populations around the world <xref ref-type="bibr" rid="pgen.1004377-Abecasis1">[2]</xref>, and Phase 1 of the project has already offered valuable insights into the population genetics of 14 populations. While the 1 KGP adopted the approach of sequencing multiple individuals at a lower sequence depth of 2-6X, a recently concluded project in Southeast Asia sequenced 100 Austronesian Malays (the Singapore Sequencing Malay Project, SSMP) at a coverage of at least 30X and examined the merits of deep sequencing for a more complete characterization of variants carried by the individuals <xref ref-type="bibr" rid="pgen.1004377-Wong1">[3]</xref>. The use of NGS has also been successfully extended to sequence ancient hominids such as the Neanderthals and the Denisovans <xref ref-type="bibr" rid="pgen.1004377-Meyer1">[4]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Green1">[5]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Reich1">[6]</xref>, and from well-preserved human tissues from the Ötzi <xref ref-type="bibr" rid="pgen.1004377-Keller1">[7]</xref> and an aboriginal Australian <xref ref-type="bibr" rid="pgen.1004377-DESA1">[8]</xref>.</p>
<p>South Asia comprises more than 20% of the total world population, of which the majority resides in India <xref ref-type="bibr" rid="pgen.1004377-DESA1">[8]</xref>. The contemporary demographic makeup of the Indian subcontinent is considerably heterogeneous and is the result of complex human migration and interaction since the first human dispersal out of Africa between 60,000 and 75,000 years ago. It was first proposed from the analysis of mitochondria sequences that a single rapid coastal dispersion happened from the Horn of Africa into Southeast Asia and Australasia through the Indian subcontinent <xref ref-type="bibr" rid="pgen.1004377-Macaulay1">[9]</xref>, and this was corroborated by additional analyses of mitochondria in the Andaman and Nicobar Islands <xref ref-type="bibr" rid="pgen.1004377-Thangaraj1">[10]</xref>. Genome-wide genotyping surveys of geographically well-defined South Asians indicated the presence of complex admixture between populations in the Indian subcontinent <xref ref-type="bibr" rid="pgen.1004377-Reich2">[11]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Moorjani1">[12]</xref>, which has been proposed to be attributed to the practice of the caste system which encouraged endogamous marriages and the presence of different ethno-linguistic groups – the Indo-Aryan language speaking groups that are primarily found in north India, and the Dravidian language speaking groups that are predominantly found in south India.</p>
<p>Singapore is home to more than 350,000 Indians, comprising 9.2% of the residing population <xref ref-type="bibr" rid="pgen.1004377-Singstat1">[13]</xref>. These people with ancestry originating from the Indian subcontinent have been designated officially and uniformly as “Indian”, although this can be inadequate given the heterogeneous background of the Indian populace in Singapore, which includes Tamils, Malayalee, Sikh, Hindustani, Punjabi, Sindhi, Hindi, Gujarati, Urdu and Sinhalese <xref ref-type="bibr" rid="pgen.1004377-Leow1">[14]</xref>. The majority of the Singapore Indians were descended from immigrants from south India who settled in the country after Singapore became a major entrepot trading center in the early 19<sup>th</sup> century <xref ref-type="bibr" rid="pgen.1004377-Periasamy1">[15]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Turnbull1">[16]</xref>.</p>
<p>The SSIP aims to perform whole-genome sequencing of 38 healthy Singapore Indians, to provide insights into the genetic structure and diversity of Asian Indians in Singapore. With a target sequence depth of 30X, the SSIP presents another public resource of deep whole genome sequencing of multiple samples in a well-defined population, as with the SSMP. Here, we evaluated the genetic proximity of the SSIP to the SSMP and 14 populations from Phase 1 of the 1 KGP, and measured the degree of intra-population genetic diversity in each of the 16 populations. The ancestral origins of the SSIP samples were inferred by evaluating the mitochondria and chromosome Y haplogroup memberships of the relevant samples, and we performed an analysis of the relative relatedness between SSIP and two archaic hominins (Denisovan, Neanderthal). The genetic resource for the SSIP is publicly available at <ext-link ext-link-type="uri" xlink:href="http://www.statgen.nus.edu.sg/~SSIP" xlink:type="simple">http://www.statgen.nus.edu.sg/~SSIP</ext-link>.</p>
</sec><sec id="s2">
<title>Results</title>
<sec id="s2a">
<title>Sequence data assembly, alignment and quality control</title>
<p>South Asian Indians from the Singapore Population Health Study were sampled to be whole-genome sequenced with the Illumina HiSeq 2000 to a target 30-fold coverage, using paired-end sequencing with 100 basepairs (bp) reads and a target insert size of between 300 bp and 400 bp (<xref ref-type="supplementary-material" rid="pgen.1004377.s001">Figure S1</xref>). This was performed on 38 subjects, comprising 26 females and 12 males, where ethnic membership for each sample was confirmed through verbal reconfirmation that all four grandparents were similarly of South Asian descent. A total of 56 billion paired-end reads were generated for the 38 samples, of which 42.7 billion reads were properly paired and passed quality assessment (<xref ref-type="supplementary-material" rid="pgen.1004377.s002">Figure S2</xref>). Sequence reads were mapped to the NCBI build 37 reference genome with Consensus Assessment of Sequence And VAriation (CASAVA v1.9), and variant calling for SNPs and indels was performed with both the single-sample caller CASAVA and the multi-sample caller Genome Analysis Toolkit (GATK) (<xref ref-type="supplementary-material" rid="pgen.1004377.s003">Figure S3</xref>). All but one of the 38 SSIP samples (SSI007) achieved median sequence depths greater than 30X (<xref ref-type="supplementary-material" rid="pgen.1004377.s004">Figure S4</xref>). To assess the accuracy of the self-reported population membership, a principal component analysis (PCA) was carried out on the 38 SSIP samples and 268 samples from the Singapore Genome Variation Project (SGVP), where one sample (SSI016) clearly clustered with the Malays instead of the Indians (<xref ref-type="supplementary-material" rid="pgen.1004377.s005">Figure S5</xref>). We thus excluded both SSI007 and SSI016 from further analyses, and the remaining samples consisted of 25 females and 11 males.</p>
</sec><sec id="s2b">
<title>SNP discovery and annotation</title>
<p>The data release for SSIP consisted of the consensus set of SNPs and indels called by both CASAVA and GATK to minimize false discovery. As each sample was also genotyped on the Illumina Omni2.5 M microarray, we compared the concordance of the genotype calls made by CASAVA and GATK with the genotypes at the same SNPs on the Omni2.5 M. CASAVA was observed to produce genotype calls with a higher concordance with the Omni2.5 M genotypes than GATK (<xref ref-type="supplementary-material" rid="pgen.1004377.s006">Figure S6</xref>), and we subsequently retained the CASAVA genotypes for release in the SSIP VCFs and downstream analyses.</p>
<p>A total of 10,305,409 SNPs, 1,269,000 indels and 56,088 large deletions were detected in the 36 SSIP samples (<xref ref-type="table" rid="pgen-1004377-t001">Table 1</xref>), where 20.02%, 32.90% and 76.67% respectively were found to be novel (defined as not present in dbSNP137, 1 KGP, the SSMP or the Database of Genomic Variants (DGV) accordingly, <xref ref-type="fig" rid="pgen-1004377-g001">Figure 1</xref>). While the vast majority of the SNPs discovered were bi-allelic, there were 18,904 tri-allelic and 27 quad-allelic SNPs, of which 5.4% were not previously known. The transition-to-transversion (Ts/Tv) ratio for the bi-allelic SNPs was 2.14 across the whole genome, and was significantly higher at 3.24 if restricted to only the exonic SNPs (<xref ref-type="supplementary-material" rid="pgen.1004377.s019">Table S3</xref>) due to the over-representation of CpG sites in the exome (calculated in SSIP at 32.6% in the exome, and 17.2% across the genome). With the available sample size, each variant is classified according to the alternative allele frequency (AAF) as either low-frequency (AAF&lt;5%) or common (AAF≥5%). The majority of the known bi-allelic SNPs were common while the opposite was observed for novel SNPs, where the majority was low-frequency in nature (<xref ref-type="table" rid="pgen-1004377-t001">Table 1</xref>).</p>
<fig id="pgen-1004377-g001" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.g001</object-id><label>Figure 1</label><caption>
<title>Size distribution and novelty of variants in SSIP.</title>
<p>Autosomal variants identified in the 36 SSIP samples, which included single nucleotide polymorphisms (SNPs), small insertion/deletions (indels) between 2 bp and 50 bp, and large deletions between 51 bp and 1 Mb. The SSIP SNPs and indels are defined as novel if they are not present in SSMP and dbSNP137, whereas dbSNP132 was used for defining the novelty of the 1 KGP SNPs and indels. The novelty of large deletions in SSIP and 1 KGP is defined with respect to SSMP and DGV release 2013-07-23.</p>
</caption><graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.g001" position="float" xlink:type="simple"/></fig><table-wrap id="pgen-1004377-t001" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.t001</object-id><label>Table 1</label><caption>
<title>Summary of variants discovered in SSIP.</title>
</caption><alternatives><graphic id="pgen-1004377-t001-1" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.t001" xlink:type="simple"/>
<table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup>
<thead>
<tr>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">NOVEL<xref ref-type="table-fn" rid="nt101">*</xref></td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">KNOWN</td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">ALL</td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"><bold>Low</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Common</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Subtotal</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Low</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Common</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Subtotal</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Low</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Common</bold></td>
<td align="left" rowspan="1" colspan="1"><bold>Total</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Bi-allelic SNPs</bold></td>
<td align="left" rowspan="1" colspan="1">2,036,480</td>
<td align="left" rowspan="1" colspan="1">54,888</td>
<td align="left" rowspan="1" colspan="1">2,091,368</td>
<td align="left" rowspan="1" colspan="1">2,204,240</td>
<td align="left" rowspan="1" colspan="1">5,990,870</td>
<td align="left" rowspan="1" colspan="1">8,195,110</td>
<td align="left" rowspan="1" colspan="1">4,240,720</td>
<td align="left" rowspan="1" colspan="1">6,045,758</td>
<td align="left" rowspan="1" colspan="1">10,286,478</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>High</bold></td>
<td align="left" rowspan="1" colspan="1">323</td>
<td align="left" rowspan="1" colspan="1">3</td>
<td align="left" rowspan="1" colspan="1">326</td>
<td align="left" rowspan="1" colspan="1">260</td>
<td align="left" rowspan="1" colspan="1">342</td>
<td align="left" rowspan="1" colspan="1">602</td>
<td align="left" rowspan="1" colspan="1">583</td>
<td align="left" rowspan="1" colspan="1">345</td>
<td align="left" rowspan="1" colspan="1">928</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Moderate</bold></td>
<td align="left" rowspan="1" colspan="1">9,967</td>
<td align="left" rowspan="1" colspan="1">115</td>
<td align="left" rowspan="1" colspan="1">10,082</td>
<td align="left" rowspan="1" colspan="1">11,120</td>
<td align="left" rowspan="1" colspan="1">16,176</td>
<td align="left" rowspan="1" colspan="1">27,296</td>
<td align="left" rowspan="1" colspan="1">21,087</td>
<td align="left" rowspan="1" colspan="1">16,291</td>
<td align="left" rowspan="1" colspan="1">37,378</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Low</bold></td>
<td align="left" rowspan="1" colspan="1">7,337</td>
<td align="left" rowspan="1" colspan="1">96</td>
<td align="left" rowspan="1" colspan="1">7,433</td>
<td align="left" rowspan="1" colspan="1">10,369</td>
<td align="left" rowspan="1" colspan="1">20,467</td>
<td align="left" rowspan="1" colspan="1">30,836</td>
<td align="left" rowspan="1" colspan="1">17,706</td>
<td align="left" rowspan="1" colspan="1">20,563</td>
<td align="left" rowspan="1" colspan="1">38,269</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Modifier</bold></td>
<td align="left" rowspan="1" colspan="1">2,018,853</td>
<td align="left" rowspan="1" colspan="1">54,674</td>
<td align="left" rowspan="1" colspan="1">2,073,527</td>
<td align="left" rowspan="1" colspan="1">2,182,491</td>
<td align="left" rowspan="1" colspan="1">5,953,885</td>
<td align="left" rowspan="1" colspan="1">8,136,376</td>
<td align="left" rowspan="1" colspan="1">4,201,344</td>
<td align="left" rowspan="1" colspan="1">6,008,559</td>
<td align="left" rowspan="1" colspan="1">10,209,903</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>SIFT_PP2 Damage</bold></td>
<td align="left" rowspan="1" colspan="1">2,162</td>
<td align="left" rowspan="1" colspan="1">17</td>
<td align="left" rowspan="1" colspan="1">2,179</td>
<td align="left" rowspan="1" colspan="1">1,762</td>
<td align="left" rowspan="1" colspan="1">1,134</td>
<td align="left" rowspan="1" colspan="1">2,896</td>
<td align="left" rowspan="1" colspan="1">3,924</td>
<td align="left" rowspan="1" colspan="1">1,151</td>
<td align="left" rowspan="1" colspan="1">5,075</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Average number of SNPs per sample</bold></td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">3,308,876</td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Indels</bold></td>
<td align="left" rowspan="1" colspan="1">33</td>
<td align="left" rowspan="1" colspan="1">417,117</td>
<td align="left" rowspan="1" colspan="1">417,150</td>
<td align="left" rowspan="1" colspan="1">17</td>
<td align="left" rowspan="1" colspan="1">851,833</td>
<td align="left" rowspan="1" colspan="1">851,850</td>
<td align="left" rowspan="1" colspan="1">50</td>
<td align="left" rowspan="1" colspan="1">1,268,950</td>
<td align="left" rowspan="1" colspan="1">1,269,000</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>High</bold></td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">334</td>
<td align="left" rowspan="1" colspan="1">334</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">378</td>
<td align="left" rowspan="1" colspan="1">378</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">712</td>
<td align="left" rowspan="1" colspan="1">712</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Moderate</bold></td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">247</td>
<td align="left" rowspan="1" colspan="1">247</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">343</td>
<td align="left" rowspan="1" colspan="1">343</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">590</td>
<td align="left" rowspan="1" colspan="1">590</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Low</bold></td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
<td align="left" rowspan="1" colspan="1">0</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Modifier</bold></td>
<td align="left" rowspan="1" colspan="1">33</td>
<td align="left" rowspan="1" colspan="1">416,536</td>
<td align="left" rowspan="1" colspan="1">416,569</td>
<td align="left" rowspan="1" colspan="1">17</td>
<td align="left" rowspan="1" colspan="1">851,112</td>
<td align="left" rowspan="1" colspan="1">851,129</td>
<td align="left" rowspan="1" colspan="1">50</td>
<td align="left" rowspan="1" colspan="1">1,267,648</td>
<td align="left" rowspan="1" colspan="1">1,267,698</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Average number of indels per sample</bold></td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">395,224</td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Structural variation (deletion)</bold></td>
<td align="left" rowspan="1" colspan="1">26,579</td>
<td align="left" rowspan="1" colspan="1">15,300</td>
<td align="left" rowspan="1" colspan="1">41,879</td>
<td align="left" rowspan="1" colspan="1">3,529</td>
<td align="left" rowspan="1" colspan="1">10,680</td>
<td align="left" rowspan="1" colspan="1">14,209</td>
<td align="left" rowspan="1" colspan="1">30,108</td>
<td align="left" rowspan="1" colspan="1">25,980</td>
<td align="left" rowspan="1" colspan="1">56,088</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1"><bold>Average number of SV per sample</bold></td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1">7,713</td>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
<td align="left" rowspan="1" colspan="1"/>
</tr>
</tbody>
</table>
</alternatives><table-wrap-foot><fn id="nt101"><label/><p>*Definition of novel SNPs and indels is made with reference to known variants in dbSNP137 and SSMP, while definition of novel structural variants is made with reference to known variants in Phase 1 of 1 KGP, SSMP and DGV.</p></fn></table-wrap-foot></table-wrap>
<p>Bi-allelic SNPs and indels were further annotated with SNPEff which catalogued each variant into one of four categories according to the predicted level of impact to protein function <xref ref-type="bibr" rid="pgen.1004377-Cingolani1">[17]</xref>. In the SSIP samples, 928 and 37,378 SNPs were classified into the high and moderate impact categories respectively (<xref ref-type="table" rid="pgen-1004377-t001">Table 1</xref>). Among these SNPs, 5,075 SNPs were predicted by both SIFT and PolyPhen v2 to be damaging to the protein product. Similarly, the majority of the indels were catalogued as modifiers with low or no impact to protein function, and only 712 and 590 indels were annotated as high and moderate impact variants respectively. Intriguingly, almost all the observed indels were present in the SSIP samples at minor allele frequencies &gt;5%.</p>
<p>When compared to the variants that have been reported by 1 KGP, there were more SNPs and indels present in 1 KGP than SSIP (<xref ref-type="fig" rid="pgen-1004377-g001">Figure 1</xref>). However, SSIP identified more deletions between the sizes of 50 bp and 1 kb than 1 KGP. This is likely attributed to the higher coverage of the sequencing, which provided greater confidence in identifying the smaller deletions that may only be identified with sufficient sequence depth.</p>
</sec><sec id="s2c">
<title>Loss-of-function variants</title>
<p>We identified 1,429 loss-of-function variants (LOFs) in the 36 SSIP samples, of which 635 LOFs were novel and 407 LOFs had AAF&gt;5% (<xref ref-type="supplementary-material" rid="pgen.1004377.s020">Table S4</xref>). On average, each sample carried 350 LOFs, which exceeded a previous report of around 100 genuine LOFs per healthy individual <xref ref-type="bibr" rid="pgen.1004377-MacArthur1">[18]</xref> but was similar to the average of 470 LOFs per sample in the SSMP <xref ref-type="bibr" rid="pgen.1004377-Wong1">[3]</xref>. A gene-set analysis using the Database for Annotation, Visualization and Integrated Discovery (DAVID) identified significant enrichment of LOFs in pathways related to olfactory transduction, ATP-binding cassette (ABC) transporters and histidine metabolism, although only the olfactory transduction pathway remained statistically significant after correcting for multiple testing with the Benjamini-Hochberg procedure (<italic>P</italic><sub>corrected</sub> = 2.8×10<sup>−3</sup>, <xref ref-type="supplementary-material" rid="pgen.1004377.s021">Table S5</xref>). The LOFs in the olfactory pathway may have emerged to perceive chemicals due to differential diet and environmental exposure that may have geographical specificity.</p>
<p>The LOFs were also mapped to the Catalogue of Somatic Mutations In Cancer (COSMIC) database with SNPnexus, and 11 variants were found to be associated with ovarian, gastrointestinal, hepatic and pancreatic cancer (<xref ref-type="supplementary-material" rid="pgen.1004377.s022">Table S6</xref>). The risk alleles at these variants generally were found at higher frequencies in the SSIP samples, although we caution against over-interpreting the significance given the small sample set in the SSIP. When checked against the genome-wide association studies (GWAS) database, 3 LOFs were reported to be associated with conduct disorder, triglyceride and high density lipoprotein cholesterol, and Type 2 diabetes (<xref ref-type="supplementary-material" rid="pgen.1004377.s023">Table S7</xref>). In particular, the LOF rs1048886 that was reported to be associated with diabetes was first established from a diabetes GWAS in Singapore Indians.</p>
</sec><sec id="s2d">
<title>Population structure of SSIP samples</title>
<p>A PCA was performed with the SSIP samples and a panel of 132 South Asians from 25 well-defined groups from the Indian subcontinent <xref ref-type="bibr" rid="pgen.1004377-Reich2">[11]</xref> in an attempt to understand the ancestral origins of the Singapore Indians. Consistent with the findings by Reich and colleagues <xref ref-type="bibr" rid="pgen.1004377-Reich2">[11]</xref>, the first principal component (PC) distinguished the Great Andamanese, Onge, Nyshi, AoNaga and Siddi samples from all other Indian subgroups (including SSIP, <xref ref-type="fig" rid="pgen-1004377-g002">Figure 2A</xref>). When samples from these five groups were removed, it was observed that SSIP samples were located with all the south Indian groups (Hallaki, Kamsali, Velama) except Chenchu (<xref ref-type="fig" rid="pgen-1004377-g002">Figures 2B, C</xref>), although there was considerable heterogeneity amongst the north Indian groups (<xref ref-type="fig" rid="pgen-1004377-g002">Figure 2C</xref>) that precluded a clear latitudinal distinction of north and south Indians. We performed admixture analysis on 104 individuals from 20 Indian groups reported by Reich and colleagues, together with the SSIP samples. The results supported the hypothesis that the SSIP individuals are genetically more similar to the south Indians than to the north Indians (<xref ref-type="supplementary-material" rid="pgen.1004377.s014">Figure S14</xref>).</p>
<fig id="pgen-1004377-g002" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.g002</object-id><label>Figure 2</label><caption>
<title>Principal component analysis (PCA) of SSIP samples with 132 South Asians.</title>
<p>PCA of 36 SSIP samples with 132 South Asian samples from 25 well-defined Indian groups by Reich and colleagues <xref ref-type="bibr" rid="pgen.1004377-Reich3">[44]</xref> using 202,600 SNPs that were present in both databases (panel A). Five groups corresponding to Great Andamanese, Onge, Nyshi, AoNaga and Siddi were subsequently removed, leaving 104 samples from 20 Indian groups to be analyzed in a second PCA, where the samples were first assigned a color according to their group memberships (panel B), and second by the latitude of origin into North and South Indians (panel C, see <xref ref-type="supplementary-material" rid="pgen.1004377.s018">Table S2</xref> for the classification of North and South Indians). The color assignments in panels A and B are represented by the color legend on the bottom left of the figure.</p>
</caption><graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.g002" position="float" xlink:type="simple"/></fig>
<p>A separate PCA with samples from 16 populations (SSIP, SSMP and Phase 1 of 1 KGP) was performed to place these 36 Singapore Indians amongst global populations (<xref ref-type="fig" rid="pgen-1004377-g003">Figure 3A</xref>). The first PC differentiated the SSIP samples from Africans and East Asians although the second PC was necessary to distinguish them from the Europeans. However, there appeared to be considerable diversity between the Americans that confounded the distinction between them and the SSIP samples. This was similarly evident in the F<sub>ST</sub> analyses, where the Americans were found to be most similar to the SSIP (<xref ref-type="supplementary-material" rid="pgen.1004377.s007">Figure S7</xref>). Excluding the admixed American populations, the SSIP was genetically closest to the Europeans in the panel of populations considered.</p>
<fig id="pgen-1004377-g003" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.g003</object-id><label>Figure 3</label><caption>
<title>Principal component analysis (PCA) of 1,224 samples from 16 global populations.</title>
<p>PCA of 1,224 samples from SSIP, SSMP and 14 populations from Phase 1 of the 1 KGP, color-coded by continents (panel A). An analysis of admixture was also performed on the 16 populations with ADMIXTURE, where the number of distinct populations (<italic>K</italic>) was allowed to vary between 2 and 8 (panel B). The black window highlights the position of the SSIP samples on the admixture plot.</p>
</caption><graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.g003" position="float" xlink:type="simple"/></fig>
<p>In an admixture analysis of the 16 populations, we observed that the SSIP were consistently inferred to be significantly admixed with the Europeans until at least six distinct groups were assumed (<italic>K</italic>≥6, <xref ref-type="fig" rid="pgen-1004377-g003">Figure 3B</xref>), where the European component stabilized to an average of 7.1%, 4.3% of Malay and 1.3% of East Asians (<xref ref-type="supplementary-material" rid="pgen.1004377.s024">Table S8</xref>).</p>
</sec><sec id="s2e">
<title>Assessing between and within population diversity</title>
<p>One measure of population diversity we investigated was the proportion of SNPs that was shared uniquely with only one other population when assessed across all 16 populations (<xref ref-type="fig" rid="pgen-1004377-g004">Figure 4A</xref>). We observed that SSIP exhibited the greatest sharing with SSMP, although this is likely to be attributed to the &gt;30X sequencing depth for both populations, since the remaining 14 populations in 1 KGP were sequenced at between 2-6X. Unsurprisingly, populations from the same continent generally exhibited higher sharing amongst themselves (<xref ref-type="fig" rid="pgen-1004377-g004">Figure 4B</xref>) and the admixed populations from the Americas tend to possess greater sharing with either the European populations or the African populations.</p>
<fig id="pgen-1004377-g004" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.g004</object-id><label>Figure 4</label><caption>
<title>Unique SNP sharing between populations.</title>
<p>(A) Each row represents the distribution of SNPs that are shared uniquely between a reference population (vertical axis) and a target population (horizontal), where the bars along the diagonal indicate the number of SNPs that are unique to the reference population. Here, unique sharing is defined as SNPs that are present only in the two respective populations but not others. (B) Distribution of SNPs in the reference population (horizontal) that are shared by only one other population, but here the target populations are grouped by continents into four broad categories of the Americas (AMR: CLM, MXL, PUR), Africans (AFR: ASW, LWK, YRI), Asian (ASN: CHB, CHS, JPT, SSMP, SSIP) and Europeans (EUR: CEU, FIN, GBR, IBS, TSI).</p>
</caption><graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.g004" position="float" xlink:type="simple"/></fig>
<p>In addition to evaluating the inter-population diversity, we also investigated intra-population diversity by measuring the degree of SNP sharing between every pair of samples in each population. This presented a distance measure <italic>D</italic> that is scaled between 0 and 1, where a higher value indicated a greater degree of heterogeneity in SNP content between the two samples (or a lower degree of SNP sharing). As expected, the African populations exhibited the highest intra-population diversity while the East Asian populations exhibited the lowest (<xref ref-type="fig" rid="pgen-1004377-g005">Figure 5</xref>). In comparison, SSIP had a median intra-population diversity that was marginally higher than the Europeans, while SSMP was between the East Asians and Europeans. Other than the median, the spread of the <italic>D</italic> metric calculation between every pair of samples is also indicative of the inter-sample diversity within a population. As expected, the American populations exhibited the largest spread due to differential degree of admixture between the samples. Excluding the Americans, SSMP exhibited the largest spread for a population, suggesting that there were considerable differences in the extent of genetic dissimilarities between the Singapore Malays, likely a reflection of the heterogeneous ancestry of the Singapore Malays. The trend in the distribution of the <italic>D</italic> metric was consistent even when the analysis was restricted to 36 samples from each population (<xref ref-type="supplementary-material" rid="pgen.1004377.s008">Figure S8</xref>) to avoid confounding due to the different number of samples in each population. 
Consistent results were similarly observed when the same analysis was performed on samples from seven populations in 1 KGP that were sequenced by Complete Genomics to deeper coverage of between 51X and 89X, which included the Gujarati Indians from Houston that exhibited a <italic>D</italic> distribution highly concordant to the SSIP (<xref ref-type="supplementary-material" rid="pgen.1004377.s009">Figure S9</xref>).</p>
<fig id="pgen-1004377-g005" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.g005</object-id><label>Figure 5</label><caption>
<title>Assessing intra-population diversity between the samples.</title>
<p>The extent of SNP sharing between every pair of samples in a population can be measured with a distance measure <italic>D</italic> that is scaled between 0 and 1 (vertical axis), where a higher value indicates a greater extent of heterogeneity in SNP content (or a lower degree of SNP sharing) between two samples. All possible pairwise measurements of <italic>D</italic> in each population are represented in a boxplot, where the ends of the whiskers indicate the minimum and maximum distances between specific pairs of samples in that population, the edges of the box indicate the 1<sup>st</sup> and 3<sup>rd</sup> quartiles, and the horizontal line in the box represents the median pairwise distance. The groups are colored with respect to the four continents (Americas – maroon; Africans – red; Asians – green; Europeans – blue). Each label on the horizontal axis indicates the continent label, population label, number of samples and total number of sample pairs of the population.</p>
</caption><graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.g005" position="float" xlink:type="simple"/></fig>
<p>While SSIP exhibited the highest intra-population diversity amongst the five Asian populations (CHB, CHS, JPT, SSMP, SSIP), we similarly observed the highest ratio of heterozygous genotypes to homozygous genotypes (Het/Hom) in the SSIP samples among all Asians, and the levels present in the SSIP were comparable to those in Europeans (<xref ref-type="supplementary-material" rid="pgen.1004377.s010">Figure S10</xref>). The trend in the distribution of Het/Hom remained consistent even when adjusted for sample size and sequencing coverage (<xref ref-type="supplementary-material" rid="pgen.1004377.s010">Figures S10</xref>, <xref ref-type="supplementary-material" rid="pgen.1004377.s011">S11</xref>), where Africans generally displayed Het/Hom of around 2, East Asians around 1.4, and South Asians (SSIP, GIH from Complete Genomics) and Europeans around 1.6.</p>
</sec><sec id="s2f">
<title>Mitochondria and Y chromosome haplogroup membership</title>
<p>We assigned the 36 SSIP mitochondria (mtDNA) sequences to known mitochondria haplogroups with HaploGrep <xref ref-type="bibr" rid="pgen.1004377-KlossBrandsttter1">[19]</xref>. The majority of the samples were assigned to the haplogroups M, N and R (<xref ref-type="table" rid="pgen-1004377-t002">Table 2</xref>) which were found in high frequencies in South Asian populations <xref ref-type="bibr" rid="pgen.1004377-Rajkumar1">[20]</xref>, <xref ref-type="bibr" rid="pgen.1004377-vanOven1">[21]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Chandrasekar1">[22]</xref>. However, the remaining 9 samples were assigned to haplogroups F, HV, T, U and W. Other than haplogroup F that was found mostly in East Asia, the other four haplogroups were predominantly present in European populations. In a similar analysis of chromosome Y for the 11 male SSIP samples (<xref ref-type="table" rid="pgen-1004377-t003">Table 3</xref>), eight of the Y chromosomes were assigned to haplogroups that were predominantly found in South Asia (F: primarily a south Indian Dravidian-speaking haplogroup; H: present mainly in the Indian subcontinent; L: present mainly in South and Central Asia). Two samples were assigned to haplogroup J2b2 that was present mainly in the Mediterranean and Southern Europe, while the remaining sample was assigned to R1a1, a haplogroup that is mostly present in Europe but is found at low frequencies in South Asia.</p>
<table-wrap id="pgen-1004377-t002" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.t002</object-id><label>Table 2</label><caption>
<title>Mitochondria haplogroup assignment for the 36 SSIP samples.</title>
</caption><alternatives><graphic id="pgen-1004377-t002-2" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.t002" xlink:type="simple"/>
<table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup>
<thead>
<tr>
<td align="left" rowspan="1" colspan="1">Sample</td>
<td align="left" rowspan="1" colspan="1">Haplogroup</td>
<td align="left" rowspan="1" colspan="1">Sample</td>
<td align="left" rowspan="1" colspan="1">Haplogroup</td>
<td align="left" rowspan="1" colspan="1">Sample</td>
<td align="left" rowspan="1" colspan="1">Haplogroup</td>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="1" colspan="1">SSI003</td>
<td align="left" rowspan="1" colspan="1">M30d</td>
<td align="left" rowspan="1" colspan="1">SSI017</td>
<td align="left" rowspan="1" colspan="1">T2a1a</td>
<td align="left" rowspan="1" colspan="1">SSI029</td>
<td align="left" rowspan="1" colspan="1">R6</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI004</td>
<td align="left" rowspan="1" colspan="1">F1c1a</td>
<td align="left" rowspan="1" colspan="1">SSI018</td>
<td align="left" rowspan="1" colspan="1">M40</td>
<td align="left" rowspan="1" colspan="1">SSI030</td>
<td align="left" rowspan="1" colspan="1">HV12b</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI005</td>
<td align="left" rowspan="1" colspan="1">M33a+146</td>
<td align="left" rowspan="1" colspan="1">SSI019</td>
<td align="left" rowspan="1" colspan="1">M7c3c</td>
<td align="left" rowspan="1" colspan="1">SSI031</td>
<td align="left" rowspan="1" colspan="1">M3a1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI006</td>
<td align="left" rowspan="1" colspan="1">M</td>
<td align="left" rowspan="1" colspan="1">SSI020</td>
<td align="left" rowspan="1" colspan="1">M34</td>
<td align="left" rowspan="1" colspan="1">SSI032</td>
<td align="left" rowspan="1" colspan="1">U1a3</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI008</td>
<td align="left" rowspan="1" colspan="1">N8</td>
<td align="left" rowspan="1" colspan="1">SSI021</td>
<td align="left" rowspan="1" colspan="1">M6a1</td>
<td align="left" rowspan="1" colspan="1">SSI033</td>
<td align="left" rowspan="1" colspan="1">U1a3</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI009</td>
<td align="left" rowspan="1" colspan="1">M53</td>
<td align="left" rowspan="1" colspan="1">SSI022</td>
<td align="left" rowspan="1" colspan="1">M36</td>
<td align="left" rowspan="1" colspan="1">SSI034</td>
<td align="left" rowspan="1" colspan="1">R8a1a1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI010</td>
<td align="left" rowspan="1" colspan="1">M36d1</td>
<td align="left" rowspan="1" colspan="1">SSI023</td>
<td align="left" rowspan="1" colspan="1">W3a1</td>
<td align="left" rowspan="1" colspan="1">SSI035</td>
<td align="left" rowspan="1" colspan="1">R6a</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI011</td>
<td align="left" rowspan="1" colspan="1">M66</td>
<td align="left" rowspan="1" colspan="1">SSI024</td>
<td align="left" rowspan="1" colspan="1">HV12b</td>
<td align="left" rowspan="1" colspan="1">SSI036</td>
<td align="left" rowspan="1" colspan="1">M5c1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI012</td>
<td align="left" rowspan="1" colspan="1">M2a'b</td>
<td align="left" rowspan="1" colspan="1">SSI025</td>
<td align="left" rowspan="1" colspan="1">U7</td>
<td align="left" rowspan="1" colspan="1">SSI037</td>
<td align="left" rowspan="1" colspan="1">M40a</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI013</td>
<td align="left" rowspan="1" colspan="1">HV14</td>
<td align="left" rowspan="1" colspan="1">SSI026</td>
<td align="left" rowspan="1" colspan="1">M35a1</td>
<td align="left" rowspan="1" colspan="1">SSI038</td>
<td align="left" rowspan="1" colspan="1">M5a2a</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI014</td>
<td align="left" rowspan="1" colspan="1">M3a1</td>
<td align="left" rowspan="1" colspan="1">SSI027</td>
<td align="left" rowspan="1" colspan="1">M34</td>
<td align="left" rowspan="1" colspan="1">SSI039</td>
<td align="left" rowspan="1" colspan="1">M66</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI015</td>
<td align="left" rowspan="1" colspan="1">M3a2</td>
<td align="left" rowspan="1" colspan="1">SSI028</td>
<td align="left" rowspan="1" colspan="1">M35a1</td>
<td align="left" rowspan="1" colspan="1">SSI040</td>
<td align="left" rowspan="1" colspan="1">M6a1</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap><table-wrap id="pgen-1004377-t003" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.t003</object-id><label>Table 3</label><caption>
<title>Chromosome Y haplogroup assignment for the 11 SSIP male samples.</title>
</caption><alternatives><graphic id="pgen-1004377-t003-3" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.t003" xlink:type="simple"/>
<table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup>
<thead>
<tr>
<td align="left" rowspan="1" colspan="1">Sample</td>
<td align="left" rowspan="1" colspan="1">Haplogroup (trunk)</td>
<td align="left" rowspan="1" colspan="1">Haplogroup (branch)</td>
<td align="left" rowspan="1" colspan="1">Haplogroup (branch)</td>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="1" colspan="1">SSI004</td>
<td align="left" rowspan="1" colspan="1">J</td>
<td align="left" rowspan="1" colspan="1">J2b2</td>
<td align="left" rowspan="1" colspan="1">J2b2</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI006</td>
<td align="left" rowspan="1" colspan="1">L</td>
<td align="left" rowspan="1" colspan="1">L1</td>
<td align="left" rowspan="1" colspan="1">L1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI009</td>
<td align="left" rowspan="1" colspan="1">H</td>
<td align="left" rowspan="1" colspan="1">H1</td>
<td align="left" rowspan="1" colspan="1">H1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI012</td>
<td align="left" rowspan="1" colspan="1">L</td>
<td align="left" rowspan="1" colspan="1">L1</td>
<td align="left" rowspan="1" colspan="1">L1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI014</td>
<td align="left" rowspan="1" colspan="1">L</td>
<td align="left" rowspan="1" colspan="1">L1</td>
<td align="left" rowspan="1" colspan="1">L1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI023</td>
<td align="left" rowspan="1" colspan="1">F</td>
<td align="left" rowspan="1" colspan="1">F</td>
<td align="left" rowspan="1" colspan="1">F</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI029</td>
<td align="left" rowspan="1" colspan="1">H</td>
<td align="left" rowspan="1" colspan="1">H1</td>
<td align="left" rowspan="1" colspan="1">H1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI031</td>
<td align="left" rowspan="1" colspan="1">J</td>
<td align="left" rowspan="1" colspan="1">J2b2</td>
<td align="left" rowspan="1" colspan="1">J2b2</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI032</td>
<td align="left" rowspan="1" colspan="1">R</td>
<td align="left" rowspan="1" colspan="1">R1a1</td>
<td align="left" rowspan="1" colspan="1">R1a1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI034</td>
<td align="left" rowspan="1" colspan="1">H</td>
<td align="left" rowspan="1" colspan="1">H1</td>
<td align="left" rowspan="1" colspan="1">H1</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSI036</td>
<td align="left" rowspan="1" colspan="1">L</td>
<td align="left" rowspan="1" colspan="1">L1</td>
<td align="left" rowspan="1" colspan="1">L1</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap>
<p>To investigate the correlation between the admixture analysis and the haplogroup assignments, we stratified the 36 samples according to the mitochondria haplogroups into a European dominant group (haplogroups HV, T, U and W; 8 samples) and a non-European dominant group (haplogroups F, M, N and R; 28 samples) and compared the estimated degree of European admixture between these two groups when we assumed six distinct populations in the admixture analysis (<xref ref-type="supplementary-material" rid="pgen.1004377.s024">Table S8</xref>). We assumed six distinct populations as this yielded amongst the lowest cross-validation errors and was the most parsimonious choice (<xref ref-type="supplementary-material" rid="pgen.1004377.s015">Figure S15</xref>). We observed a significant elevation of the European component (9.1% versus 6.5%, <italic>P</italic> = 0.047) in the samples identified with European-dominant mitochondrial haplogroup assignment, despite this not being evident in the PCA with autosomal SNPs (<xref ref-type="supplementary-material" rid="pgen.1004377.s012">Figure S12</xref>). We observed that the extent of European admixture in the SSIP was about 7% (at <italic>K</italic> = 6, <xref ref-type="supplementary-material" rid="pgen.1004377.s015">Figure S15</xref>), which was lower than previous studies where two reported around 50% north Indian ancestry in 17 Indian subgroups <xref ref-type="bibr" rid="pgen.1004377-Reich2">[11]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Moorjani2">[23]</xref>. However, the results between the different studies are not directly comparable due to the makeup of the European populations, since our analyses have considered five European subgroups (CEU, FIN, GBR, IBS, TSI) while the published reports have used only CEU to represent Europeans in the admixture estimation. 
This can be seen in our admixture analysis that assumed three distinct populations (<italic>K</italic> = 3), where the SSIP is observed to possess an average of 53% European admixture (<xref ref-type="supplementary-material" rid="pgen.1004377.s016">Figure S16</xref>), which is in fact in agreement with previous reports.</p>
</sec><sec id="s2g">
<title>Admixture with ancient genomes</title>
<p>By randomly choosing one sample from each of the 16 populations, we calculated the <italic>Dstatistic</italic> metric to investigate the relative extent of admixture of two ancient hominid genomes, a Neanderthal and a Denisovan, into modern humans. As <italic>Dstatistic</italic> required the input of four genomes, the analysis was anchored with a specific SSIP sample (SSI033 as G1) together with an ancient hominid and the chimpanzee genome, and we varied only the sample that was used in this four-sample analysis (<xref ref-type="table" rid="pgen-1004377-t004">Table 4</xref>). We observed a significantly greater extent of Neanderthal genome in East Asians (CHB, JPT) relative to SSIP, but lesser in the Luhya Kenyans (LWK) than SSIP. This was consistent with the estimations by Wall and colleagues, despite the use of Gujarati Indians as their South Asian samples <xref ref-type="bibr" rid="pgen.1004377-Wall1">[24]</xref>. There was also evidence to suggest a greater degree of Denisovan admixture in northern Han Chinese (CHB) than SSIP, although this observation was not reproduced for the other two East Asian populations (CHS, JPT). To evaluate the robustness of these analyses to the choice of samples and sequence coverage, we additionally performed the <italic>Dstatistic</italic> calculation on five randomly chosen pairs of samples from each population (anchored with a different SSIP sample as G1), and also performed the same analyses on the populations sequenced by Complete Genomics (to a minimum coverage of 51X). These additional analyses indicated that our original observations were robust (<xref ref-type="table" rid="pgen-1004377-t005">Table 5</xref>, <xref ref-type="supplementary-material" rid="pgen.1004377.s025">Table S9</xref>).</p>
<table-wrap id="pgen-1004377-t004" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.t004</object-id><label>Table 4</label><caption>
<title>Analysis of admixture with ancient hominid genomes, anchored with one SSIP genome (SSI033 as G1 in Dstatistic) and the chimpanzee genome.</title>
</caption><alternatives><graphic id="pgen-1004377-t004-4" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.t004" xlink:type="simple"/>
<table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup>
<thead>
<tr>
<td colspan="6" align="left" rowspan="1">Neanderthal</td>
<td colspan="6" align="left" rowspan="1">Denisovan</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">Sample (G2)</td>
<td align="left" rowspan="1" colspan="1"><italic>N<sub>BABA</sub></italic></td>
<td align="left" rowspan="1" colspan="1"><italic>N<sub>ABBA</sub></italic></td>
<td align="left" rowspan="1" colspan="1">%<italic>Dstat</italic></td>
<td align="left" rowspan="1" colspan="1">%SE</td>
<td align="left" rowspan="1" colspan="1">Z</td>
<td align="left" rowspan="1" colspan="1">Sample (G2)</td>
<td align="left" rowspan="1" colspan="1"><italic>N<sub>BABA</sub></italic></td>
<td align="left" rowspan="1" colspan="1"><italic>N<sub>ABBA</sub></italic></td>
<td align="left" rowspan="1" colspan="1">%<italic>Dstat</italic></td>
<td align="left" rowspan="1" colspan="1">%SE</td>
<td align="left" rowspan="1" colspan="1">Z</td>
</tr>
</thead>
<tbody>
<tr>
<td colspan="12" align="left" rowspan="1"><bold>Americans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA19720_MXL</td>
<td align="left" rowspan="1" colspan="1">61,869</td>
<td align="left" rowspan="1" colspan="1">62,711</td>
<td align="left" rowspan="1" colspan="1">−0.676</td>
<td align="left" rowspan="1" colspan="1">0.936</td>
<td align="left" rowspan="1" colspan="1">0.543</td>
<td align="left" rowspan="1" colspan="1">NA19749_MXL</td>
<td align="left" rowspan="1" colspan="1">56,423</td>
<td align="left" rowspan="1" colspan="1">56,824</td>
<td align="left" rowspan="1" colspan="1">−0.35</td>
<td align="left" rowspan="1" colspan="1">0.74</td>
<td align="left" rowspan="1" colspan="1">1.24</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG01271_CLM</td>
<td align="left" rowspan="1" colspan="1">62,637</td>
<td align="left" rowspan="1" colspan="1">63,033</td>
<td align="left" rowspan="1" colspan="1">−0.315</td>
<td align="left" rowspan="1" colspan="1">0.988</td>
<td align="left" rowspan="1" colspan="1">0.505</td>
<td align="left" rowspan="1" colspan="1">HG01342_CLM</td>
<td align="left" rowspan="1" colspan="1">59,852</td>
<td align="left" rowspan="1" colspan="1">60,988</td>
<td align="left" rowspan="1" colspan="1">−0.94</td>
<td align="left" rowspan="1" colspan="1">0.71</td>
<td align="left" rowspan="1" colspan="1">1.93</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG01060_PUR</td>
<td align="left" rowspan="1" colspan="1">63,134</td>
<td align="left" rowspan="1" colspan="1">63,171</td>
<td align="left" rowspan="1" colspan="1">−0.029</td>
<td align="left" rowspan="1" colspan="1">0.959</td>
<td align="left" rowspan="1" colspan="1">−0.894</td>
<td align="left" rowspan="1" colspan="1">HG01191_PUR</td>
<td align="left" rowspan="1" colspan="1">56,526</td>
<td align="left" rowspan="1" colspan="1">57,331</td>
<td align="left" rowspan="1" colspan="1">−0.71</td>
<td align="left" rowspan="1" colspan="1">0.68</td>
<td align="left" rowspan="1" colspan="1">1.50</td>
</tr>
<tr>
<td colspan="12" align="left" rowspan="1"><bold>Africans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA19908_ASW</td>
<td align="left" rowspan="1" colspan="1">72,284</td>
<td align="left" rowspan="1" colspan="1">69,292</td>
<td align="left" rowspan="1" colspan="1">2.113</td>
<td align="left" rowspan="1" colspan="1">0.757</td>
<td align="left" rowspan="1" colspan="1">−1.275</td>
<td align="left" rowspan="1" colspan="1">NA20299_ASW</td>
<td align="left" rowspan="1" colspan="1">58,889</td>
<td align="left" rowspan="1" colspan="1">61,285</td>
<td align="left" rowspan="1" colspan="1">−1.99</td>
<td align="left" rowspan="1" colspan="1">0.68</td>
<td align="left" rowspan="1" colspan="1">1.48</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA19401_LWK</td>
<td align="left" rowspan="1" colspan="1">73,400</td>
<td align="left" rowspan="1" colspan="1">70,380</td>
<td align="left" rowspan="1" colspan="1">2.100</td>
<td align="left" rowspan="1" colspan="1">0.765</td>
<td align="left" rowspan="1" colspan="1">−2.311</td>
<td align="left" rowspan="1" colspan="1">NA19327_LWK</td>
<td align="left" rowspan="1" colspan="1">65,428</td>
<td align="left" rowspan="1" colspan="1">68,000</td>
<td align="left" rowspan="1" colspan="1">−1.93</td>
<td align="left" rowspan="1" colspan="1">0.64</td>
<td align="left" rowspan="1" colspan="1">1.34</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA19236_YRI</td>
<td align="left" rowspan="1" colspan="1">73,774</td>
<td align="left" rowspan="1" colspan="1">69,606</td>
<td align="left" rowspan="1" colspan="1">2.907</td>
<td align="left" rowspan="1" colspan="1">0.729</td>
<td align="left" rowspan="1" colspan="1">−1.926</td>
<td align="left" rowspan="1" colspan="1">NA19160_YRI</td>
<td align="left" rowspan="1" colspan="1">66,408</td>
<td align="left" rowspan="1" colspan="1">67,672</td>
<td align="left" rowspan="1" colspan="1">−0.94</td>
<td align="left" rowspan="1" colspan="1">0.64</td>
<td align="left" rowspan="1" colspan="1">1.36</td>
</tr>
<tr>
<td colspan="12" align="left" rowspan="1"><bold>Asians</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA18978_JPT</td>
<td align="left" rowspan="1" colspan="1">61,398</td>
<td align="left" rowspan="1" colspan="1">62,449</td>
<td align="left" rowspan="1" colspan="1">−0.849</td>
<td align="left" rowspan="1" colspan="1">0.935</td>
<td align="left" rowspan="1" colspan="1">−2.548</td>
<td align="left" rowspan="1" colspan="1">NA19078_JPT</td>
<td align="left" rowspan="1" colspan="1">56,313</td>
<td align="left" rowspan="1" colspan="1">55,554</td>
<td align="left" rowspan="1" colspan="1">0.68</td>
<td align="left" rowspan="1" colspan="1">0.67</td>
<td align="left" rowspan="1" colspan="1">−0.75</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA18645_CHB</td>
<td align="left" rowspan="1" colspan="1">61,203</td>
<td align="left" rowspan="1" colspan="1">62,837</td>
<td align="left" rowspan="1" colspan="1">−1.317</td>
<td align="left" rowspan="1" colspan="1">0.945</td>
<td align="left" rowspan="1" colspan="1">−3.014</td>
<td align="left" rowspan="1" colspan="1">NA18577_CHB</td>
<td align="left" rowspan="1" colspan="1">55,720</td>
<td align="left" rowspan="1" colspan="1">56,822</td>
<td align="left" rowspan="1" colspan="1">−0.98</td>
<td align="left" rowspan="1" colspan="1">0.75</td>
<td align="left" rowspan="1" colspan="1">2.06</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG00500_CHS</td>
<td align="left" rowspan="1" colspan="1">61,443</td>
<td align="left" rowspan="1" colspan="1">63,314</td>
<td align="left" rowspan="1" colspan="1">−1.500</td>
<td align="left" rowspan="1" colspan="1">0.899</td>
<td align="left" rowspan="1" colspan="1">−1.894</td>
<td align="left" rowspan="1" colspan="1">HG00543_CHS</td>
<td align="left" rowspan="1" colspan="1">55,647</td>
<td align="left" rowspan="1" colspan="1">56,643</td>
<td align="left" rowspan="1" colspan="1">−0.89</td>
<td align="left" rowspan="1" colspan="1">0.74</td>
<td align="left" rowspan="1" colspan="1">1.16</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">SSM097</td>
<td align="left" rowspan="1" colspan="1">57,291</td>
<td align="left" rowspan="1" colspan="1">60,031</td>
<td align="left" rowspan="1" colspan="1">−2.335</td>
<td align="left" rowspan="1" colspan="1">0.888</td>
<td align="left" rowspan="1" colspan="1">1.815</td>
<td align="left" rowspan="1" colspan="1">SSM059</td>
<td align="left" rowspan="1" colspan="1">52,166</td>
<td align="left" rowspan="1" colspan="1">52,617</td>
<td align="left" rowspan="1" colspan="1">−0.43</td>
<td align="left" rowspan="1" colspan="1">0.71</td>
<td align="left" rowspan="1" colspan="1">1.37</td>
</tr>
<tr>
<td colspan="12" align="left" rowspan="1"><bold>Europeans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA20755_TSI</td>
<td align="left" rowspan="1" colspan="1">61,990</td>
<td align="left" rowspan="1" colspan="1">63,008</td>
<td align="left" rowspan="1" colspan="1">−0.814</td>
<td align="left" rowspan="1" colspan="1">1.053</td>
<td align="left" rowspan="1" colspan="1">−1.291</td>
<td align="left" rowspan="1" colspan="1">NA20813_TSI</td>
<td align="left" rowspan="1" colspan="1">56,229</td>
<td align="left" rowspan="1" colspan="1">56,837</td>
<td align="left" rowspan="1" colspan="1">−0.54</td>
<td align="left" rowspan="1" colspan="1">0.77</td>
<td align="left" rowspan="1" colspan="1">1.74</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">NA07056_CEU</td>
<td align="left" rowspan="1" colspan="1">62,356</td>
<td align="left" rowspan="1" colspan="1">62,641</td>
<td align="left" rowspan="1" colspan="1">−0.228</td>
<td align="left" rowspan="1" colspan="1">1.001</td>
<td align="left" rowspan="1" colspan="1">−1.123</td>
<td align="left" rowspan="1" colspan="1">NA12775_CEU</td>
<td align="left" rowspan="1" colspan="1">56,260</td>
<td align="left" rowspan="1" colspan="1">55,649</td>
<td align="left" rowspan="1" colspan="1">0.55</td>
<td align="left" rowspan="1" colspan="1">0.68</td>
<td align="left" rowspan="1" colspan="1">1.30</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG00315_FIN</td>
<td align="left" rowspan="1" colspan="1">62,523</td>
<td align="left" rowspan="1" colspan="1">62,366</td>
<td align="left" rowspan="1" colspan="1">0.126</td>
<td align="left" rowspan="1" colspan="1">0.991</td>
<td align="left" rowspan="1" colspan="1">−1.195</td>
<td align="left" rowspan="1" colspan="1">HG00275_FIN</td>
<td align="left" rowspan="1" colspan="1">56,541</td>
<td align="left" rowspan="1" colspan="1">56,308</td>
<td align="left" rowspan="1" colspan="1">0.21</td>
<td align="left" rowspan="1" colspan="1">0.73</td>
<td align="left" rowspan="1" colspan="1">1.87</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG01624_IBS</td>
<td align="left" rowspan="1" colspan="1">62,210</td>
<td align="left" rowspan="1" colspan="1">62,496</td>
<td align="left" rowspan="1" colspan="1">−0.229</td>
<td align="left" rowspan="1" colspan="1">0.952</td>
<td align="left" rowspan="1" colspan="1">−0.994</td>
<td align="left" rowspan="1" colspan="1">HG01620_IBS</td>
<td align="left" rowspan="1" colspan="1">56,162</td>
<td align="left" rowspan="1" colspan="1">56,823</td>
<td align="left" rowspan="1" colspan="1">−0.59</td>
<td align="left" rowspan="1" colspan="1">0.68</td>
<td align="left" rowspan="1" colspan="1">−0.49</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">HG00261_GBR</td>
<td align="left" rowspan="1" colspan="1">62,198</td>
<td align="left" rowspan="1" colspan="1">61,291</td>
<td align="left" rowspan="1" colspan="1">0.734</td>
<td align="left" rowspan="1" colspan="1">0.879</td>
<td align="left" rowspan="1" colspan="1">−1.447</td>
<td align="left" rowspan="1" colspan="1">HG00263_GBR</td>
<td align="left" rowspan="1" colspan="1">56,652</td>
<td align="left" rowspan="1" colspan="1">55,975</td>
<td align="left" rowspan="1" colspan="1">0.60</td>
<td align="left" rowspan="1" colspan="1">0.76</td>
<td align="left" rowspan="1" colspan="1">1.78</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap><table-wrap id="pgen-1004377-t005" position="float"><object-id pub-id-type="doi">10.1371/journal.pgen.1004377.t005</object-id><label>Table 5</label><caption>
<title>Dstatistic analysis with ancient genomes for 5 randomly selected paired samples from each population from 1KGP and SSMP, anchored with a different SSIP sample (G1) and the chimpanzee genome in each of the 5 iterations.</title>
</caption><alternatives><graphic id="pgen-1004377-t005-5" position="float" mimetype="image" xlink:href="info:doi/10.1371/journal.pgen.1004377.t005" xlink:type="simple"/>
<table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup>
<thead>
<tr>
<td align="left" rowspan="1" colspan="1">Population</td>
<td align="left" rowspan="1" colspan="2">Neanderthal</td>
<td align="left" rowspan="1" colspan="2">Denisovan</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">G2</td>
<td align="left" rowspan="1" colspan="1">Mean %Dstatistic</td>
<td align="left" rowspan="1" colspan="1">Standard deviation</td>
<td align="left" rowspan="1" colspan="1">Mean %Dstatistic</td>
<td align="left" rowspan="1" colspan="1">Standard deviation</td>
</tr>
</thead>
<tbody>
<tr>
<td colspan="5" align="left" rowspan="1"><bold>Americans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">MXL</td>
<td align="left" rowspan="1" colspan="1">−0.8287</td>
<td align="left" rowspan="1" colspan="1">0.51</td>
<td align="left" rowspan="1" colspan="1">−0.3476</td>
<td align="left" rowspan="1" colspan="1">0.38</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">CLM</td>
<td align="left" rowspan="1" colspan="1">−0.3085</td>
<td align="left" rowspan="1" colspan="1">0.16</td>
<td align="left" rowspan="1" colspan="1">−0.9630</td>
<td align="left" rowspan="1" colspan="1">0.70</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">PUR</td>
<td align="left" rowspan="1" colspan="1">−0.1668</td>
<td align="left" rowspan="1" colspan="1">0.17</td>
<td align="left" rowspan="1" colspan="1">−0.7225</td>
<td align="left" rowspan="1" colspan="1">0.55</td>
</tr>
<tr>
<td colspan="5" align="left" rowspan="1"><bold>Africans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">ASW</td>
<td align="left" rowspan="1" colspan="1">2.1022</td>
<td align="left" rowspan="1" colspan="1">0.82</td>
<td align="left" rowspan="1" colspan="1">−1.8012</td>
<td align="left" rowspan="1" colspan="1">0.45</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">LWK</td>
<td align="left" rowspan="1" colspan="1">2.5654</td>
<td align="left" rowspan="1" colspan="1">0.29</td>
<td align="left" rowspan="1" colspan="1">−1.6356</td>
<td align="left" rowspan="1" colspan="1">0.47</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">YRI</td>
<td align="left" rowspan="1" colspan="1">2.9993</td>
<td align="left" rowspan="1" colspan="1">0.31</td>
<td align="left" rowspan="1" colspan="1">−0.9381</td>
<td align="left" rowspan="1" colspan="1">0.12</td>
</tr>
<tr>
<td colspan="5" align="left" rowspan="1"><bold>Asians</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">JPT</td>
<td align="left" rowspan="1" colspan="1">−0.8431</td>
<td align="left" rowspan="1" colspan="1">0.48</td>
<td align="left" rowspan="1" colspan="1">0.7085</td>
<td align="left" rowspan="1" colspan="1">0.67</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">CHB</td>
<td align="left" rowspan="1" colspan="1">−1.0029</td>
<td align="left" rowspan="1" colspan="1">0.69</td>
<td align="left" rowspan="1" colspan="1">−0.9975</td>
<td align="left" rowspan="1" colspan="1">0.81</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">CHS</td>
<td align="left" rowspan="1" colspan="1">−1.1559</td>
<td align="left" rowspan="1" colspan="1">0.58</td>
<td align="left" rowspan="1" colspan="1">−0.8884</td>
<td align="left" rowspan="1" colspan="1">0.80</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">MAS</td>
<td align="left" rowspan="1" colspan="1">−1.4695</td>
<td align="left" rowspan="1" colspan="1">0.67</td>
<td align="left" rowspan="1" colspan="1">−0.4289</td>
<td align="left" rowspan="1" colspan="1">0.24</td>
</tr>
<tr>
<td colspan="5" align="left" rowspan="1"><bold>Europeans</bold></td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">TSI</td>
<td align="left" rowspan="1" colspan="1">−0.7051</td>
<td align="left" rowspan="1" colspan="1">0.21</td>
<td align="left" rowspan="1" colspan="1">−0.5580</td>
<td align="left" rowspan="1" colspan="1">0.51</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">CEU</td>
<td align="left" rowspan="1" colspan="1">−0.2317</td>
<td align="left" rowspan="1" colspan="1">0.08</td>
<td align="left" rowspan="1" colspan="1">0.5491</td>
<td align="left" rowspan="1" colspan="1">0.47</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">FIN</td>
<td align="left" rowspan="1" colspan="1">−0.2016</td>
<td align="left" rowspan="1" colspan="1">0.22</td>
<td align="left" rowspan="1" colspan="1">0.2371</td>
<td align="left" rowspan="1" colspan="1">0.24</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">IBS</td>
<td align="left" rowspan="1" colspan="1">−0.2121</td>
<td align="left" rowspan="1" colspan="1">0.22</td>
<td align="left" rowspan="1" colspan="1">−0.5146</td>
<td align="left" rowspan="1" colspan="1">0.42</td>
</tr>
<tr>
<td align="left" rowspan="1" colspan="1">GBR</td>
<td align="left" rowspan="1" colspan="1">0.7361</td>
<td align="left" rowspan="1" colspan="1">0.62</td>
<td align="left" rowspan="1" colspan="1">0.6602</td>
<td align="left" rowspan="1" colspan="1">0.64</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap></sec></sec><sec id="s3">
<title>Discussion</title>
<p>The Singapore Sequencing Indian Project has produced a publicly available genomic resource by sequencing the genomes of 36 South Asian Indians from Singapore at a target coverage of 30X. This complements the existing resource of 96 Southeast Asian Malays from the SSMP, as well as 1,092 samples from Phase 1 of the 1000 Genomes Project. By comparing against a panel of 132 South Asians with well-defined geographical origins, the 36 Singapore Indians were found to be genetically closer to the South Asians from south India. The assignment of Y chromosomes and mitochondria to known haplogroups identified 18% and 22% of the samples, respectively, as belonging to haplogroups that are predominantly present in Europeans, and these memberships were not discernible in the PCA using autosomal SNPs of SSIP with other global populations, even though there was a marginally higher degree of Caucasian admixture in these samples that was statistically significant.</p>
<p>The PCA of the South Asian samples revealed greater genetic heterogeneity in Asian Indians that originated from northern parts of India, while those that originated from the south were considerably more homogeneous (see <xref ref-type="fig" rid="pgen-1004377-g002">Figure 2C</xref>). In an independent analysis of HapMap Gujarati Indians with Asian Indians from the Singapore Genome Variation Project, around a third of the Gujarati Indians were found to cluster with the Singapore Indians (<xref ref-type="supplementary-material" rid="pgen.1004377.s013">Figure S13</xref>). With an original sample size of 38, the SSIP is hardly representative of the complex genomic diversity that is present in the Indian subcontinent, nor does it contain geographically well-distributed samples to yield deeper insights into the migratory history of South Asians. Many theories have been proposed on the differentiation between Indo-Aryan and Dravidian-speaking Indians, and we emphasize the SSIP is not intended to investigate the different hypotheses, although it can serve as a potential population panel when more South Asian genetics data become available. Individuals in the SSIP have been sampled from Singapore, a location which is geographically distant from India, and migration and inter-marriages likely will have confounded genetic membership to specific geographical origins in south India, and thus the SSIP is likely to be more representative of cosmopolitan Indians in Singapore.</p>
<p>While our assessment of intra-population diversity considers the extent of SNP sharing between pairs of samples in a population, the variance of this metric provides an effective measure of the genetic homogeneity of population labels. For example, the admixed Mexican population exhibited the largest spread in the pairwise distances, even though the median distance for Mexicans was in the same range as that for the Caucasians (see <xref ref-type="fig" rid="pgen-1004377-g005">Figure 5</xref>). This suggests that for subjects labeled as Mexicans, there are pairs of samples where the extent of SNP sharing was similar to that among East Asians, while at the same time there are pairs within the population that were significantly more diverse than pairs of SSIP samples. The downstream implication of such a variable degree of intra-population diversity concerns the interpretation and relevance of population labels as a surrogate for genomic information in the practice of public health. An example of this is in warfarin pharmacology. Although polymorphisms in <italic>VKORC1</italic> and <italic>CYP2C9</italic> can explain up to 70% of dosage variance between populations <xref ref-type="bibr" rid="pgen.1004377-Chan1">[25]</xref>, it is still common to rely on race or ethnic labels to identify the loading dosages when initiating warfarin therapy in the absence of information from genetic screening <xref ref-type="bibr" rid="pgen.1004377-Chan1">[25]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Chan2">[26]</xref>.
A population with a large variance for the intra-population diversity suggests that the adherence to self-reported population labels may serve as a poor surrogate for the underlying pharmacogenomics, which was evident in the larger range of warfarin international normalized ratio (INR) for African Americans and Caucasians, as compared to the Chinese and Japanese (see <xref ref-type="fig" rid="pgen-1004377-g001">figure 1</xref> of <xref ref-type="bibr" rid="pgen.1004377-Chan2">[26]</xref>).</p>
<p>The SSIP resource is expected to be a timely complement to Phase 3 of the 1 KGP, which has sequenced the genomes of samples from three additional migrant South Asian groups in Houston (Gujarati Indians) and the United Kingdom (Sri Lankan Tamils, Indian Telugus), as well as two native South Asian groups in Bangladesh (Bengali) and Pakistan (Punjabi). Similar to the Phase 1 design, these five groups have been sequenced at a low coverage of between 2- and 6-fold, and it can be useful to evaluate whether the availability of the SSIP data will benefit the process of variant calling for these populations that have undergone low-pass sequencing. The variants for the 36 SSIP samples are publicly available in the variant call format (VCF), and these can be accessed along with phased haplotypes for the SSIP samples at <ext-link ext-link-type="uri" xlink:href="http://www.statgen.nus.edu.sg/~SSIP" xlink:type="simple">http://www.statgen.nus.edu.sg/~SSIP</ext-link>.</p>
</sec><sec id="s4" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="s4a">
<title>Samples</title>
<p>The SSIP enrolled 38 subjects, 12 males and 26 females, from the Multi-Ethnic Cohort (MEC) of the Singapore Population Health Study who self-reported as Singapore Indians. The MEC is a cross-sectional survey of individuals between 40 and 65 years old. The exclusion criteria at the time of the initial enrolment into the MEC were: (i) below 21 years of age; (ii) having any mental condition that may interfere with the participant's competency in giving informed consent; and suffering from any of the following conditions: (iii) renal failure; (iv) stroke; (v) cancer; (vi) heart disease (including congenital conditions). We confirmed the ethnic membership of each individual via a telephone survey to verify that both sets of grandparents similarly self-reported to be South Asian Indians. Informed consent was obtained from all participants and ethical approvals were obtained for the Singapore Population Health Study and the extension to perform whole-genome sequencing from two independent Institutional Review Boards at the National University Hospital (Singapore) and the National University of Singapore respectively.</p>
</sec><sec id="s4b">
<title>Sample preparation and sequence data generation</title>
<p>The blood samples of all 38 Singapore Indians were extracted from the Singapore BioBank, and DNA extraction was performed at the Defence Medical and Environmental Research Institute according to the protocol by Illumina, with DNA quantification performed using picogreen and the SpectraMax Gemini EM microplate reader (with the spectrophotometer set at 480/520 nm) to ensure DNA concentration for each sample was at least 50 ng/µl. Whole genome sequencing was performed at the Illumina facility at Hayward, California, USA, using the Illumina HiSeq 2000 sequencer, where each sample was run on a unique lane to achieve a target coverage of 30-fold with 2×100 paired-end reads and a target insert size of between 300–400 bp. To ensure consistent and quality sequencing, multiple quality control procedures were adopted: (i) in preparing the libraries, the Bioanalyzer was used to ensure DNA quality and size distribution; (ii) a short paired-end sequencing reaction was applied to each sample after library preparation to ensure the extent of GC bias and the observed sequencing quality were within normal ranges; (iii) sequencing of each sample was performed on a unique lane, with the condition that at least 80% of the generated bases must attain a quality score of at least 30, failing which the sample was re-sequenced. Each sample was also genotyped on the Illumina Omni2.5 M microarray, where genotype calling was performed with the proprietary GenomeStudio software by Illumina.</p>
</sec><sec id="s4c">
<title>Read assembly and alignment</title>
<p>Assembly and alignment of each individual genome to the human reference genome (National Center for Biotechnology Information, NCBI build 37) was performed using the proprietary Illumina CASAVA version 1.9.0a1_110909 assembler. CASAVA aligned sequence reads using Eland v2e, and the aligned reads for each sample were then consolidated into the BAM format file <xref ref-type="bibr" rid="pgen.1004377-Li1">[27]</xref>.</p>
</sec><sec id="s4d">
<title>Variant discovery</title>
<p>Two methods were used to call SNPs and indels: (i) single sample calling by CASAVA; and (ii) multi sample calling by GATK version 2.1.8 <xref ref-type="bibr" rid="pgen.1004377-DePristo1">[28]</xref>, <xref ref-type="bibr" rid="pgen.1004377-McKenna1">[29]</xref> (see Supplementary <xref ref-type="sec" rid="s4">Methods</xref> for details). We assessed the performance of both methods by comparing the concordance of the genotypes called by CASAVA and GATK with the genotypes reported in the Illumina Omni2.5 M array. We reported only variants that were identified by both CASAVA and GATK, but used the genotype calls from the software that yielded a higher concordance rate. SNP annotation was performed using SNPEff version 3.1 b <xref ref-type="bibr" rid="pgen.1004377-Cingolani1">[17]</xref>, while the functional impact of the SNPs was predicted using both SIFT <xref ref-type="bibr" rid="pgen.1004377-Ng1">[30]</xref> and PolyPhen-2 <xref ref-type="bibr" rid="pgen.1004377-Adzhubei1">[31]</xref>, where a non-synonymous SNP was defined as damaging if SIFT yielded a score ≤0.05 and PolyPhen-2 yielded a score ≥0.95. We defined LOF SNPs as those that were annotated by SNPEff to be nonsense mutations, splice-site mutations, or frame-shifts caused by indels <xref ref-type="bibr" rid="pgen.1004377-Abecasis1">[2]</xref>, and the Database for Annotation, Visualization and Integrated Discovery (DAVID) <xref ref-type="bibr" rid="pgen.1004377-Huangda1">[32]</xref> was used to identify biological pathways that were enriched with LOF SNPs in our samples. In addition, we mapped the LOF variants in our samples to the COSMIC database <xref ref-type="bibr" rid="pgen.1004377-Forbes1">[33]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Forbes2">[34]</xref> and previous discoveries from GWAS <xref ref-type="bibr" rid="pgen.1004377-Hindorff1">[35]</xref> with the online SNPnexus platform <xref ref-type="bibr" rid="pgen.1004377-DayemUllah1">[36]</xref> to identify any functional impact of these LOF SNPs.
SVs were called with 4 methods: (i) BreakDancer v1.1._2011_02_21 <xref ref-type="bibr" rid="pgen.1004377-Chen1">[37]</xref>; (ii) VariationHunter Release_v0.3 <xref ref-type="bibr" rid="pgen.1004377-Hormozdiari1">[38]</xref>; (iii) Pindel version 0.2.2 <xref ref-type="bibr" rid="pgen.1004377-Ye1">[39]</xref>; and (iv) Delly v0.0.5 <xref ref-type="bibr" rid="pgen.1004377-Rausch1">[40]</xref>, although the focus was primarily on deletions. Large deletions of size 50 bp to 10 Mbp that were successfully called by at least one of the four algorithms were consolidated, whereby for structural deletions that were detected by multiple methods, the boundaries were obtained by considering the union of the deleted regions from these methods (see Supplementary <xref ref-type="sec" rid="s4">Methods</xref>). A SNP or indel is defined to be novel if it is not present in dbSNP 137 or the SSMP <xref ref-type="bibr" rid="pgen.1004377-Wong1">[3]</xref>, while a structural deletion is defined to be novel if there is less than 50% overlap with previously reported deletions in the SSMP, 1 KGP and DGV released 2013-07-23 <xref ref-type="bibr" rid="pgen.1004377-Iafrate1">[41]</xref>.</p>
</sec><sec id="s4e">
<title>Assessing population structure</title>
<p>PCA was used to assess the population structure of the SSIP samples with samples from worldwide populations using the <italic>pca</italic> option in the software <italic>eigenstrat</italic> <xref ref-type="bibr" rid="pgen.1004377-Price1">[42]</xref>. As part of the sample QC process, a PCA was performed with 420,817 autosomal SNPs on the 38 SSIP samples and 268 samples from the SGVP <xref ref-type="bibr" rid="pgen.1004377-Teo1">[43]</xref> to ensure that the SSIP samples clustered together with the SGVP Indian samples. The SSIP samples that remained after QC were jointly analyzed with 96 samples from the SSMP and 1,092 samples from 14 populations in Phase 1 of 1 KGP in a PCA of the 16 populations on 217,302 SNPs (<xref ref-type="supplementary-material" rid="pgen.1004377.s017">Table S1</xref>). A third PCA was performed with 202,600 SNPs on the SSIP samples with 132 South Asian samples from 25 well-defined Indian groups <xref ref-type="bibr" rid="pgen.1004377-Reich3">[44]</xref>, where the latter samples can be broadly categorized into Southern Indians and Northern Indians according to the latitude of the sampling location (<xref ref-type="supplementary-material" rid="pgen.1004377.s018">Table S2</xref>). To estimate the membership of each sample in the 16 populations to dominant population groups in the world, an admixture analysis was performed using the ADMIXTURE program version 1.22 <xref ref-type="bibr" rid="pgen.1004377-Alexander1">[45]</xref> with 6,519,079 autosomal SNPs, where the number of ancestral populations (<italic>K</italic>) was set to range from 2 to 16.</p>
</sec><sec id="s4f">
<title>Assessing population diversity</title>
<p>We measured the genetic diversity of SSIP, SSMP and 14 populations in Phase 1 of 1 KGP with a distance metric calculated between every possible pair of samples in each of the 16 populations, defined as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pgen.1004377.e001" xlink:type="simple"/></inline-formula>, where min(<italic>S</italic><sub>1</sub>,<italic>S</italic><sub>2</sub>) is the minimum number of SNPs in two samples denoted as <italic>S</italic><sub>1</sub> and <italic>S</italic><sub>2</sub>, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pgen.1004377.e002" xlink:type="simple"/></inline-formula> represents the number of SNPs present in both <italic>S</italic><sub>1</sub> and <italic>S</italic><sub>2</sub>. A high <italic>D</italic> value thus reflects a lower degree of SNP sharing, or greater genetic heterogeneity, between the two samples; a low <italic>D</italic> value indicates that SNPs present in <italic>S</italic><sub>1</sub> are likely to be present in <italic>S</italic><sub>2</sub>, thus reflecting greater genetic homogeneity between the two samples. To evaluate whether the number of samples in each population influenced the comparison, we additionally performed the analysis with the same number of samples selected randomly from each population. To ensure that the results of this analysis were robust to sequencing coverage and the method used for variant calling, we also measured <italic>D</italic> for samples from six populations in 1 KGP (CEU, CHB, JPT, LWK, TSI and YRI) that were sequenced by Complete Genomics <xref ref-type="bibr" rid="pgen.1004377-Drmanac1">[46]</xref> at a higher coverage of 51-fold to 89-fold (see <xref ref-type="supplementary-material" rid="pgen.1004377.s017">Table S1</xref>). For each individual, we also measured the ratio of heterozygous genotypes to homozygous genotypes (Het/Hom) across all the autosomal SNPs.
A population-level metric is calculated as the average of the Het/Hom across all the individuals in a specific population. This metric was similarly calculated with the same number of samples chosen randomly from each population, and including samples from Complete Genomics to calibrate against sequencing coverage and variant calling differences.</p>
</sec><sec id="s4g">
<title>Mitochondria and chromosome Y haplogroup assignment</title>
<p>Every SSIP individual was assigned to a unique mitochondria haplogroup on the basis of the complete sequence of the mtDNA. This was performed with haplogrep with reference to PhyloTree mtDNA tree Build 15 <xref ref-type="bibr" rid="pgen.1004377-KlossBrandsttter1">[19]</xref>, <xref ref-type="bibr" rid="pgen.1004377-Alexander1">[45]</xref> using a FASTA-based input generated with the consensus calls from GATK and <italic>mpileup</italic> in SAMTOOLS <xref ref-type="bibr" rid="pgen.1004377-Li1">[27]</xref> (see Supplementary <xref ref-type="sec" rid="s4">Methods</xref> for details). Each of the 11 male subjects in SSIP was also assigned to a unique chromosome Y haplogroup defined using a maximum likelihood approach against the 2008 chromosome Y tree with Yfitter <xref ref-type="bibr" rid="pgen.1004377-Jostins1">[47]</xref>. An input VCF file of the detected variants was generated with <italic>mpileup</italic> in SAMTOOLS, which was subsequently converted to the qcall input file used by Yfitter for haplogroup assignment. Default settings for Yfitter and haplogrep were used in our analyses.</p>
</sec><sec id="s4h">
<title>Relatedness with ancient genomes</title>
<p>We evaluated the relative degree of relatedness between one SSIP sample (SSI033, chosen randomly) and a randomly chosen sample from each of the 15 populations (SSMP, 1 KGP) with an ancient genome using the <italic>Dstatistic</italic> metric <xref ref-type="bibr" rid="pgen.1004377-Patterson1">[48]</xref>. Calculating this metric requires information at bi-allelic transversion sites (alleles denoted generically as <italic>A</italic> and <italic>B</italic>) from two modern human genomes (<italic>G</italic><sub>1</sub>, <italic>G</italic><sub>2</sub>), an ancient genome (either Denisovan or Neanderthal) and the chimpanzee genome; only transversions are used because transition mutations are likely results of deamination of cytosine residues in ancient DNA <xref ref-type="bibr" rid="pgen.1004377-Reich4">[49]</xref>. The metric is defined as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pgen.1004377.e003" xlink:type="simple"/></inline-formula>, where <italic>N<sub>BABA</sub></italic> denotes the total number of sites where <italic>G</italic><sub>1</sub> and the ancient genome carried allele <italic>B</italic> while <italic>G</italic><sub>2</sub> and the chimpanzee genome carried allele <italic>A</italic>; and <italic>N<sub>ABBA</sub></italic> denotes the total number of sites where <italic>G</italic><sub>1</sub> and the chimpanzee genome carried allele <italic>A</italic> while <italic>G</italic><sub>2</sub> and the ancient genome carried allele <italic>B</italic>. At sites where the ancient genome carries different alleles to the chimpanzee genome, <italic>Dstatistic</italic> thus measures the deviation in similarity between the ancient genome and each of the two modern genomes. The genome was divided into <italic>M</italic> non-overlapping blocks of 5 Mb and a jack-knife approach was used to calculate the mean, standard error and Z score of <italic>Dstatistic</italic>. 
Two ancient genomes were considered: (i) the Denisovan genome, sequenced on the Illumina Genome Analyzer IIx at a coverage of 30-fold <xref ref-type="bibr" rid="pgen.1004377-Meyer2">[50]</xref>; and (ii) the Neanderthal genome, sequenced on the Illumina HiSeq at a coverage of 50-fold (<ext-link ext-link-type="uri" xlink:href="http://www.eva.mpg.de/neandertal/index.html" xlink:type="simple">http://www.eva.mpg.de/neandertal/index.html</ext-link>).</p>
<p>A full description of the <xref ref-type="sec" rid="s4">Methods</xref> can be found in the <xref ref-type="supplementary-material" rid="pgen.1004377.s026">Text S1</xref> and Supplementary <xref ref-type="sec" rid="s4">Methods</xref> at the SSIP website.</p>
</sec></sec><sec id="s5">
<title>Supporting Information</title>
<supplementary-material id="pgen.1004377.s001" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s001" position="float" xlink:type="simple"><label>Figure S1</label><caption>
<p>Mean and standard deviation of insert sizes for each sample. The vertical blue bars represent mean insert sizes while the red line shows the standard deviation of insert sizes from the paired-end sequencing reads. No outliers were found.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s002" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s002" position="float" xlink:type="simple"><label>Figure S2</label><caption>
<p>Total number of reads for each sample. Total number of reads sequenced for all samples fall within an acceptable range. Blue bars represent reads that passed QC while red bars represent reads that failed QC.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s003" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s003" position="float" xlink:type="simple"><label>Figure S3</label><caption>
<p>Total paired reads for each sample. Vertical blue bars represent reads that passed QC while red bars represent reads that failed QC. No unusual trend observed.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s004" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s004" position="float" xlink:type="simple"><label>Figure S4</label><caption>
<p>Read depth summary statistics for each sample. Sample SSI007 displayed a median read depth less than the targeted depth of 30X (red line) and is subsequently excluded from downstream analyses. 75<sup>th</sup> and 25<sup>th</sup> percentiles are represented by green line and blue line respectively.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s005" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s005" position="float" xlink:type="simple"><label>Figure S5</label><caption>
<p>Principal Component Analysis (PCA) of samples from Singapore Sequencing Indians Project (SSIP) and Singapore Genome Variation Project (SGVP). A set of 420,817 SNPs common between the 38 samples from SSIP (blue circles) and 268 samples from the SGVP, which includes 96 Chinese (red), 89 Malays (green) and 83 Indians (sky blue), was used to generate a PCA plot. The analysis revealed one sample from SSIP (SSI016) to be in closer proximity to Malays (SGVP_MAS); this sample was removed from downstream analysis.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s006" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s006" position="float" xlink:type="simple"><label>Figure S6</label><caption>
<p>Genotype concordance rate for autosomal SNPs. Comparison of genotype concordance rate between CASAVA (blue) and GATK (red) SNP calling with reference to the Omni 2.5 M array for autosomal SNPs. Chromosome number is displayed on the horizontal axis. CASAVA outperformed GATK across all chromosomes.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s007" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s007" position="float" xlink:type="simple"><label>Figure S7</label><caption>
<p>Pairwise population FST between SSIP and the other 15 populations on bi-allelic SNPs of autosomal chromosomes. The blue bar is the mean pairwise population FST for SSIP and another population for common SNPs between the two populations; the green bar represents the mean pairwise population FST for SSIP and another population for common SNPs across the entire population panel (total of 4,460,176 SNPs for original sample sizes in (A), 4,360,323 for 36 samples from each population in (B)). The red circle shows the number of common SNPs between a pair of populations that was used for the mean FST calculation.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s008" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s008" position="float" xlink:type="simple"><label>Figure S8</label><caption>
<p>Genetic diversity measured by distance metric. Intra-population diversity measured for all possible pairs of samples in each population for (A) original sample size; (B) normalized sample size by randomly selecting 36 samples from each population. IBS was removed from the analysis because its sample size was less than 36 samples. We do not observe any deviation between the original sample size and the normalized sample size, and thus this analysis is not sensitive to sample size variation.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s009" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s009" position="float" xlink:type="simple"><label>Figure S9</label><caption>
<p>Intra-population diversity for 7 populations in 1 KGP and Complete Genomics. Intra-population diversity based on distance measure <italic>D</italic> for 7 populations for (A) 1 KGP (average coverage of 5X) and (B) Complete Genomics (deep coverage of 51-89X). Labels on the X axis show the continent, population, sample size and total number of pairs. An identical trend was observed regardless of low or deep sequencing coverage. African populations have the highest intra-population diversity score while Asian populations have the lowest; GIH (Northern Indians) and SSIP are slightly above Europeans.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s010" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s010" position="float" xlink:type="simple"><label>Figure S10</label><caption>
<p>Heterozygous to homozygous ratio. (A) Boxplot of original sample sizes heterozygous to homozygous ratio in each population. (B) We randomly selected 36 samples from each population (SSIP, SSMP and 1 KGP) to calculate single sample heterozygous to homozygous ratio. SSIP has a higher ratio than all other Asian populations, indicating that SSIP is more diverse than East Asian populations (JPT, CHB, CHS) and the Southeast Asian population (SSMP).</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s011" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s011" position="float" xlink:type="simple"><label>Figure S11</label><caption>
<p>Heterozygous to homozygous ratio for Complete Genomics samples. Heterozygous to homozygous ratio for samples from Complete Genomics, color coded by continent: red represents Africans, green Asians and sky blue Europeans. Het/Hom ratios obtained in deep sequencing samples (Complete Genomics) and shallow sequencing samples (1 KGP) are within the same ranges for all compatible populations. The X axis shows the sample ID and population.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s012" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s012" position="float" xlink:type="simple"><label>Figure S12</label><caption>
<p>Principal component analysis of 16 world populations. PCA of 1,224 samples from SSIP, SSMP and 14 populations from Phase 1 of the 1 KGP, where the samples are grouped and color-coded by continents (legend). Blue circles are 26 SSIP samples and the remaining 8 SSIP individuals (yellow circles) are with European dominant mitochondria haplogroup assignment.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s013" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s013" position="float" xlink:type="simple"><label>Figure S13</label><caption>
<p>Principal component analysis of 83 Indians from Singapore Genome Variation Project and 85 Gujarati from HapMap 3. PCA on a set of 30,927 SNPs for 83 Singapore Indians (blue) from SGVP and 85 Gujarati Indians in Houston (sky blue) from HapMap 3.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s014" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s014" position="float" xlink:type="simple"><label>Figure S14</label><caption>
<p>Admixture analysis on 104 samples from 20 Indian subgroups <xref ref-type="bibr" rid="pgen.1004377-Reich2">[11]</xref> and 36 SSIP individuals. An analysis of admixture on the 20 Indian subgroups and SSIP with the ADMIXTURE program, where <italic>K</italic>, the number of distinct populations, varied between 2 and 5. Black windows highlight the positions of the Chenchu, Austro-Asiatic groups (Kharia and Santhal), Northern Indians and Southern Indians (see <xref ref-type="supplementary-material" rid="pgen.1004377.s018">Table S2</xref> for the categorization of the 20 Indian subgroups into Southern or Northern Indians).</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s015" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s015" position="float" xlink:type="simple"><label>Figure S15</label><caption>
<p>Cross validation error for admixture analysis with <italic>K</italic> from 2 to 15 performed on 16 populations. Cross-validation error in the admixture analysis at different values of ancestral groups (<italic>K</italic>), which we have allowed to range from 2 to 15. It was observed that while <italic>K</italic> = 7 yielded the lowest cross-validation error, <italic>K</italic> = 6 yielded a difference that was less than 0.01 and thus <italic>K</italic> = 6 was chosen for reporting in the main text on the basis of parsimony.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s016" mimetype="image/tiff" xlink:href="info:doi/10.1371/journal.pgen.1004377.s016" position="float" xlink:type="simple"><label>Figure S16</label><caption>
<p>European component in 36 SSIP individuals from admixture analysis. Bars represent the average European component in 36 SSIP samples from the admixture analysis on 6,519,079 SNPs of 16 populations, comprising 14 populations from 1 KGP (<xref ref-type="supplementary-material" rid="pgen.1004377.s017">Table S1</xref>), SSMP and SSIP. <italic>K</italic> indicates the number of ancestral populations. At <italic>K</italic> = 3, the three ancestral populations are African, European and Asian (<xref ref-type="fig" rid="pgen-1004377-g003">Figure 3B</xref>), where an average of 53.42% European component was found in 36 SSIP individuals. As <italic>K</italic> increased, more ancestral populations contributed to the entire admixture panel, which is likely to dilute the European component possessed by SSIP individuals.</p>
<p>(TIF)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s017" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s017" position="float" xlink:type="simple"><label>Table S1</label><caption>
<p>Description of populations used for comparison with SSIP.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s018" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s018" position="float" xlink:type="simple"><label>Table S2</label><caption>
<p>Description of 25 Indian groups extracted from Reich et al. 2009.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s019" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s019" position="float" xlink:type="simple"><label>Table S3</label><caption>
<p>Transition to transversion ratio (Ts/Tv). (A) Ts/Tv for bi-allelic SNPs, (B) Ts/Tv after the removal of CpG exonic transition SNPs.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s020" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s020" position="float" xlink:type="simple"><label>Table S4</label><caption>
<p>Summary of single sample Loss-of-function (LOF) variants.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s021" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s021" position="float" xlink:type="simple"><label>Table S5</label><caption>
<p>List of pathways affected by Loss-of-function (LOF) variants.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s022" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s022" position="float" xlink:type="simple"><label>Table S6</label><caption>
<p>List of Loss-of-function (LOF) variants found in COSMIC database.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s023" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s023" position="float" xlink:type="simple"><label>Table S7</label><caption>
<p>List of Loss-of-function (LOF) variants related to GWAS studies.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s024" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s024" position="float" xlink:type="simple"><label>Table S8</label><caption>
<p>The proportions for 6 ancestral populations extracted from the output of ADMIXTURE program.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s025" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s025" position="float" xlink:type="simple"><label>Table S9</label><caption>
<p>D statistic analysis with ancient genome for 5 randomly selected pairwise samples, anchored with different SSIP sample (G1). (A) Neanderthal as ancient hominid, (B) Denisovan as ancient hominid.</p>
<p>(DOC)</p>
</caption></supplementary-material><supplementary-material id="pgen.1004377.s026" mimetype="application/msword" xlink:href="info:doi/10.1371/journal.pgen.1004377.s026" position="float" xlink:type="simple"><label>Text S1</label><caption>
<p>Supplementary methods.</p>
<p>(DOC)</p>
</caption></supplementary-material></sec></body>
<back>
<ack>
<p>We thank all the subjects in this study for their participation.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pgen.1004377-Mardis1"><label>1</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mardis</surname><given-names>ER</given-names></name> (<year>2011</year>) <article-title>A decade's perspective on DNA sequencing technology</article-title>. <source>Nature</source> <volume>470</volume>: <fpage>198</fpage>–<lpage>203</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Abecasis1"><label>2</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Abecasis</surname><given-names>GR</given-names></name>, <name name-style="western"><surname>Auton</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Brooks</surname><given-names>LD</given-names></name>, <name name-style="western"><surname>DePristo</surname><given-names>MA</given-names></name>, <name name-style="western"><surname>Durbin</surname><given-names>RM</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>An integrated map of genetic variation from 1,092 human genomes</article-title>. <source>Nature</source> <volume>491</volume>: <fpage>56</fpage>–<lpage>65</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Wong1"><label>3</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wong</surname><given-names>LP</given-names></name>, <name name-style="western"><surname>Ong</surname><given-names>RT</given-names></name>, <name name-style="western"><surname>Poh</surname><given-names>WT</given-names></name>, <name name-style="western"><surname>Liu</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Chen</surname><given-names>P</given-names></name>, <etal>et al</etal>. (<year>2013</year>) <article-title>Deep whole-genome sequencing of 100 southeast Asian Malays</article-title>. <source>Am J Hum Genet</source> <volume>92</volume>: <fpage>52</fpage>–<lpage>66</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Meyer1"><label>4</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Meyer</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Kircher</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Gansauge</surname><given-names>MT</given-names></name>, <name name-style="western"><surname>Li</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Racimo</surname><given-names>F</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>A high-coverage genome sequence from an archaic Denisovan individual</article-title>. <source>Science</source> <volume>338</volume>: <fpage>222</fpage>–<lpage>226</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Green1"><label>5</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Green</surname><given-names>RE</given-names></name>, <name name-style="western"><surname>Krause</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Briggs</surname><given-names>AW</given-names></name>, <name name-style="western"><surname>Maricic</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Stenzel</surname><given-names>U</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>A draft sequence of the Neandertal genome</article-title>. <source>Science</source> <volume>328</volume>: <fpage>710</fpage>–<lpage>722</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Reich1"><label>6</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reich</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Green</surname><given-names>RE</given-names></name>, <name name-style="western"><surname>Kircher</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Krause</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Genetic history of an archaic hominin group from Denisova Cave in Siberia</article-title>. <source>Nature</source> <volume>468</volume>: <fpage>1053</fpage>–<lpage>1060</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Keller1"><label>7</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Keller</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Graefen</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Ball</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Matzas</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Boisguerin</surname><given-names>V</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>New insights into the Tyrolean Iceman's origin and phenotype as inferred by whole-genome sequencing</article-title>. <source>Nat Commun</source> <volume>3</volume>: <fpage>698</fpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-DESA1"><label>8</label>
<mixed-citation publication-type="other" xlink:type="simple">DESA (2009) World population prospects: the 2008 revision. New York: Department for Economic and Social Affairs.</mixed-citation>
</ref>
<ref id="pgen.1004377-Macaulay1"><label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Macaulay</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Hill</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Achilli</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Rengo</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Clarke</surname><given-names>D</given-names></name>, <etal>et al</etal>. (<year>2005</year>) <article-title>Single, rapid coastal settlement of Asia revealed by analysis of complete mitochondrial genomes</article-title>. <source>Science</source> <volume>308</volume>: <fpage>1034</fpage>–<lpage>1036</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Thangaraj1"><label>10</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Thangaraj</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Chaubey</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Kivisild</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Reddy</surname><given-names>AG</given-names></name>, <name name-style="western"><surname>Singh</surname><given-names>VK</given-names></name>, <etal>et al</etal>. (<year>2005</year>) <article-title>Reconstructing the origin of Andaman Islanders</article-title>. <source>Science</source> <volume>308</volume>: <fpage>996</fpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Reich2"><label>11</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reich</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Thangaraj</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Price</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Singh</surname><given-names>L</given-names></name> (<year>2009</year>) <article-title>Reconstructing Indian population history</article-title>. <source>Nature</source> <volume>461</volume>: <fpage>489</fpage>–<lpage>494</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Moorjani1"><label>12</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Moorjani</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Thangaraj</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Lipson</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Loh</surname><given-names>PR</given-names></name>, <etal>et al</etal>. (<year>2013</year>) <article-title>Genetic evidence for recent population mixture in India</article-title>. <source>Am J Hum Genet</source> <volume>93</volume>: <fpage>422</fpage>–<lpage>438</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Singstat1"><label>13</label>
<mixed-citation publication-type="other" xlink:type="simple">Singstat (2012) Population Trends 2012. Singapore: Department of Statistics.</mixed-citation>
</ref>
<ref id="pgen.1004377-Leow1"><label>14</label>
<mixed-citation publication-type="other" xlink:type="simple">Leow BG, Singapore. Dept. of Statistics. (2001) Census of population 2000. Demographic characteristics. Singapore: Dept. of Statistics. 142 pp.</mixed-citation>
</ref>
<ref id="pgen.1004377-Periasamy1"><label>15</label>
<mixed-citation publication-type="other" xlink:type="simple">Periasamy M (2007) Indian Migration into Malaya and Singapore During the British Period. Singapore: Biblioasia.</mixed-citation>
</ref>
<ref id="pgen.1004377-Turnbull1"><label>16</label>
<mixed-citation publication-type="other" xlink:type="simple">Turnbull CM (2009) A History of Modern Singapore. Singapore: National University of Singapore Press.</mixed-citation>
</ref>
<ref id="pgen.1004377-Cingolani1"><label>17</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cingolani</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Platts</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Wang</surname><given-names>LL</given-names></name>, <name name-style="western"><surname>Coon</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Nguyen</surname><given-names>T</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3;</article-title>. <source>Fly</source> <volume>6</volume>: <fpage>80</fpage>–<lpage>92</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-MacArthur1"><label>18</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>MacArthur</surname><given-names>DG</given-names></name>, <name name-style="western"><surname>Balasubramanian</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Frankish</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Huang</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Morris</surname><given-names>J</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>A systematic survey of loss-of-function variants in human protein-coding genes</article-title>. <source>Science</source> <volume>335</volume>: <fpage>823</fpage>–<lpage>828</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-KlossBrandsttter1"><label>19</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kloss-Brandstätter</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Pacher</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Schönherr</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Weissensteiner</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Binna</surname><given-names>R</given-names></name>, <etal>et al</etal>. (<year>2011</year>) <article-title>HaploGrep: a fast and reliable algorithm for automatic classification of mitochondrial DNA haplogroups</article-title>. <source>Human Mutation</source> <volume>32</volume>: <fpage>25</fpage>–<lpage>32</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Rajkumar1"><label>20</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rajkumar</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Banerjee</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Gunturi</surname><given-names>HB</given-names></name>, <name name-style="western"><surname>Trivedi</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Kashyap</surname><given-names>VK</given-names></name> (<year>2005</year>) <article-title>Phylogeny and antiquity of M macrohaplogroup inferred from complete mt DNA sequence of Indian specific lineages</article-title>. <source>BMC Evol Biol</source> <volume>5</volume>: <fpage>26</fpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-vanOven1"><label>21</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>van Oven</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Kayser</surname><given-names>M</given-names></name> (<year>2009</year>) <article-title>Updated comprehensive phylogenetic tree of global human mitochondrial DNA variation</article-title>. <source>Hum Mutat</source> <volume>30</volume>: <fpage>E386</fpage>–<lpage>394</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Chandrasekar1"><label>22</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chandrasekar</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Kumar</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Sreenath</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Sarkar</surname><given-names>BN</given-names></name>, <name name-style="western"><surname>Urade</surname><given-names>BP</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>Updating phylogeny of mitochondrial DNA macrohaplogroup m in India: dispersal of modern human in South Asian corridor</article-title>. <source>PLoS One</source> <volume>4</volume>: <fpage>e7447</fpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Moorjani2"><label>23</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Moorjani</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Thangaraj</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Lipson</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Loh</surname><given-names>P-R</given-names></name>, <etal>et al</etal>. (<year>2013</year>) <article-title>Genetic Evidence for Recent Population Mixture in India</article-title>. <source>American journal of human genetics</source> <volume>93</volume>: <fpage>422</fpage>–<lpage>438</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Wall1"><label>24</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wall</surname><given-names>JD</given-names></name>, <name name-style="western"><surname>Yang</surname><given-names>MA</given-names></name>, <name name-style="western"><surname>Jay</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Kim</surname><given-names>SK</given-names></name>, <name name-style="western"><surname>Durand</surname><given-names>EY</given-names></name>, <etal>et al</etal>. (<year>2013</year>) <article-title>Higher Levels of Neanderthal Ancestry in East Asians than in Europeans</article-title>. <source>Genetics</source> <volume>194</volume>: <fpage>199</fpage>–<lpage>209</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Chan1"><label>25</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chan</surname><given-names>SL</given-names></name>, <name name-style="western"><surname>Suo</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Lee</surname><given-names>SC</given-names></name>, <name name-style="western"><surname>Goh</surname><given-names>BC</given-names></name>, <name name-style="western"><surname>Chia</surname><given-names>KS</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>Translational aspects of genetic factors in the prediction of drug response variability: a case study of warfarin pharmacogenomics in a multi-ethnic cohort from Asia</article-title>. <source>Pharmacogenomics J</source> <volume>12</volume>: <fpage>312</fpage>–<lpage>318</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Chan2"><label>26</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chan</surname><given-names>SL</given-names></name>, <name name-style="western"><surname>Suo</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Chia</surname><given-names>KS</given-names></name>, <name name-style="western"><surname>Teo</surname><given-names>YY</given-names></name> (<year>2012</year>) <article-title>The population attributable fraction as a measure of the impact of warfarin pharmacogenetic testing</article-title>. <source>Pharmacogenomics</source> <volume>13</volume>: <fpage>1247</fpage>–<lpage>1256</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Li1"><label>27</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Li</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Handsaker</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Wysoker</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Fennell</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Ruan</surname><given-names>J</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>The Sequence Alignment/Map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>: <fpage>2078</fpage>–<lpage>2079</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-DePristo1"><label>28</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>DePristo</surname><given-names>MA</given-names></name>, <name name-style="western"><surname>Banks</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Poplin</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Garimella</surname><given-names>KV</given-names></name>, <name name-style="western"><surname>Maguire</surname><given-names>JR</given-names></name>, <etal>et al</etal>. (<year>2011</year>) <article-title>A framework for variation discovery and genotyping using next-generation DNA sequencing data</article-title>. <source>Nat Genet</source> <volume>43</volume>: <fpage>491</fpage>–<lpage>498</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-McKenna1"><label>29</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McKenna</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Hanna</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Banks</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Sivachenko</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Cibulskis</surname><given-names>K</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data</article-title>. <source>Genome Research</source> <volume>20</volume>: <fpage>1297</fpage>–<lpage>1303</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Ng1"><label>30</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ng</surname><given-names>PC</given-names></name>, <name name-style="western"><surname>Henikoff</surname><given-names>S</given-names></name> (<year>2003</year>) <article-title>SIFT: predicting amino acid changes that affect protein function</article-title>. <source>Nucleic Acids Research</source> <volume>31</volume>: <fpage>3812</fpage>–<lpage>3814</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Adzhubei1"><label>31</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Adzhubei</surname><given-names>I</given-names></name>, <name name-style="western"><surname>Schmidt</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Peshkin</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Ramensky</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Gerasimova</surname><given-names>A</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>A method and server for predicting damaging missense mutations</article-title>. <source>Nature Methods</source> <volume>7</volume>: <fpage>248</fpage>–<lpage>249</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Huangda1"><label>32</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang da</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Sherman</surname><given-names>BT</given-names></name>, <name name-style="western"><surname>Lempicki</surname><given-names>RA</given-names></name> (<year>2009</year>) <article-title>Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists</article-title>. <source>Nucleic Acids Res</source> <volume>37</volume>: <fpage>1</fpage>–<lpage>13</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Forbes1"><label>33</label>
<mixed-citation publication-type="other" xlink:type="simple">Forbes SA, Bhamra G, Bamford S, Dawson E, Kok C, <etal>et al</etal>. (2008) The Catalogue of Somatic Mutations in Cancer (COSMIC). Curr Protoc Hum Genet Chapter 10: Unit 10.11.</mixed-citation>
</ref>
<ref id="pgen.1004377-Forbes2"><label>34</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Forbes</surname><given-names>SA</given-names></name>, <name name-style="western"><surname>Bindal</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Bamford</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Cole</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Kok</surname><given-names>CY</given-names></name>, <etal>et al</etal>. (<year>2011</year>) <article-title>COSMIC: mining complete cancer genomes in the Catalogue of Somatic Mutations in Cancer</article-title>. <source>Nucleic Acids Res</source> <volume>39</volume>: <fpage>D945</fpage>–<lpage>950</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Hindorff1"><label>35</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hindorff</surname><given-names>LA</given-names></name>, <name name-style="western"><surname>Sethupathy</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Junkins</surname><given-names>HA</given-names></name>, <name name-style="western"><surname>Ramos</surname><given-names>EM</given-names></name>, <name name-style="western"><surname>Mehta</surname><given-names>JP</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>Potential etiologic and functional implications of genome-wide association loci for human diseases and traits</article-title>. <source>Proc Natl Acad Sci U S A</source> <volume>106</volume>: <fpage>9362</fpage>–<lpage>9367</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-DayemUllah1"><label>36</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dayem Ullah</surname><given-names>AZ</given-names></name>, <name name-style="western"><surname>Lemoine</surname><given-names>NR</given-names></name>, <name name-style="western"><surname>Chelala</surname><given-names>C</given-names></name> (<year>2012</year>) <article-title>SNPnexus: a web server for functional annotation of novel and publicly known genetic variants (2012 update)</article-title>. <source>Nucleic Acids Res</source> <volume>40</volume>: <fpage>W65</fpage>–<lpage>70</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Chen1"><label>37</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chen</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Wallis</surname><given-names>JW</given-names></name>, <name name-style="western"><surname>McLellan</surname><given-names>MD</given-names></name>, <name name-style="western"><surname>Larson</surname><given-names>DE</given-names></name>, <name name-style="western"><surname>Kalicki</surname><given-names>JM</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>BreakDancer: an algorithm for high-resolution mapping of genomic structural variation</article-title>. <source>Nat Meth</source> <volume>6</volume>: <fpage>677</fpage>–<lpage>681</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Hormozdiari1"><label>38</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hormozdiari</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Hajirasouliha</surname><given-names>I</given-names></name>, <name name-style="western"><surname>Dao</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Hach</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Yorukoglu</surname><given-names>D</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Next-generation VariationHunter: combinatorial algorithms for transposon insertion discovery</article-title>. <source>Bioinformatics</source> <volume>26</volume>: <fpage>i350</fpage>–<lpage>i357</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Ye1"><label>39</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ye</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Schulz</surname><given-names>MH</given-names></name>, <name name-style="western"><surname>Long</surname><given-names>Q</given-names></name>, <name name-style="western"><surname>Apweiler</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Ning</surname><given-names>Z</given-names></name> (<year>2009</year>) <article-title>Pindel: a pattern growth approach to detect break points of large deletions and medium sized insertions from paired-end short reads</article-title>. <source>Bioinformatics</source> <volume>25</volume>: <fpage>2865</fpage>–<lpage>2871</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Rausch1"><label>40</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rausch</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Zichner</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Schlattl</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Stütz</surname><given-names>AM</given-names></name>, <name name-style="western"><surname>Benes</surname><given-names>V</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>DELLY: structural variant discovery by integrated paired-end and split-read analysis</article-title>. <source>Bioinformatics</source> <volume>28</volume>: <fpage>i333</fpage>–<lpage>i339</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Iafrate1"><label>41</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Iafrate</surname><given-names>AJ</given-names></name>, <name name-style="western"><surname>Feuk</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Rivera</surname><given-names>MN</given-names></name>, <name name-style="western"><surname>Listewnik</surname><given-names>ML</given-names></name>, <name name-style="western"><surname>Donahoe</surname><given-names>PK</given-names></name>, <etal>et al</etal>. (<year>2004</year>) <article-title>Detection of large-scale variation in the human genome</article-title>. <source>Nat Genet</source> <volume>36</volume>: <fpage>949</fpage>–<lpage>951</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Price1"><label>42</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Price</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>NJ</given-names></name>, <name name-style="western"><surname>Plenge</surname><given-names>RM</given-names></name>, <name name-style="western"><surname>Weinblatt</surname><given-names>ME</given-names></name>, <name name-style="western"><surname>Shadick</surname><given-names>NA</given-names></name>, <etal>et al</etal>. (<year>2006</year>) <article-title>Principal components analysis corrects for stratification in genome-wide association studies</article-title>. <source>Nat Genet</source> <volume>38</volume>: <fpage>904</fpage>–<lpage>909</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Teo1"><label>43</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Teo</surname><given-names>YY</given-names></name>, <name name-style="western"><surname>Sim</surname><given-names>X</given-names></name>, <name name-style="western"><surname>Ong</surname><given-names>RT</given-names></name>, <name name-style="western"><surname>Tan</surname><given-names>AK</given-names></name>, <name name-style="western"><surname>Chen</surname><given-names>J</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>Singapore Genome Variation Project: a haplotype map of three Southeast Asian populations</article-title>. <source>Genome Res</source> <volume>19</volume>: <fpage>2154</fpage>–<lpage>2162</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Reich3"><label>44</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reich</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Thangaraj</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Price</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Singh</surname><given-names>L</given-names></name> (<year>2009</year>) <article-title>Reconstructing Indian population history</article-title>. <source>Nature</source> <volume>461</volume>: <fpage>489</fpage>–<lpage>494</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Alexander1"><label>45</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alexander</surname><given-names>DH</given-names></name>, <name name-style="western"><surname>Novembre</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Lange</surname><given-names>K</given-names></name> (<year>2009</year>) <article-title>Fast model-based estimation of ancestry in unrelated individuals</article-title>. <source>Genome Research</source> <volume>19</volume>: <fpage>1655</fpage>–<lpage>1664</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Drmanac1"><label>46</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Drmanac</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Sparks</surname><given-names>AB</given-names></name>, <name name-style="western"><surname>Callow</surname><given-names>MJ</given-names></name>, <name name-style="western"><surname>Halpern</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Burns</surname><given-names>NL</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Human Genome Sequencing Using Unchained Base Reads on Self-Assembling DNA Nanoarrays</article-title>. <source>Science</source> <volume>327</volume>: <fpage>78</fpage>–<lpage>81</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Jostins1"><label>47</label>
<mixed-citation publication-type="other" xlink:type="simple">Jostins L (2011) YFitter: a program for assigning haplogroups using maximum likelihood.</mixed-citation>
</ref>
<ref id="pgen.1004377-Patterson1"><label>48</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Moorjani</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Luo</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Mallick</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Rohland</surname><given-names>N</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>Ancient Admixture in Human History</article-title>. <source>Genetics</source> <volume>192</volume>: <fpage>1065</fpage>–<lpage>1093</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Reich4"><label>49</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reich</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Green</surname><given-names>RE</given-names></name>, <name name-style="western"><surname>Kircher</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Krause</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Patterson</surname><given-names>N</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Genetic history of an archaic hominin group from Denisova Cave in Siberia</article-title>. <source>Nature</source> <volume>468</volume>: <fpage>1053</fpage>–<lpage>1060</lpage>.</mixed-citation>
</ref>
<ref id="pgen.1004377-Meyer2"><label>50</label>
<mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Meyer</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Kircher</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Gansauge</surname><given-names>M-T</given-names></name>, <name name-style="western"><surname>Li</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Racimo</surname><given-names>F</given-names></name>, <etal>et al</etal>. (<year>2012</year>) <article-title>A High-Coverage Genome Sequence from an Archaic Denisovan Individual</article-title>. <source>Science</source> <volume>338</volume>: <fpage>222</fpage>–<lpage>226</lpage>.</mixed-citation>
</ref>
</ref-list></back>
</article>