<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="EN"><front><journal-meta><journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id><journal-id journal-id-type="publisher-id">plos</journal-id><journal-id journal-id-type="pmc">plosone</journal-id><!--===== Grouping journal title elements =====--><journal-title-group><journal-title>PLoS ONE</journal-title></journal-title-group><issn pub-type="epub">1932-6203</issn><publisher><publisher-name>Public Library of Science</publisher-name><publisher-loc>San Francisco, USA</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">07-PONE-RA-02317R1</article-id><article-id pub-id-type="doi">10.1371/journal.pone.0001172</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="Discipline"><subject>Biotechnology/Plant Biotechnology</subject><subject>Genetics and Genomics/Plant Genetics and Gene Expression</subject><subject>Molecular Biology/Bioinformatics</subject><subject>Plant Biology/Agricultural Biotechnology</subject></subj-group></article-categories><title-group><article-title>Complexity Reduction of Polymorphic Sequences (CRoPS™): A Novel Approach for Large-Scale Polymorphism Discovery in Complex Genomes</article-title><alt-title alt-title-type="running-head">SNP Discovery Using CRoPS™</alt-title></title-group><contrib-group><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>van Orsouw</surname><given-names>Nathalie J.</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="corresp" rid="cor1"><sup>*</sup></xref></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Hogers</surname><given-names>René C. 
J.</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Janssen</surname><given-names>Antoine</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Yalcin</surname><given-names>Feyruz</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Snoeijers</surname><given-names>Sandor</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Verstege</surname><given-names>Esther</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Schneiders</surname><given-names>Harrie</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>van der Poel</surname><given-names>Hein</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>van Oeveren</surname><given-names>Jan</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>Verstegen</surname><given-names>Harold</given-names></name><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" xlink:type="simple"><name name-style="western"><surname>van Eijk</surname><given-names>Michiel J. 
T.</given-names></name><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1">          <addr-line>Keygene NV, Wageningen, The Netherlands</addr-line>       </aff><contrib-group><contrib contrib-type="editor" xlink:type="simple"><name name-style="western"><surname>Baxter</surname><given-names>Ivan</given-names></name><role>Academic Editor</role><xref ref-type="aff" rid="edit1"/></contrib></contrib-group><aff id="edit1">Purdue University, United States of America</aff><author-notes><corresp id="cor1">* To whom correspondence should be addressed. E-mail: <email xlink:type="simple">nathalie.van-orsouw@keygene.com</email></corresp><fn fn-type="con"><p>Conceived and designed the experiments: Nv RH Mv. Performed the experiments: SS EV HS Hv. Analyzed the data: AJ FY Jv. Contributed reagents/materials/analysis tools: AJ FY Jv HV. Wrote the paper: Nv Mv.</p></fn><fn fn-type="conflict"><p>Paid employment: All authors and co-authors with the exception of S. Snoeijers are employees of Keygene N.V. S. Snoeijers was a Keygene N.V. employee during the execution of this project but he has left Keygene N.V. on December 31, 2006. Patent application: The CRoPS technology is subject to patent applications owned by Keygene N.V. 
See also acknowledgements.</p></fn></author-notes><pub-date pub-type="collection"><year>2007</year></pub-date><pub-date pub-type="epub"><day>14</day><month>11</month><year>2007</year></pub-date><volume>2</volume><issue>11</issue><elocation-id>e1172</elocation-id><history><date date-type="received"><day>25</day><month>9</month><year>2007</year></date><date date-type="accepted"><day>25</day><month>10</month><year>2007</year></date></history><!--===== Grouping copyright info into permissions =====--><permissions><copyright-year>2007</copyright-year><copyright-holder>van Orsouw et al</copyright-holder><license><license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license></permissions><abstract><p>Application of single nucleotide polymorphisms (SNPs) is revolutionizing human bio-medical research. However, discovery of polymorphisms in low polymorphic species is still a challenging and costly endeavor, despite widespread availability of Sanger sequencing technology. We present CRoPS™ as a novel approach for polymorphism discovery by combining the power of reproducible genome complexity reduction of AFLP® with Genome Sequencer (GS) 20/GS FLX next-generation sequencing technology. With CRoPS, hundreds-of-thousands of sequence reads derived from complexity-reduced genome sequences of two or more samples are processed and mined for SNPs using a fully-automated bioinformatics pipeline. We show that over 75% of putative maize SNPs discovered using CRoPS are successfully converted to SNPWave® assays, confirming them to be true SNPs derived from unique (single-copy) genome sequences. 
By using CRoPS, polymorphism discovery will become affordable in organisms with high levels of repetitive DNA in the genome and/or low levels of polymorphism in the (breeding) germplasm without the need for prior sequence information.</p></abstract><funding-group><funding-statement>This work was funded by Keygene N.V.</funding-statement></funding-group><counts><page-count count="10"/></counts></article-meta></front><body><sec id="s1"><title>Introduction</title><p>SNP discovery is an important area of molecular genetics research aimed at collecting sufficient exploitable sequence polymorphisms to enable high-resolution, high-throughput genotyping at lower costs in the future. However, for many crop species the efficiency of the SNP discovery process is often hampered by the fact that limited amounts of genome sequences are available compared to e.g. <italic>Arabidopsis</italic> and rice, for which draft genome sequences have been completed <xref ref-type="bibr" rid="pone.0001172-The1">[1]</xref>, <xref ref-type="bibr" rid="pone.0001172-International1">[2]</xref>. Furthermore, the occurrence of (highly) duplicated genome sequences in crops such as maize <xref ref-type="bibr" rid="pone.0001172-SanMiguel1">[3]</xref>, wheat <xref ref-type="bibr" rid="pone.0001172-Li1">[4]</xref>, soybean <xref ref-type="bibr" rid="pone.0001172-Swaminathan1">[5]</xref> and pepper <xref ref-type="bibr" rid="pone.0001172-An1">[6]</xref> impedes conversion of identified polymorphisms into genotyping assays for application in breeding. As a result, available high-throughput SNP genotyping technologies <xref ref-type="bibr" rid="pone.0001172-Oliphant1">[7]</xref>–<xref ref-type="bibr" rid="pone.0001172-Matsuzaki1">[10]</xref> can not be fully exploited in plant breeding at present due to lack of suitable “content”. 
This is unlike the situation in humans where several millions of SNPs are known and being utilized in population genetic analysis <xref ref-type="bibr" rid="pone.0001172-The2">[11]</xref> and medical diagnostics <xref ref-type="bibr" rid="pone.0001172-Thomas1">[12]</xref>. Hence, there is a need for efficient polymorphism discovery technologies which target unique genome regions in organisms lacking extensive genome sequence information.</p><p>The maize (<italic>Zea mays</italic>) genome comprises 2300 to 2700 Mb <xref ref-type="bibr" rid="pone.0001172-Arumuganathan1">[13]</xref>. Approximately 80% of the total nuclear genome of maize consists of highly repetitive sequences interspersed with single-copy, gene-rich regions. The majority of the repeats are classified as long terminal repeat (LTR)-retrotransposon families that vary in copy number <xref ref-type="bibr" rid="pone.0001172-Meyers1">[14]</xref>. As a consequence of these genome characteristics, SNP discovery in maize is not straightforward since it is not always obvious how to distinguish a true SNP from sequence differences between duplicated sequences occurring within the genome. Various techniques have been employed to enrich for single-copy sequences in maize, such as High C<sub>o</sub><italic>t</italic> selection <xref ref-type="bibr" rid="pone.0001172-Peterson1">[15]</xref>, methylation filtering <xref ref-type="bibr" rid="pone.0001172-Whitelaw1">[16]</xref> and hypomethylated partial restriction (HMPR) <xref ref-type="bibr" rid="pone.0001172-Emberton1">[17]</xref>. HMPR utilizes methylation-sensitive restriction enzymes, thereby relying on the observation that in maize genes often remain unmethylated, whereas most LTR retrotransposons are methylated <xref ref-type="bibr" rid="pone.0001172-Bennetzen1">[18]</xref>, <xref ref-type="bibr" rid="pone.0001172-Rabinowicz1">[19]</xref>. 
Especially HMPR has been shown to be exceptional in depleting retrotransposons to less than 5% <xref ref-type="bibr" rid="pone.0001172-Emberton1">[17]</xref> of the original content. However, despite the fact that these methods enrich for low-copy sequences, for economical reasons further genome complexity reduction is required to engage in comparative sequencing.</p><p>The AFLP® technology <xref ref-type="bibr" rid="pone.0001172-Vos1">[20]</xref>–<xref ref-type="bibr" rid="pone.0001172-Vuylsteke1">[22]</xref> is a powerful DNA fingerprinting technology which has found widespread application in many organisms of diverse origin, including plants, animals, micro-organisms and human. AFLP is based on the selective PCR amplification of restriction fragments from a digest of whole genomic DNA. Its main features are that no prior sequence information is needed and multiplexing levels can be controlled by the choice (and number) of restriction endonucleases and by varying the number of selective bases of the primers used in the amplification process. Besides its many applications as genetic marker technology <xref ref-type="bibr" rid="pone.0001172-Vuylsteke1">[22]</xref>, AFLP is therefore also a robust and scalable method for genome complexity reduction. This feature of the AFLP technology can be exploited to expedite polymorphism discovery by generating in parallel highly similar genome representations of multiple accessions of crop species for high-throughput sequencing.</p><p>Here we describe the CRoPS™ technology (acronym for <underline>C</underline>omplexity <underline>R</underline>eduction <underline>o</underline>f <underline>P</underline>olymorphic <underline>S</underline>equences) and its application in maize. With CRoPS, tagged complexity-reduced libraries of two or more genetically diverse samples are prepared by AFLP, preferably using a methylation-sensitive restriction enzyme. 
Next, AFLP fragment libraries are sequenced at 5 to 10-fold average redundancy in microfabricated, high-density picoliter reactions using the GS system <xref ref-type="bibr" rid="pone.0001172-Margulies1">[23]</xref>. Resulting sequences are clustered and aligned, and the alignments are mined for SNPs using custom-developed bio-informatics tools. Rigorous quality measures are applied to separate PCR amplification and/or sequence errors from true polymorphisms. The fact that CRoPS is AFLP-based enables its application in many organisms, irrespective of genome complexity and size. The use of homozygous lines in the CRoPS process enables selection of SNPs which are located in low- or single-copy genome sequences and therefore have a high conversion rate to genotyping assays for medium to large-scale genotyping.</p><p>The CRoPS technology has been applied for polymorphism discovery between the maize lines B73 and Mo17, using AFLP enzyme combination <italic>Hpa</italic>II/<italic>Mse</italic>I. Using a fully automated bioinformatics pipeline we mined more than 1200 high quality putative SNPs and show that 23 out of 30 SNPs were successfully converted into SNPWave assays <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>. We propose CRoPS as a generic approach to significantly enhance polymorphism discovery in vegetable and field crops.</p></sec><sec id="s2"><title>Results</title><sec id="s2a"><title>GS 20 sequence analysis</title><p>After completion of one single GS 20 run, a first bioinformatics analysis was performed using the GS 20 software (i.e. “on-rig” software). A total of 754,199 reads (“totalRawWells”) were obtained. The number of reads after the first filtering for Key sequences (“totalKeyPass”) was 739,042. Of these, 399,252 GS 20 raw sequencing reads remained after the final filtering by the GS 20 software. 
This number of sequence reads is higher than the specifications of the GS 20 but in line with other runs we performed earlier (data not shown) as well as results reported by others <xref ref-type="bibr" rid="pone.0001172-Weber1">[25]</xref>. Their average read length was 103 nt (<xref ref-type="table" rid="pone-0001172-t001">Table 1</xref>).</p><table-wrap id="pone-0001172-t001" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.t001</object-id><label>Table 1</label><caption><title>Overview of results of one GS 20 CRoPS run in maize</title></caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0001172-t001-1" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.t001" xlink:type="simple"/><table><colgroup span="1"><col align="left" span="1"/><col align="center" span="1"/><col align="center" span="1"/></colgroup><tbody><tr><td align="left" colspan="1" rowspan="1">Parameters</td><td align="left" colspan="1" rowspan="1">Enzyme combination</td><td align="left" colspan="1" rowspan="1"><italic>Hpa</italic>II &amp; <italic>Mse</italic>I</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Selective bases AFLP primers</td><td align="left" colspan="1" rowspan="1">A &amp; CT</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Average obtained read length (before trimming)</td><td align="left" colspan="1" rowspan="1">103 nt</td></tr><tr><td align="left" colspan="1" rowspan="1">Trimming</td><td align="left" colspan="1" rowspan="1">Total # of reads after filtering (“GS 20 raw sequencing reads”)</td><td align="left" colspan="1" rowspan="1">399,252</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Reads with sample identification tag assigned</td><td align="left" colspan="1" rowspan="1">383,566 (96%)</td></tr><tr><td align="left" colspan="1" 
rowspan="1"/><td align="left" colspan="1" rowspan="1">Faulty reads (no sample identification tag assigned)</td><td align="left" colspan="1" rowspan="1">15,686 (4%)</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1"># reads with sample identification tag for sample 1 (B73)</td><td align="left" colspan="1" rowspan="1">149,226 (39%)</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1"># reads with sample identification tag for sample 2 (Mo17)</td><td align="left" colspan="1" rowspan="1">234,340 (61%)</td></tr><tr><td align="left" colspan="1" rowspan="1">Clustering</td><td align="left" colspan="1" rowspan="1">Multiple sequence alignments</td><td align="left" colspan="1" rowspan="1">18,989</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Reads in multiple sequence alignments</td><td align="left" colspan="1" rowspan="1">211,100</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Average # reads per alignment</td><td align="left" colspan="1" rowspan="1">11.11</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1">Singletons</td><td align="left" colspan="1" rowspan="1">29,141</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1"># reads in large clusters not contained in MSAs</td><td align="left" colspan="1" rowspan="1">143,325</td></tr><tr><td align="left" colspan="1" rowspan="1">Polymorphisms</td><td align="left" colspan="1" rowspan="1"># putative SNPs</td><td align="left" colspan="1" rowspan="1">1,225</td></tr><tr><td align="left" colspan="1" rowspan="1"/><td align="left" colspan="1" rowspan="1"># putative indels</td><td align="left" colspan="1" rowspan="1">37</td></tr></tbody></table></alternatives></table-wrap><p>Further bioinformatics analysis took place “off-rig” (i.e. 
on a separate server) using the CRoPS pipeline (<xref ref-type="fig" rid="pone-0001172-g001">Fig. 1</xref>). The GS 20 raw sequencing reads were trimmed (adapter removal) and 383,566 (96%) sequences remained (i.e. sequences for which a significant match with a tagged AFLP primer was found). The reasons for rejection of the remaining 15,686 (4%) reads (classified as faulty reads) were three-fold: 1) AFLP adapter not found, 2) conflict in adapter position (concatamers), and 3) sample identification tag conflict, i.e. a sequence with sample identification tag of one sample at one end and with sample identification tag of the second sample at the other end of the sequence read (so called “mixed fragments”, see further below).</p><fig id="pone-0001172-g001" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g001</object-id><label>Figure 1</label><caption><title>Bioinformatics pipeline for high-throughput analysis of CRoPS sequence runs.</title></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g001" xlink:type="simple"/></fig><p>Using the TIGR Gene Indices clustering tool (TGICL) <xref ref-type="bibr" rid="pone.0001172-Pertea1">[26]</xref>, the remaining 383,566 sequences were clustered and assembled. Among these were two very large clusters (119,717 and 23,608 reads respectively) containing heavily repeated sequences. Homology searches using the Basic Local Alignment Search Tool (BLAST) revealed that the sequences within these two clusters were in fact chloroplast sequences. These two clusters were excluded from further processing. Subsequently, sequences within the remaining clusters were assembled into multiple sequence alignments (MSAs) (<xref ref-type="table" rid="pone-0001172-t001">Table 1</xref>). In addition to the 18,989 MSAs containing 211,100 sequence reads, 29,141 (7.6%) singletons were found, i.e. 
sequences that were not assembled into an MSA.</p><p>Finally, SNPs were mined between the reads contained in an MSA. Parameters for SNP mining were set to include only SNPs for which both alleles were observed at least twice and SNPs not being part of homopolymers larger than 3 bases. The threshold for minimal distance to a neighboring SNP was initially set at one base, i.e. all SNPs were selected irrespective of their distance to (a) neighboring SNP(s). In addition, and importantly, SNPs were mined according to sample origin, i.e. only SNPs “segregating homozygously” between the two maize lines were included. By doing so, a strong filter was created to select against “false” SNPs resulting from alignment of highly homologous duplicated sequences as opposed to genuine SNPs derived from single-copy sequences in the sequenced genome fraction (<xref ref-type="fig" rid="pone-0001172-g002">Fig. 2</xref>). As a result, 1262 putative SNPs, including 37 putative indels were mined (<xref ref-type="table" rid="pone-0001172-t001">Table 1</xref>).</p><fig id="pone-0001172-g002" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g002</object-id><label>Figure 2</label><caption><title>Example of a multiple sequence alignment (MSA) with SNP and sample related properties.</title><p>SNP properties include sequence depth (sd), the count on the number of reads at the polymorphic position, the relative position of the SNP on the consensus sequence, the distance to the neighboring SNP, flanking sequence size and homopolymeric region information. Sample related properties were derived from the Oracle database. 
The ratio sample sequence depth to MSA sequence depth is calculated.</p></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g002" xlink:type="simple"/></fig></sec><sec id="s2b"><title>Effect of search parameters on SNP mining</title><p>To investigate the relationship between the number of putative SNPs and SNP mining parameters, SNPs were mined under different parameter settings regarding the minimal available sequence information flanking the target SNP (1, 2, 3, 4, 6 or 12 bases), and the minimal interval of flanking sequence that must be devoid of additional SNPs (1, 2, 3, 4, 6 or 12 bases). SNP mining was performed by varying these two parameters in all 36 (6 times 6) possible combinations, while keeping the other SNP mining parameters, including minimal representation of both alleles at least twice, homopolymer settings and segregation according to sample origin constant. As expected, the number of SNPs mined according to these more stringent criteria decreased to less than 50% (from 1262 to 591; <xref ref-type="fig" rid="pone-0001172-g003">Fig. 3</xref>). This selection of 591 SNPs was available for subsequent assay design.</p><fig id="pone-0001172-g003" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g003</object-id><label>Figure 3</label><caption><title>Number of putative SNPs and indels as a function of the minimal length of flanking sequences surrounding the SNP and the minimal interval devoid of additional SNPs/indels.</title></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g003" xlink:type="simple"/></fig></sec><sec id="s2c"><title>Validation of putative CRoPS SNPs</title><p>Small-scale validation of putative SNPs was carried out using the SNPWave® technology <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>. 
From the selection of 591 putative SNPs mined according to the stringent criteria mentioned above (including a minimum of 12 bases of flanking sequence surrounding the target SNP and a minimum interval of 12 bases devoid of additional SNPs), 30 SNPs were randomly selected. Two 15-plex SNPWave assays were designed and tested using two parental lines and 94 recombinant inbred line (RIL) offspring of the ISU (B73×Mo17) maize mapping population. For 23 out of 30 tested loci (77%) clear SNPWave reaction products were observed for both alleles, while for the remaining 7 loci one or both alleles were not observed (conversion failure). For all 23 SNP loci functioning properly in the SNPWave assay, the parental lines B73 and Mo17 were polymorphic and segregation was observed among RIL lines (<xref ref-type="fig" rid="pone-0001172-g004">Fig. 4</xref>), indicative of a high proportion of mined SNPs being derived from single-copy regions in the genome.</p><fig id="pone-0001172-g004" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g004</object-id><label>Figure 4</label><caption><title>Pseudo-gel image visualizations of two SNPWave assays in maize detected by capillary electrophoresis.</title><p>Left panel: 13-plex SNPWave assay; right panel: 10-plex SNPWave assay. Numbers 1–9 represent different recombinant inbred line offspring of B73 and Mo17.</p></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g004" xlink:type="simple"/></fig></sec></sec><sec id="s3"><title>Discussion</title><p>We have applied CRoPS technology for polymorphism discovery in maize and have mined more than 1200 high quality putative SNPs from a single GS 20 sequencing run. 
We speculate that the stringent but user-definable parameter settings of the bioinformatics pipeline as well as the use of <italic>Hpa</italic>II as one of the restriction enzymes for AFLP template preparation effectively enrich for SNPs located in low-copy or unique genome sequences which have a high success rate of conversion. Since SNPWave is a ligation-based multiplexed SNP genotyping technology <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>, we expect conversion rates to be similar when SNPs mined using CRoPS are converted using other ligation-based SNP genotyping technologies <xref ref-type="bibr" rid="pone.0001172-Oliphant1">[7]</xref>, <xref ref-type="bibr" rid="pone.0001172-Hardenbol1">[8]</xref>.</p><p>During the development of CRoPS, which led to the current sample preparation protocol, we have made several modifications (see <xref ref-type="sec" rid="s4">Methods</xref>) to the original protocol for GS 20 sequencing <xref ref-type="bibr" rid="pone.0001172-Margulies1">[23]</xref> which was conceived for library preparation of a single sample. These modifications were introduced after the observation of so called “mixed-fragments” in earlier CRoPS runs (results not shown). “Mixed fragments” are sequence reads containing a sample identification tag of one sample at one end and the sample identification tag of another sample at the other end (<xref ref-type="fig" rid="pone-0001172-g005">Fig. 5</xref>). In earlier experiments we observed these “mixed fragments” at frequencies between 0.1 and 16% of all obtained reads per run with higher frequencies when more than two samples were involved (data not shown). We suspected that “mixed-fragments” arose from the combination of the enzymatic (3′-5′ exonuclease) mediated recession of free 3′ termini of sample DNA and concomitant fill-in using <italic>Bst</italic> polymerase to create blunt ends for GS 20 adapter ligation as per the original protocol. 
When this procedure is applied to a mixture of short PCR products containing single-stranded fragments (such as in case of CRoPS), heteroduplex fragments are formed upon mixing the two (or more) samples at this step. Since the different samples contain different four base sample identification tags at their 5′ ends, we suspected that the 3′ ends (which do not match the four base sample identification tags at the 5′ ends of the opposite strand of the heteroduplexes) are removed and filled-in with the opposite strand as template for polymerization. The net result of such an event is a sample identification tag switch (<xref ref-type="fig" rid="pone-0001172-g005">Fig. 5</xref>). Therefore, we omitted the end-polishing step and modified the GS 20 adapters A and B by adding a 5′ T nucleotide to allow T/A ligation as commonly performed in PCR product cloning (<xref ref-type="fig" rid="pone-0001172-g006">Fig. 6</xref>). This modification was also expected to prevent possible concatamer formation of PCR products. Indeed, these modifications reduced the occurrence of “mixed fragments” to negligible levels (less than 0.00025% of reads) in the CRoPS run reported here.</p><fig id="pone-0001172-g005" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g005</object-id><label>Figure 5</label><caption><title>Composition and hypothesized cause of “mixed fragments”.</title><p>“Mixed fragments” are characterized by the occurrence of the sample identification tag of sample 1 on one side and the sample identification tag of sample 2 on the other side. (A) Schematic representation of observed homoduplex and heteroduplex fragment types containing expected tags and “mixed fragments”. 
(B) “Mixed fragments” are formed when (1) a heteroduplex is formed between complementary strands of samples 1 and 2, (2) 3′-5′ exonuclease activity of T4 DNA polymerase removes the sequence tags at the 3′ ends, (3) polymerase activity of T4 DNA polymerase extends the 3′ ends using the opposite strand as template, resulting in incorporation of the “wrong” sequence tag, i.e. the observation of “mixed fragments”.</p></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g005" xlink:type="simple"/></fig><fig id="pone-0001172-g006" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0001172.g006</object-id><label>Figure 6</label><caption><title>Protocol modification to avoid “mixed fragments”.</title><p>(A) Blunt-end adapter ligation as per the original GS 20 library preparation protocol. (B) T/A ligation as applied in the CRoPS protocol. Amplification using a polymerase lacking 3′-5′ exonuclease (proofreading) activity is performed resulting in A-addition to the AFLP fragments, after which the T-adapters can be ligated. (C) Flowcharts of the original GS 20 library preparation protocol and the CRoPS library preparation protocol.</p></caption><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.g006" xlink:type="simple"/></fig><p>Although it was attempted to carefully pool AFLP products of both samples in equal amounts, a somewhat skewed sample distribution in terms of reads per sample (39% sample 1 and 61% sample 2) was obtained. With varying levels of deviation from equal sample representation, this has also been observed in at least six other GS 20 runs (data not shown), despite our attempts to pool equimolar amounts of AFLP products. Clearly it would be beneficial to reach more equal sample representation to increase the number of SNPs mined per run. 
The same observation was made by Binladen and co-workers <xref ref-type="bibr" rid="pone.0001172-Binladen1">[27]</xref>. In their study the effect of 5′-tag sequences was suggested as a likely explanation for the single molecule sequence variations. We support this explanation as we have observed in this GS 20 run and other subsequent runs (data not shown) that 5′ tag sequences containing “AC” bases at the 5′ end are significantly underrepresented when equimolar amounts of tagged sample DNAs are pooled. Therefore, in retrospect the choice of a sample identification tag containing “5′-AC” has contributed to the observed skewed sample distribution.</p><p>Other optimization steps expected to increase the output of CRoPS further include selection against plastid (chloroplast) sequences co-isolated with genomic DNA, the use of normalized genomic or cDNA <xref ref-type="bibr" rid="pone.0001172-Barbazuk1">[28]</xref> libraries or other methods of enrichment for unique, single-copy sequences such as High C<sub>o</sub><italic>t</italic> selection <xref ref-type="bibr" rid="pone.0001172-Peterson1">[15]</xref> or methylfiltration <xref ref-type="bibr" rid="pone.0001172-Whitelaw1">[16]</xref>, prior to AFLP amplification. The use of such enrichment methods contributes to losing as little sequence capacity as possible to (highly) repeated sequences. Obviously, the output of a CRoPS run will also increase considerably as a result of the recent introduction of the GS FLX which has output specifications of over 400,000 reads with average read length of 240 nt. The increased read length not only increases the number of base pairs per run but also reduces the fraction of SNPs that cannot be exploited due to insufficient flanking sequence information available for assay development. In conclusion, CRoPS is a powerful technology for random genetic marker development, which meets the shortcomings intrinsic to many plant species, i.e. 
the lack of available sequence information, large genomes containing high proportions of duplicated sequences and/or low levels of polymorphism. In the absence of whole-genome draft sequences, high-throughput sequencing of genome representations of multiple accessions in parallel using CRoPS will supply sufficient genetic (single nucleotide) polymorphisms to allow marker-assisted selection using existing genotyping platforms. It is our expectation that these developments will allow high-resolution sequence-based breeding using thousands of genetic markers to become reality in the near future.</p></sec><sec id="s4"><title>Materials and Methods</title><sec id="s4a"><title>AFLP target preparation</title><p>Total genomic DNA was isolated from leaf material of the two parental lines (i.e. B73 &amp; Mo17) of the ISU mapping population (<ext-link ext-link-type="uri" xlink:href="http://www.maizegdb.org" xlink:type="simple">www.maizegdb.org</ext-link>), using a modified CTAB procedure <xref ref-type="bibr" rid="pone.0001172-Stuart1">[29]</xref>. These two parental lines were chosen to be able to validate and map the discovered SNPs in the ISU mapping population.</p><p>AFLP templates were prepared as described previously <xref ref-type="bibr" rid="pone.0001172-Vos1">[20]</xref>. In short, 100–500 ng total genomic DNA was digested using 5 units <italic>Hpa</italic>II and 2 units <italic>Mse</italic>I for at least 1 hour at 37°C. After digestion, the mixture was heated at 80°C for 10 min. Next, AFLP adapter ligation using <italic>Hpa</italic>II and <italic>Mse</italic>I adapters was carried out for 3 hours at 37°C. The restriction-ligation (RL) mixture was subsequently diluted 10-fold with T<sub>10</sub>E<sub>0.1</sub> and 5 µl diluted mix was used as a template in a selective pre-amplification step, the so-called +1/+1 pre-amplification. 
Primer sequences for the +1/+1 pre-amplification were 5′-GTAGACTGCGTACACGGA-3′ (<italic>Hpa</italic>II site, including 1 selective nucleotide “A”) and 5′-GATGAGTCCTGAGTAAC-3′ (<italic>Mse</italic>I site, including 1 selective nucleotide “C”). Twenty µl PCRs were performed containing 5 µl diluted RL mixture, 30 ng <italic>Hpa</italic>II primer, 30 ng <italic>Mse</italic>I primer, 0.2 mM dNTP, 0.4 U AmpliTaq® (Applied Biosystems) and 1× AmpliTaq buffer. PCR was performed for 20 cycles with the following cycle profile: 30 sec 94°C, 60 sec 56°C, 60 sec 72°C, followed by cooling down to 4°C.</p><p>The +1/+1 pre-amplification reaction was diluted 20-fold with T<sub>10</sub>E<sub>0.1</sub>, and used for the second selective amplification step, the so-called +1/+2 selective amplification. Primer sequences for the +1/+2 selective amplification were 5′-P-ACACGTAGACTGCGTACACGGA-3′ (<italic>Hpa</italic>II site, including 1 selective nucleotide “A”) and 5′-P-ACACGATGAGTCCTGAGTAACT-3′ (<italic>Mse</italic>I site, including 2 selective nucleotides “CT”) for sample B73. The four most 5′ bases of these primers serve as a sample identification tag (KeyGene™ SeqTag technology). These 4-nt sample identification tags were selected from a collection of 4-nt sequences differing by at least 2 nt to exclude the possibility that a single nucleotide substitution error could cause incorrect assignment of the sequence to a sample. Similarly, primer sequences for the +1/+2 selective amplification of the Mo17 sample were 5′-P-AGCTGTAGACTGCGTACACGGA-3′ (<italic>Hpa</italic>II site, including 1 selective nucleotide “A”) and 5′-P-AGCTGATGAGTCCTGAGTAACT-3′ (<italic>Mse</italic>I site, including 2 selective nucleotides “CT”). Fifty µl PCRs were performed containing 5 µl diluted +1/+1 pre-amplification mixture, 75 ng <italic>Hpa</italic>II primer, 75 ng <italic>Mse</italic>I primer, 0.2 mM dNTP, 1 U AmpliTaq (Applied Biosystems) and 1× AmpliTaq buffer. 
PCR was performed for 30 cycles with the following cycle profile: 30 sec 94°C, 60 sec 56°C, 60 sec 72°C, followed by cooling down to 4°C.</p><p>Next, 100 µl of PCR products of each sample were purified using the QIAquick PCR Purification Kit (Qiagen). Concentrations of both samples were determined using the Nanodrop ND-1000 (Nanodrop Technologies), after which equal amounts of the two samples were pooled and further treated as one fragment library sample. This saves costs and prevents relying on physical compartmentalization to separate both samples. Furthermore this approach provides flexibility regarding processing multiple samples.</p></sec><sec id="s4b"><title>GS 20 library preparation &amp; titration</title><p>3.45 µg of the fragment library sample (i.e. pooled, purified and tagged AFLP products) were used as input for GS 20 library construction. The use of tagged and pooled PCR products, however, necessitated several adaptations in the published GS 20 library construction protocol <xref ref-type="bibr" rid="pone.0001172-Margulies1">[23]</xref>. First, no shearing was carried out. Second, the end-polishing step was omitted, and modified A and B adapters were used as follows: adapter A-upper strand: 5′-CCATCTCATCCCTGCGTGTCCCATCTGTTCCCTCCCTGTCTCAGT-3′, adapter A-lower strand: 5′-CTGAGACAGGGAGGGAACAGATGG-3′, adapter B-upper strand: 5′-BIO-TEG-CCTATCCCCTGTGTGCCTTGCCTATCCCCTGTTGCGTGTCTCAGT-3′ and adapter B-lower strand: 5′-P-CTGAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGG-3′. Finally, the <italic>Bst</italic> DNA polymerase fill-in step of the published protocol was left out.</p><p>After library construction, a titration run was carried out using 16, 64, 256 and 512 copies per bead. The copies per bead ratio to be used in the titration run is estimated based on the concentration of sstDNA (single stranded AB library). Therefore, the outcome of the titration run determines the ratio which needs to be applied in the actual sequencing run. 
Based on the titration run carried out for this experiment, a 48 copies per bead ratio was selected, founded on the “Predicted Recovery” of approximately 2×10<sup>6</sup> enriched beads, &gt;60% PassFilter, &lt;20% Mixed+Dots and approximately 6000 Keypass reads.</p></sec><sec id="s4c"><title>GS 20 sequencing</title><p>Emulsion PCR and bead enrichment were carried out according to the standard GS 20 protocol (Roche Applied Science). One full picotiterplate (PTP) (70×75 mm) with two regions was used. Enriched beads were divided over both regions. Sequencing was performed according to the manufacturer's instructions (Roche Applied Science).</p></sec><sec id="s4d"><title>CRoPS bioinformatics pipeline</title><p>The basecalled reads from both regions were added together in one file and further processed for SNP mining using a fully automated pipeline (Keygene N.V.). The web-based pipeline was written in Perl 5.8.0 and runs via an Apache web server on a Linux platform. Microsoft Internet Explorer was used as client. An Oracle 10<italic>g</italic> relational database served as the central repository for all raw and processed data and the material and process definition.</p><p>The SNP discovery process consisted of four parts, namely (1) GS 20 data processing, (2) CRoPS pre-processing, (3) the CRoPS analysis, and (4) CRoPS SNP mining (<xref ref-type="fig" rid="pone-0001172-g001">Fig. 1</xref>).</p><p>GS 20 data processing was performed on-rig using the standard GS software. This process resulted in the “GS 20 raw sequence reads” that were directly used for further processing in the CRoPS pre-processing step. During pre-processing, the origin of the reads was identified according to their four base sample identification tags. The implementation of this step was based on the internal BLAST function in Oracle 10<italic>g</italic> (Oracle). Furthermore, the AFLP adapter sequences were trimmed. Pre-processed reads were saved to the database. 
In the CRoPS analysis step, reads were clustered and assembled using the TGICL tool <xref ref-type="bibr" rid="pone.0001172-Pertea1">[26]</xref>. Clustering was performed using the following variable parameters: minimum percent identity for overlaps (94%), minimum overlap length (30 nt) and maximum length of unmatched overhangs (30 nt). Again, all data obtained were subsequently saved to the Oracle database.</p><p>Polymorphisms were selected during the SNP mining step. For each putative SNP a number of features were recorded, including relative position in the consensus sequence, sample count in the MSA, allele count per sample, the MSA depth (number of reads at the SNP position), distance to flanking SNPs, flanking size around SNP and the presence of a homopolymer stretch in which a SNP may occur. Mining rules were created from these features and defined as follows: 1) each allele should be present at least twice in a MSA, 2) SNPs should not be part of homopolymers larger than 3 bases, and 3) SNPs should be segregating according to sample origin. SNPs that passed the filters were selected as the best candidates for conversion into genotyping assays.</p><p>The pre-processed sequence data will be deposited at the NCBI Short Read Archive (SRA) as soon as this archive is ready to accept the data (expected at the end of 2007). Until then, the data can be requested from the authors.</p></sec><sec id="s4e"><title>SNPWave</title><p>Probes were designed for 30 putative SNPs in two multiplex (15-plex) SNPWave assays using ProbeDesigner software (Keygene N.V.) as described previously <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>. SNPWave reactions were carried out as described previously <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>. 
In short, ligation reactions were carried out in 10 µl volume containing 200–400 ng total genomic DNA, 1×Taq DNA ligase buffer [20 mM Tris-HCl, 25 mM KAc, 10 mM MgAc<sub>2</sub>, 10 mM dithiothreitol (DTT), 1 mM NAD, 0.1% Triton X-100; pH 7.6 at 25°C; New England Biolabs Inc], 2 U Taq DNA ligase (New England Biolabs Inc) and 0.5 fmol of each of the ligation probes. Next, 10 cycles of repeated denaturation, probe hybridization and ligation were performed in a Perkin Elmer 9700 thermal cycler (Applied Biosystems) using the following profile: initial denaturation for 2 min at 94°C, followed by 10 cycles of 15 s at 94°C and 60 min at 60°C, and storage at 4°C. After ligation, the mixture was diluted with 30 µl of 1×Taq DNA ligase buffer to 40 µl.</p><p>Ten µl of diluted ligation reaction was subsequently amplified in a 20 µl mixture containing 1×GeneAmp® PCR buffer (Applied Biosystems), 0.2 mM of each dNTP, 0.4 U AmpliTaq Gold DNA polymerase (Applied Biosystems), and 30 ng unlabeled forward primer (5′-GACTGCGTACCAATTC-3′) and 30 ng FAM-labeled reverse primer (5′-GATGAGTCCTGAGTAA-3′). The amplification profile was 12 min at 94°C, followed by 13 cycles of 30 s at 94°C, 30 s at 65°C with a reduction of 0.7°C per cycle to 56°C in cycle 13, followed by 1 min at 72°C. This was followed by 23 cycles of 30 s at 94°C, 30 s at 56°C and 1 min at 72°C, and storage at 4°C.</p><p>Purification of diluted SNPWave PCR products and subsequent detection on the MegaBACE 1000 (Amersham Biosciences/GE Healthcare Life Sciences) were as described previously <xref ref-type="bibr" rid="pone.0001172-VanEijk1">[24]</xref>.</p><p>SNPs and flanking sequences can be found in the supplementary file (<xref ref-type="supplementary-material" rid="pone.0001172.s001">Table S1</xref>). 
Probe sequences are available upon request from the authors.</p></sec></sec><sec id="s5"><title>Supporting Information</title><supplementary-material id="pone.0001172.s001" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0001172.s001" xlink:type="simple"><label>Table S1</label><caption><p>SNPs and flanking sequences used for the 13-plex and 10-plex SNPWave assays</p><p>(0.04 MB DOC)</p></caption></supplementary-material></sec></body><back><ack><p>We thank Rolf Mank and the staff of the MegaBACE unit at Keygene for excellent technical assistance.</p><p>The AFLP®, CRoPS™, SNPWave® and KeyGene™ SeqTag technologies are covered by patents and patent applications owned by Keygene N.V. AFLP and SNPWave are registered trademarks of Keygene NV. Applications for trademark registration for CRoPS and KeyGene have been filed by Keygene N.V. Other (registered) trademarks are the property of the respective owners.</p></ack><ref-list><title>References</title><ref id="pone.0001172-The1"><label>1</label><element-citation publication-type="journal" xlink:type="simple">             <collab xlink:type="simple">The Arabidopsis Initiative</collab>             <year>2000</year>             <article-title>Analysis of the genome sequence of the flowering plant Arabidopsis thaliana.</article-title>             <source>Nature</source>             <volume>408</volume>             <fpage>796</fpage>             <lpage>815</lpage>          </element-citation></ref><ref id="pone.0001172-International1"><label>2</label><element-citation publication-type="journal" xlink:type="simple">             <collab xlink:type="simple">International Rice Genome Sequencing Project</collab>             <year>2005</year>             <article-title>The map-based sequence of the rice genome.</article-title>             <source>Nature</source>             <volume>436</volume>             <fpage>793</fpage>             <lpage>800</lpage>          </element-citation></ref><ref 
id="pone.0001172-SanMiguel1"><label>3</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>SanMiguel</surname><given-names>P</given-names></name><name name-style="western"><surname>Tikhonov</surname><given-names>A</given-names></name><name name-style="western"><surname>Jin</surname><given-names>Y-K</given-names></name><name name-style="western"><surname>Motchoulskaia</surname><given-names>N</given-names></name><name name-style="western"><surname>Zakharov</surname><given-names>D</given-names></name><etal/></person-group>             <year>1996</year>             <article-title>Nested retrotransposons in the intergenic regions of the maize genome.</article-title>             <source>Science</source>             <volume>274</volume>             <fpage>765</fpage>             <lpage>768</lpage>          </element-citation></ref><ref id="pone.0001172-Li1"><label>4</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>W</given-names></name><name name-style="western"><surname>Zhang</surname><given-names>P</given-names></name><name name-style="western"><surname>Fellers</surname><given-names>JP</given-names></name><name name-style="western"><surname>Friebe</surname><given-names>B</given-names></name><name name-style="western"><surname>Gill</surname><given-names>BS</given-names></name></person-group>             <year>2004</year>             <article-title>Sequence composition, organization, and evolution of the core Triticeae genome.</article-title>             <source>Plant J.</source>             <volume>40</volume>             <fpage>500</fpage>             <lpage>511</lpage>          </element-citation></ref><ref id="pone.0001172-Swaminathan1"><label>5</label><element-citation publication-type="journal" xlink:type="simple">          
   <person-group person-group-type="author"><name name-style="western"><surname>Swaminathan</surname><given-names>K</given-names></name><name name-style="western"><surname>Varala</surname><given-names>K</given-names></name><name name-style="western"><surname>Hudson</surname><given-names>ME</given-names></name></person-group>             <year>2007</year>             <article-title>Global repeat discovery and estimation of genomic copy number in a large, complex genome using a high-throughput 454 sequence survey.</article-title>             <source>BMC Genomics</source>             <volume>8</volume>             <fpage>1471</fpage>             <lpage>2164</lpage>          </element-citation></ref><ref id="pone.0001172-An1"><label>6</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>An</surname><given-names>CS</given-names></name><name name-style="western"><surname>Kim</surname><given-names>SC</given-names></name><name name-style="western"><surname>Go</surname><given-names>SL</given-names></name></person-group>             <year>1996</year>             <article-title>Analysis of red pepper (<italic>Capsicum annuum</italic>) genome.</article-title>             <source>J. 
Plant Biol.</source>             <volume>39</volume>             <fpage>57</fpage>             <lpage>61</lpage>          </element-citation></ref><ref id="pone.0001172-Oliphant1"><label>7</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oliphant</surname><given-names>A</given-names></name><name name-style="western"><surname>Barker</surname><given-names>DL</given-names></name><name name-style="western"><surname>Stuelpnagel</surname><given-names>JR</given-names></name><name name-style="western"><surname>Chee</surname><given-names>MS</given-names></name></person-group>             <year>2002</year>             <article-title>BeadArray™ technology: enabling an accurate, cost-effective approach to high-throughput genotyping.</article-title>             <source>Biotechniques</source>             <volume>32</volume>             <fpage>S56</fpage>             <lpage>S61</lpage>          </element-citation></ref><ref id="pone.0001172-Hardenbol1"><label>8</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Hardenbol</surname><given-names>P</given-names></name><name name-style="western"><surname>Banér</surname><given-names>J</given-names></name><name name-style="western"><surname>Jain</surname><given-names>M</given-names></name><name name-style="western"><surname>Nilsson</surname><given-names>M</given-names></name><name name-style="western"><surname>Namsaraev</surname><given-names>E</given-names></name><etal/></person-group>             <year>2003</year>             <article-title>Multiplexed genotyping with sequence-tagged molecular inversion probes.</article-title>             <source>Nat. 
Biotechnol.</source>             <volume>21</volume>             <fpage>673</fpage>             <lpage>678</lpage>          </element-citation></ref><ref id="pone.0001172-Kennedy1"><label>9</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kennedy</surname><given-names>GC</given-names></name><name name-style="western"><surname>Matsuzaki</surname><given-names>H</given-names></name><name name-style="western"><surname>Dong</surname><given-names>S</given-names></name><name name-style="western"><surname>Liu</surname><given-names>W-M</given-names></name><name name-style="western"><surname>Huang</surname><given-names>J</given-names></name><etal/></person-group>             <year>2003</year>             <article-title>Large-scale genotyping of complex DNA.</article-title>             <source>Nat. Biotechnol.</source>             <volume>21</volume>             <fpage>1233</fpage>             <lpage>1237</lpage>          </element-citation></ref><ref id="pone.0001172-Matsuzaki1"><label>10</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Matsuzaki</surname><given-names>H</given-names></name><name name-style="western"><surname>Loi</surname><given-names>H</given-names></name><name name-style="western"><surname>Dong</surname><given-names>S</given-names></name><name name-style="western"><surname>Tsai</surname><given-names>Y-Y</given-names></name><name name-style="western"><surname>Fang</surname><given-names>J</given-names></name><etal/></person-group>             <year>2004</year>             <article-title>Parallel genotyping of over 10,000 SNPs using a one-primer assay on a high-density oligonucleotide array.</article-title>             <source>Genome Res.</source>             <volume>14</volume>             <fpage>414</fpage>             
<lpage>425</lpage>          </element-citation></ref><ref id="pone.0001172-The2"><label>11</label><element-citation publication-type="journal" xlink:type="simple">             <collab xlink:type="simple">The international HapMap Consortium</collab>             <year>2005</year>             <article-title>A haplotype map of the human genome.</article-title>             <source>Nature</source>             <volume>437</volume>             <fpage>1299</fpage>             <lpage>1320</lpage>          </element-citation></ref><ref id="pone.0001172-Thomas1"><label>12</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>RK</given-names></name><name name-style="western"><surname>Nickerson</surname><given-names>E</given-names></name><name name-style="western"><surname>Simons</surname><given-names>JF</given-names></name><name name-style="western"><surname>Jänne</surname><given-names>PA</given-names></name><name name-style="western"><surname>Tengs</surname><given-names>T</given-names></name><etal/></person-group>             <year>2006</year>             <article-title>Sensitive mutation detection in heterogeneous cancer specimens by massive parallel picoliter reactor sequencing.</article-title>             <source>Nat. 
Med.</source>             <volume>12</volume>             <fpage>852</fpage>             <lpage>855</lpage>          </element-citation></ref><ref id="pone.0001172-Arumuganathan1"><label>13</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Arumuganathan</surname><given-names>K</given-names></name><name name-style="western"><surname>Earle</surname><given-names>ED</given-names></name></person-group>             <year>1991</year>             <article-title>Nuclear DNA content of some important plant species.</article-title>             <source>Plant Mol. Biol. Rep.</source>             <volume>9</volume>             <fpage>208</fpage>          </element-citation></ref><ref id="pone.0001172-Meyers1"><label>14</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Meyers</surname><given-names>BC</given-names></name><name name-style="western"><surname>Tingey</surname><given-names>SV</given-names></name><name name-style="western"><surname>Morgante</surname><given-names>M</given-names></name></person-group>             <year>2001</year>             <article-title>Abundance, distribution, and transcriptional activity of repetitive elements in the maize genome.</article-title>             <source>Genome Res.</source>             <volume>11</volume>             <fpage>1660</fpage>             <lpage>1676</lpage>          </element-citation></ref><ref id="pone.0001172-Peterson1"><label>15</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Peterson</surname><given-names>DG</given-names></name><name name-style="western"><surname>Wessler</surname><given-names>SR</given-names></name><name 
name-style="western"><surname>Paterson</surname><given-names>AH</given-names></name></person-group>             <year>2002</year>             <article-title>Efficient capture of unique sequences from eukaryotic genomes.</article-title>             <source>Trends Genet.</source>             <volume>18</volume>             <fpage>547</fpage>             <lpage>550</lpage>          </element-citation></ref><ref id="pone.0001172-Whitelaw1"><label>16</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Whitelaw</surname><given-names>CA</given-names></name><name name-style="western"><surname>Barbazuk</surname><given-names>WB</given-names></name><name name-style="western"><surname>Pertea</surname><given-names>G</given-names></name><name name-style="western"><surname>Chan</surname><given-names>AP</given-names></name><name name-style="western"><surname>Cheung</surname><given-names>F</given-names></name><etal/></person-group>             <year>2003</year>             <article-title>Enrichment of gene-coding sequences in maize by genome filtration.</article-title>             <source>Science</source>             <volume>302</volume>             <fpage>2118</fpage>             <lpage>2120</lpage>          </element-citation></ref><ref id="pone.0001172-Emberton1"><label>17</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Emberton</surname><given-names>J</given-names></name><name name-style="western"><surname>Ma</surname><given-names>J</given-names></name><name name-style="western"><surname>Yuan</surname><given-names>Y</given-names></name><name name-style="western"><surname>SanMiguel</surname><given-names>P</given-names></name><name name-style="western"><surname>Bennetzen</surname><given-names>JL</given-names></name></person-group>             <year>2005</year> 
            <article-title>Gene enrichment in maize with hypomethylated partial restriction (HMPR) libraries.</article-title>             <source>Genome Res.</source>             <volume>15</volume>             <fpage>1441</fpage>             <lpage>1446</lpage>          </element-citation></ref><ref id="pone.0001172-Bennetzen1"><label>18</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Bennetzen</surname><given-names>JL</given-names></name><name name-style="western"><surname>Schrick</surname><given-names>K</given-names></name><name name-style="western"><surname>Springer</surname><given-names>PS</given-names></name><name name-style="western"><surname>Brown</surname><given-names>WE</given-names></name><name name-style="western"><surname>SanMiguel</surname><given-names>P</given-names></name></person-group>             <year>1994</year>             <article-title>Active maize genes are unmodified and flanked by diverse classes of modified, highly repetitive DNA.</article-title>             <source>Genome</source>             <volume>37</volume>             <fpage>565</fpage>             <lpage>576</lpage>          </element-citation></ref><ref id="pone.0001172-Rabinowicz1"><label>19</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Rabinowicz</surname><given-names>PD</given-names></name><name name-style="western"><surname>Schutz</surname><given-names>K</given-names></name><name name-style="western"><surname>Dedhia</surname><given-names>N</given-names></name><name name-style="western"><surname>Yordan</surname><given-names>C</given-names></name><name name-style="western"><surname>Parnell</surname><given-names>LD</given-names></name><etal/></person-group>             <year>1999</year>             <article-title>Differential methylation of genes and 
retrotransposons facilitates shotgun sequencing of the maize genome.</article-title>             <source>Nature Genet.</source>             <volume>23</volume>             <fpage>305</fpage>             <lpage>308</lpage>          </element-citation></ref><ref id="pone.0001172-Vos1"><label>20</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Vos</surname><given-names>P</given-names></name><name name-style="western"><surname>Hogers</surname><given-names>R</given-names></name><name name-style="western"><surname>Bleeker</surname><given-names>M</given-names></name><name name-style="western"><surname>Reijans</surname><given-names>M</given-names></name><name name-style="western"><surname>van de Lee</surname><given-names>T</given-names></name><etal/></person-group>             <year>1995</year>             <article-title>AFLP: a new technique for DNA fingerprinting.</article-title>             <source>Nucleic Acids Res.</source>             <volume>23</volume>             <fpage>4407</fpage>             <lpage>4414</lpage>          </element-citation></ref><ref id="pone.0001172-Zabeau1"><label>21</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Zabeau</surname><given-names>M</given-names></name><name name-style="western"><surname>Vos</surname><given-names>P</given-names></name></person-group>             <year>1993</year>             <article-title>Selective restriction fragment amplification; a general method for DNA fingerprinting.</article-title>             <source>EP 0534858-A1,B1; US patent 6045994.</source>          </element-citation></ref><ref id="pone.0001172-Vuylsteke1"><label>22</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name 
name-style="western"><surname>Vuylsteke</surname><given-names>M</given-names></name><name name-style="western"><surname>Peleman</surname><given-names>JD</given-names></name><name name-style="western"><surname>van Eijk</surname><given-names>MJT</given-names></name></person-group>             <year>2007</year>             <article-title>AFLP technology for DNA fingerprinting.</article-title>             <source>Nature protocols</source>             <volume>2</volume>             <fpage>1387</fpage>             <lpage>1398</lpage>          </element-citation></ref><ref id="pone.0001172-Margulies1"><label>23</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Margulies</surname><given-names>M</given-names></name><name name-style="western"><surname>Egholm</surname><given-names>M</given-names></name><name name-style="western"><surname>Altman</surname><given-names>WE</given-names></name><name name-style="western"><surname>Attiya</surname><given-names>S</given-names></name><name name-style="western"><surname>Bader</surname><given-names>JS</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>Genome sequencing in microfabricated high-density picolitre reactors.</article-title>             <source>Nature</source>             <volume>437</volume>             <fpage>376</fpage>             <lpage>380</lpage>          </element-citation></ref><ref id="pone.0001172-VanEijk1"><label>24</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Van Eijk</surname><given-names>MJT</given-names></name><name name-style="western"><surname>Broekhof</surname><given-names>JLN</given-names></name><name name-style="western"><surname>van der Poel</surname><given-names>HJA</given-names></name><name 
name-style="western"><surname>Hogers</surname><given-names>RCJ</given-names></name><name name-style="western"><surname>Schneiders</surname><given-names>H</given-names></name><etal/></person-group>             <year>2004</year>             <article-title>SNPWave™: a flexible multiplexed SNP genotyping technology.</article-title>             <source>Nucleic Acids Res.</source>             <volume>32</volume>             <fpage>e47</fpage>          </element-citation></ref><ref id="pone.0001172-Weber1"><label>25</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Weber</surname><given-names>APM</given-names></name><name name-style="western"><surname>Weber</surname><given-names>KL</given-names></name><name name-style="western"><surname>Carr</surname><given-names>K</given-names></name><name name-style="western"><surname>Wilkerson</surname><given-names>C</given-names></name><name name-style="western"><surname>Ohlrogge</surname><given-names>JB</given-names></name></person-group>             <year>2007</year>             <article-title>Sampling the Arabidopsis transcriptome with massively parallel pyrosequencing.</article-title>             <source>Plant Physiol.</source>             <volume>144</volume>             <fpage>32</fpage>             <lpage>42</lpage>          </element-citation></ref><ref id="pone.0001172-Pertea1"><label>26</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pertea</surname><given-names>G</given-names></name><name name-style="western"><surname>Huang</surname><given-names>X</given-names></name><name name-style="western"><surname>Liang</surname><given-names>F</given-names></name><name name-style="western"><surname>Antonescu</surname><given-names>V</given-names></name><name 
name-style="western"><surname>Sultana</surname><given-names>R</given-names></name><etal/></person-group>             <year>2003</year>             <article-title>TIGR Gene Indices clustering tools (TGICL): a software system for fast clustering of large EST datasets.</article-title>             <source>Bioinformatics</source>             <volume>19</volume>             <fpage>651</fpage>             <lpage>652</lpage>          </element-citation></ref><ref id="pone.0001172-Binladen1"><label>27</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Binladen</surname><given-names>J</given-names></name><name name-style="western"><surname>Gilbert</surname><given-names>MTP</given-names></name><name name-style="western"><surname>Bollback</surname><given-names>JP</given-names></name><name name-style="western"><surname>Panitz</surname><given-names>F</given-names></name><name name-style="western"><surname>Bendixen</surname><given-names>C</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>The use of coded PCR primers enables high-throughput sequencing of multiple homolog amplification products by 454 parallel sequencing.</article-title>             <source>PLoS ONE</source>             <volume>2</volume>             <fpage>e197</fpage>          </element-citation></ref><ref id="pone.0001172-Barbazuk1"><label>28</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Barbazuk</surname><given-names>WB</given-names></name><name name-style="western"><surname>Emrich</surname><given-names>SJ</given-names></name><name name-style="western"><surname>Chen</surname><given-names>HD</given-names></name><name name-style="western"><surname>Schnable</surname><given-names>PS</given-names></name></person-group>             
<year>2007</year>             <article-title>SNP discovery via 454 transcriptome sequencing.</article-title>             <source>Plant J.</source>             <volume>51</volume>             <fpage>910</fpage>             <lpage>918</lpage>          </element-citation></ref><ref id="pone.0001172-Stuart1"><label>29</label><element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Stuart</surname><given-names>CN</given-names></name><name name-style="western"><surname>Via</surname><given-names>LE</given-names></name></person-group>             <year>1993</year>             <article-title>A rapid CTAB DNA isolation technique useful for RAPD fingerprinting and other PCR applications.</article-title>             <source>Biotechniques</source>             <volume>14</volume>             <fpage>748</fpage>             <lpage>750</lpage>          </element-citation></ref></ref-list></back></article>