<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007314</article-id>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-19-00495</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Education</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Bioinformatics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Science policy</subject><subj-group><subject>Science and technology workforce</subject><subj-group><subject>Careers in research</subject><subj-group><subject>Scientists</subject><subj-group><subject>Biologists</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Population groupings</subject><subj-group><subject>Professions</subject><subj-group><subject>Scientists</subject><subj-group><subject>Biologists</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Microbial genomics</subject><subj-group><subject>Viral genomics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Microbial genomics</subject><subj-group><subject>Viral genomics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Virology</subject><subj-group><subject>Viral genomics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Genome sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Genome sequencing</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Metagenomics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Nanopore sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Nanopore sequencing</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>An educational guide for nanopore sequencing in the classroom</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-8453-8769</contrib-id>
<name name-style="western">
<surname>Salazar</surname>
<given-names>Alex N.</given-names>
</name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-8238-1083</contrib-id>
<name name-style="western">
<surname>Nobrega</surname>
<given-names>Franklin L.</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-4208-0697</contrib-id>
<name name-style="western">
<surname>Anyansi</surname>
<given-names>Christine</given-names>
</name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Aparicio-Maldonado</surname>
<given-names>Cristian</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Costa</surname>
<given-names>Ana Rita</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Haagsma</surname>
<given-names>Anna C.</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-3797-6616</contrib-id>
<name name-style="western">
<surname>Hiralal</surname>
<given-names>Anwar</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-8601-2149</contrib-id>
<name name-style="western">
<surname>Mahfouz</surname>
<given-names>Ahmed</given-names>
</name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>McKenzie</surname>
<given-names>Rebecca E.</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-5219-1799</contrib-id>
<name name-style="western">
<surname>van Rossum</surname>
<given-names>Teunke</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Brouns</surname>
<given-names>Stan J. J.</given-names>
</name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-7205-7431</contrib-id>
<name name-style="western">
<surname>Abeel</surname>
<given-names>Thomas</given-names>
</name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Delft Bioinformatics Laboratory, Delft University of Technology, Delft, Netherlands</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Kavli Institute of Nanoscience, Department of Bionanoscience, Delft University of Technology, Delft, Netherlands</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Broad Institute of MIT and Harvard, Boston, Massachusetts, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Leiden Computational Biology Center, Leiden University Medical Center, Leiden, Netherlands</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Ouellette</surname>
<given-names>Francis</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Toronto, CANADA</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">T.Abeel@tudelft.nl</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>1</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<month>1</month>
<year>2020</year>
</pub-date>
<volume>16</volume>
<issue>1</issue>
<elocation-id>e1007314</elocation-id>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Salazar et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1007314"/>
<abstract>
<p>The last decade has witnessed a remarkable increase in our ability to measure genetic information. Advancements of sequencing technologies are challenging the existing methods of data storage and analysis. While methods to cope with the data deluge are progressing, many biologists have lagged behind due to the fast pace of computational advancements and tools available to address their scientific questions. Future generations of biologists must be more computationally aware and capable. This means they should be trained in the computational skills needed to keep pace with technological developments. Here, we propose a model that bridges experimental and bioinformatics concepts using the Oxford Nanopore Technologies (ONT) sequencing platform. We provide both a guide to begin to empower the new generation of educators, scientists, and students in performing long-read assembly of bacterial and bacteriophage genomes and a standalone virtual machine containing all the required software and learning materials for the course.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Genomes contain all the information required for an organism to function. Understanding the genome sequence is often the key to answer important biological questions. For example, the sequences of human genomes are used for diagnosis of genetic disorders or for the development of personalized treatments, while the sequences of microbes may inform about their mechanisms of infection and guide the development of novel drugs. Today, our capacity to generate genome sequencing data is tremendous. However, our capacity to process this information is insufficient. This is partially due to limitations of current methods for data analysis but is mostly caused by lack of training for most biologists to leverage high-throughput sequencing data and use their full potential. It is urgent that we train the new generations of biologists to become computationally aware and able to keep pace with technological developments in the field. In this manuscript, we illustrate our efforts in adopting an integrated teaching model that bridges experimental and bioinformatics works. Our course integrates data generation in the lab with bioinformatics work to illustrate the interlinking of lab practices and downstream effects. In our demonstration course, we used nanopore sequencing to train nanobiology students, but the model is easily customizable to suit students of different educational backgrounds or alternative technologies. The tools we provide help not only science educators but also biologists to address many relevant questions in biology.</p>
</abstract>
<funding-group>
<funding-statement>ANS is supported by a grant from the BE-Basic Foundation related to FES funds from the Dutch Ministry of Economic Affairs. FLN is supported by the Netherlands Organization for Scientific Research (NWO) Veni grant 016.Veni.181.092. REM is supported by an NWO Frontiers of Nanoscience (NanoFront) grant. SJJB is supported by European Research Council (ERC) Stg grant 639707 and NWO Vici grant. Oxford Nanopore Technologies provided some consumables for the course. Funders had no role in the conceptualization, design, data collection, analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="0"/>
<page-count count="7"/>
</counts>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>What defines a biologist? In short, a biologist is a person who studies life and living organisms. But this simple definition hides the true complexity of the field of biology. Biology covers diverse topics such as molecular biology, structural biology, ecology, evolution, genetics, microbiology, immunology, and biotechnology. Importantly, most (if not all) of these topics have undergone incredible progress due to rapid discoveries and technological advances [<xref ref-type="bibr" rid="pcbi.1007314.ref001">1</xref>,<xref ref-type="bibr" rid="pcbi.1007314.ref002">2</xref>]. As such, a modern biologist has the inevitable tasks of adapting to rapid change and mastering new knowledge and technology.</p>
<p>One of the most important revolutions in the field of biology was caused by the development of next-generation sequencing (NGS) technologies. Using massively parallel processing of samples, NGS dramatically reduces sequencing time and costs, enabling the sequencing of entire genomes. Currently, genome sequencing and analysis have become a crucial component in biology, as evidenced by recent scientific breakthroughs [<xref ref-type="bibr" rid="pcbi.1007314.ref003">3</xref>,<xref ref-type="bibr" rid="pcbi.1007314.ref004">4</xref>] and by the exponential increase of reported genomes on GenBank (e.g., from 30,000 sequenced prokaryotic genomes in 2014 [<xref ref-type="bibr" rid="pcbi.1007314.ref005">5</xref>] to 183,000 in 2018 [<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/genome/browse/#!/overview/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/genome/browse/#!/overview/</ext-link>], a 6-fold increase in only 4 years). Thus, not only do biologists need to adapt and learn how to use these emerging technologies, they also need to learn how to mine the ever-growing mountain of genomic information they generate, which requires bioinformatics skills. Now, the question is how do we train this generation of biologists so that they have the required computational skills?</p>
</sec>
<sec id="sec002">
<title>Bridging bioinformatics to biologists</title>
<p>Over the past few years, we have taught introductory bioinformatics to undergraduate (second year BSc) biology students with basic molecular biology training. They are versed in standard techniques (such as basic DNA extractions and PCR) but are unfamiliar with specific DNA sequencing chemistries. In the past, this mandatory computational course was entirely disconnected from lab work, making it hard for students to grasp how bioinformatics and biology are connected. To address this disconnect, we here share a more integrated approach to teach bioinformatics to biology students. These students have a conceptual grasp of sequencing and bioinformatics but not the detailed view on how various lab techniques (e.g., NGS chemistries) combined with various analysis methods (e.g., assembly, variant calling) can be used to answer specific biological questions and how these techniques interact with each other.</p>
<p>The overall idea is to start from what students are already familiar with (i.e., biology) and expand from there. There are 4 types of learning activities in the course (see <xref ref-type="fig" rid="pcbi.1007314.g001">Fig 1</xref>): (1) lectures in which students receive classroom instruction on bioinformatics topics, (2) practical sessions in which students apply the material from the lectures to solve practical exercises supervised by teaching assistants, (3) lab work in which sequencing data are generated, and (4) a project that applies the bioinformatics concepts learned in the lectures to data from the lab work. This is concluded by a poster session in which all students get to review each other’s work. A week-by-week overview can be found in <xref ref-type="supplementary-material" rid="pcbi.1007314.s001">S1 Table</xref>.</p>
<fig id="pcbi.1007314.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007314.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Course overview.</title>
<p>Integrated bioinformatics training with time on the x-axis. Lectures (green) give students the necessary background to execute and understand Practical (blue) and Project (purple) sessions. Laboratory sessions (yellow) enable students to employ their biological background and prepare their own DNA libraries from samples of interest. Libraries prepared by each student group are pooled together and run on a MinION device (Oxford Nanopore Technologies, Oxford, UK), generating data to be processed in Project sessions. Backup data previously prepared from the same samples can be used if the students’ MinION run fails to provide enough quality data for analysis. In the Practical sessions, students learn to use established bioinformatics methods, with an emphasis on processing long-read data (see <xref ref-type="fig" rid="pcbi.1007314.g002">Fig 2</xref>, <xref ref-type="supplementary-material" rid="pcbi.1007314.s001">S1 Table</xref> and <xref ref-type="supplementary-material" rid="pcbi.1007314.s002">S1 Text</xref>). In the Project sessions, they then apply these methods to the generated data to answer specific research questions. After intragroup and intergroup discussions of results, students prepare their final project report and present their results in a poster format.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007314.g001" xlink:type="simple"/>
</fig>
<p>The formula presented here focuses on introducing bioinformatics to biology students, helping them to acquire the skills and insights needed to operate and troubleshoot existing algorithms. The course does not focus on developing skills needed to create novel algorithms or models.</p>
<p>During the pilot run of this course in the academic year from 2017 to 2018, we used Oxford Nanopore Technologies (ONT) MinION sequencing as a data generation platform. This platform was selected because it has low capital cost and is a new exciting technology easy to engage students with. Real-time data acquisition gives immediate feedback to the students that data are being produced, even if they have to keep it running overnight. It is easy to imagine they could get one of these devices at home. Students can see themselves as scientists, as people discovering something new, an idea that we really like to foster. Ultimately, any fast, cheap, and accessible sequencing platform would be good for our goals, yet only MinION is currently available.</p>
<p>MinION has already made its way into undergraduate and graduate courses [<xref ref-type="bibr" rid="pcbi.1007314.ref006">6</xref>,<xref ref-type="bibr" rid="pcbi.1007314.ref007">7</xref>]. Some of these courses focused on data analysis; they organized hackathons in which students needed to devise a pipeline to infer the ingredients of food DNA samples or identify human DNA samples [<xref ref-type="bibr" rid="pcbi.1007314.ref006">6</xref>]. Others developed the application of MinION further by also teaching laboratory techniques for DNA extraction and sequencing library preparation [<xref ref-type="bibr" rid="pcbi.1007314.ref007">7</xref>].</p>
<p>Additionally, the portable size of ONT’s MinION and the simplicity of library preparation enable scientists to use this technology in a wide variety of environments, including a standard classroom [<xref ref-type="bibr" rid="pcbi.1007314.ref008">8</xref>–<xref ref-type="bibr" rid="pcbi.1007314.ref010">10</xref>]. As such, this device is attractive not only for researchers but also for educational instructors: If this technology is empowering scientists to embark on novel scientific studies, why not also empower young students to embark on effective educational experiences?</p>
</sec>
<sec id="sec003">
<title>Integrating nanopore sequencing in the classroom</title>
<p>The challenge set for students in our course was to identify and discover novel phages from environmental samples and to reconstruct complete genomes from single-isolate and metagenomics samples. The students had to address the following research questions, which were introduced at the very beginning of the course: (1) Can we assemble and annotate fully closed genomes from a small number of long reads? (2) What are the considerations for the assembly of metagenomics samples compared to single isolates? (3) What is the advantage of long-read sequencing for the analysis of metagenomics samples? (4) Can we identify virulent and temperate phages in metagenomics samples? (5) What genes of interest can we find in both bacteria and phage genomes?</p>
<p>Twenty-four groups of 4 students (96 total) prepared their own DNA libraries of various single-isolate bacterial, bacteriophage, and metagenomic samples in the classroom. Number of groups and their size were determined to allow for sufficient supervision within the available lab space. If possible, smaller groups are preferable to increase the hands-on time of each student. We would like to emphasize the benefits of having multiple groups working on different related samples (e.g., each barcode represents a similar but different microbial isolate). This allows groups to initiate discussions about differences in their own findings—such as unique sequences, structural variants and presence and/or absence of genes—and hypothesize how those differences may influence the phenotypic traits of their sample. This exercise helps them further appreciate the value of bioinformatics skills in a biological setting and how the 2 are ultimately connected.</p>
<p>The DNA libraries were prepared using the rapid barcoding kit (SQK-RBK004), which has fewer steps than other available kits and thus allows the procedure to be completed within the 3-hour timeframe of the class. For longer sessions, the ligation sequencing kit (SQK-LSK109) could be used, increasing the robustness and throughput of the experiment. Both kits allow for barcoding of multiple genomic DNA samples. Samples were prepared individually by each group and then barcoded and pooled together at different proportions depending on the success of each group. When sequencing runs failed, the student was supplied with previously generated backup data.</p>
<p>After running DNA samples in MinION, students performed quality control of their data and then assembled the genomes. As we focused on teaching technical concepts of bioinformatics, we provided a computational guide (see <xref ref-type="supplementary-material" rid="pcbi.1007314.s002">S1 Text</xref> and summary in <xref ref-type="fig" rid="pcbi.1007314.g002">Fig 2</xref>) containing ready-to-go commands and scripts for commonly performed tasks that can be broadly used with MinION data. To facilitate the use of this guide, we provided a standalone virtual machine containing all required software used in <xref ref-type="supplementary-material" rid="pcbi.1007314.s002">S1 Text</xref>.</p>
<fig id="pcbi.1007314.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007314.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Pipeline for genome assembly using MinION data.</title>
<p>First, the barcoded sequences are demultiplexed using Deepbinner [<xref ref-type="bibr" rid="pcbi.1007314.ref011">11</xref>] and basecalled using Albacore (Oxford Nanopore Technologies, Oxford, UK). Nanoplot [<xref ref-type="bibr" rid="pcbi.1007314.ref012">12</xref>] is used to assess the quality of the sequencing data for downstream processing. If the data have sufficient quality, they are used for assembly using, e.g., Canu [<xref ref-type="bibr" rid="pcbi.1007314.ref013">13</xref>]. Confidence in the resulting consensus assembly is obtained using Minimap2 [<xref ref-type="bibr" rid="pcbi.1007314.ref014">14</xref>]. The assembly is polished to remove common mistakes using Nanopolish [<xref ref-type="bibr" rid="pcbi.1007314.ref015">15</xref>], and then Circlator [<xref ref-type="bibr" rid="pcbi.1007314.ref016">16</xref>] is used to determine the zero-based start of the genome, which depends on whether it is a bacterial sequence or a bacteriophage sequence. Finally, the assembled genome is annotated using Prokka [<xref ref-type="bibr" rid="pcbi.1007314.ref017">17</xref>]. Please refer to <xref ref-type="supplementary-material" rid="pcbi.1007314.s002">S1 Text</xref> for further details.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007314.g002" xlink:type="simple"/>
</fig>
<p>Once data processing was completed, students pursued a variety of research questions, such as investigating the genomic composition of their bacterial sample as well as the population composition of their metagenomics sample. For example, students would determine the bacteriophage species in their barcoded sample and compare their assembled genome to that of the closest reference genome found in the National Center for Biotechnology Information (NCBI) reference sequencing database (RefSeq). In all cases, students found that their assembly had little overlap with the reference, prompting discussions about the novelty of the genetic content in their phage.</p>
<p>Students ran Centrifuge [<xref ref-type="bibr" rid="pcbi.1007314.ref018">18</xref>], a species classification and quantification tool, on their metagenomics sample and generally concluded a mixture of viral and bacterial species. This process stimulated discussion about a number of course-related topics: (1) limitations of k-mer-based tools (e.g., k-mers are not always unique to individual species), (2) biases when comparing against a reference data set (e.g., you can only classify what you have previously observed), (3) understanding bacteriophage biology (e.g., phages can integrate their DNA in a bacterial host; therefore, sequences that are labeled as “bacteria” may actually correspond to integrated phage DNA), and (4) understanding whether long-read sequencing is advantageous to the scientific question addressed (e.g., long-read sequencing helps improve assembly quality of metagenomes, but the high error rates of the technology still limit its usefulness; here, combining short-read and long-read data could be the best approach to improved contiguity and base pair–level accuracy). These topics were framed to explore how they may affect the student’s computational observations.</p>
</sec>
<sec id="sec004">
<title>Impact of integrated bioinformatics education</title>
<p>Through the integrated approach in our course, students can easily grasp the direct influence of the experimental protocol on data quality. For example, a student’s excessive pipetting leads to observably shorter read-length distributions, resulting in fewer unique overlaps in the pairwise alignments, a less contiguous assembly graph, and ultimately more fragmented assemblies. Furthermore, the setup is sufficiently generic that different scientific questions could be addressed using this pipeline, and it is sufficiently flexible to adjust to the students’ background.</p>
<p>We experienced increased interest and engagement in our course from both the instructors and the students. Students were much more interested in the course content because they could assume scientific responsibility and ownership. Spending several hours or days in the lab goes a long way to make “scientists-to-be” feel “this is my data.”</p>
<p>The instructors leveraged the practical classes as an opportunity to generate and analyze data for potential pilot studies, i.e., preliminary data for the next round of grants. In our pilot version of the course, the experiments were chosen such that they contribute to ongoing research in the lab. As a result, we generated several follow-up project ideas, one of which resulted in a master’s thesis on heterogeneity of bacteriophage genomes detected by nanopore sequencing, as well as a tripling of the number of undergraduate lab-rotations in the area of bioinformatics.</p>
<p>Naturally, many of the assignments, including interpretation and comparison of a genome assembly from single bacterial isolates to that of viral samples, were open-ended and initially challenged the students. However, the experience gave them a more realistic impression of academic research and foundational skills to help them in their future career as modern biologists. In particular, different samples required different data interpretations, naturally spurring discussions and collaborations among students. Future editions of such an integrated course could consider even developing the student ownership further by explaining the “problem” and asking students to design the DNA sequencing experiments given the boundaries of the reagents available. With adequate supervision and coaching to include proper controls and experiments, this could lead to even greater collaboration and ownership by the students.</p>
</sec>
<sec id="sec005" sec-type="conclusions">
<title>Conclusion</title>
<p>Considering the fast pace at which sequencing technologies progress and at which genomics data are generated, it is no longer possible to ignore the urgency of equipping young biologists with the required skills to manage the amount and type of sequencing data being generated. Here, we used nanopore sequencing as one possible tool to prepare a new generation of bioinformatics-aware modern biologists. Nanopore sequencing offers an exciting opportunity to not only introduce students to the field of genomics and bioinformatics but also to address advanced biological and computational problems. Simple customizations of the assignments are possible to make the course different every year and to make it suitable for teaching students of different backgrounds, such as computer science (e.g., toolbox handling, algorithm understanding), molecular biology (e.g., genomics, sequencing), or medicine (e.g., pathogen detection, cancer diagnostics). MinION also gives a chance to teach the students how to use different tools and community-based analysis and the importance of constantly updating their knowledge of recent technological developments.</p>
<p>The virtual machine and guide provided herein are intended to assist science educators, as well as geneticists, in addressing timely questions in biology, such as detection of epigenetic modifications, characterization of human genetic variation, real-time detection of pathogens, characterization of structural variation in cancer, and analysis of population transcriptomics.</p>
<p>A walkthrough of ONT assembly of prokaryotic genomes and their viruses is provided in <xref ref-type="supplementary-material" rid="pcbi.1007314.s002">S1 Text</xref>. All materials, including the virtual machine image, are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/AbeelLab/integrated_bioinformatics" xlink:type="simple">https://github.com/AbeelLab/integrated_bioinformatics</ext-link>.</p>
</sec>
<sec id="sec006">
<title>Supporting information</title>
<supplementary-material id="pcbi.1007314.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007314.s001" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Detailed syllabus.</title>
<p>Detailed overview of course activities week by week. Lecture topics, practical topics, and project work align.</p>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007314.s002" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007314.s002" xlink:type="simple">
<label>S1 Text</label>
<caption>
<title>Student walkthrough.</title>
<p>Complete student manual with all work to be performed by students.</p>
<p>(DOCX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pcbi.1007314.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Karas</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hillenkamp</surname> <given-names>F</given-names></name>. <article-title>Laser desorption ionization of proteins with molecular masses exceeding 10,000 daltons</article-title>. <source>Anal Chem</source>. <year>1988</year>;<volume>60</volume>: <fpage>2299</fpage>–<lpage>301</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1021/ac00171a028" xlink:type="simple">10.1021/ac00171a028</ext-link></comment> <object-id pub-id-type="pmid">3239801</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Budnik</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Levy</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Harmange</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Slavov</surname> <given-names>N</given-names></name>. <article-title>SCoPE-MS: mass spectrometry of single mammalian cells quantifies proteome heterogeneity during cell differentiation</article-title>. <source>Genome Biol</source>. <year>2018</year>;<volume>19</volume>: <fpage>161</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-018-1547-5" xlink:type="simple">10.1186/s13059-018-1547-5</ext-link></comment> <object-id pub-id-type="pmid">30343672</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Norton</surname> <given-names>ME</given-names></name>. <article-title>Noninvasive prenatal testing to analyze the fetal genome</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2016</year>;<volume>113</volume>: <fpage>14173</fpage>–<lpage>14175</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1617112113" xlink:type="simple">10.1073/pnas.1617112113</ext-link></comment> <object-id pub-id-type="pmid">27911833</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shendure</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Balasubramanian</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Church</surname> <given-names>GM</given-names></name>, <name name-style="western"><surname>Gilbert</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Rogers</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Schloss</surname> <given-names>JA</given-names></name>, <etal>et al</etal>. <article-title>DNA sequencing at 40: past, present and future</article-title>. <source>Nature</source>. <year>2017</year>;<volume>550</volume>: <fpage>345</fpage>–<lpage>353</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature24286" xlink:type="simple">10.1038/nature24286</ext-link></comment> <object-id pub-id-type="pmid">29019985</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Land</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hauser</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Jun</surname> <given-names>S-R</given-names></name>, <name name-style="western"><surname>Nookaew</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Leuze</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Ahn</surname> <given-names>T-H</given-names></name>, <etal>et al</etal>. <article-title>Insights from 20 years of bacterial genome sequencing</article-title>. <source>Funct Integr Genomics</source>. <year>2015</year>;<volume>15</volume>: <fpage>141</fpage>–<lpage>61</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10142-015-0433-4" xlink:type="simple">10.1007/s10142-015-0433-4</ext-link></comment> <object-id pub-id-type="pmid">25722247</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zaaijer</surname> <given-names>S</given-names></name>, <collab>Columbia University Ubiquitous Genomics 2015 class</collab>, <name name-style="western"><surname>Erlich</surname> <given-names>Y</given-names></name>. <article-title>Using mobile sequencers in an academic classroom</article-title>. <source>Elife</source>. <year>2016</year>;<volume>5</volume>: <elocation-id>e14258</elocation-id>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7554/eLife.14258" xlink:type="simple">10.7554/eLife.14258</ext-link></comment></mixed-citation></ref>
<ref id="pcbi.1007314.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zeng</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Martin</surname> <given-names>CH</given-names></name>. <article-title>Oxford Nanopore sequencing in a research-based undergraduate course</article-title>. <source>bioRxiv</source>. <year>2017</year>; <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/227439" xlink:type="simple">10.1101/227439</ext-link></comment> Available: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/227439" xlink:type="simple">https://doi.org/10.1101/227439</ext-link></mixed-citation></ref>
<ref id="pcbi.1007314.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Johnson</surname> <given-names>SS</given-names></name>, <name name-style="western"><surname>Zaikova</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Goerlitz</surname> <given-names>DS</given-names></name>, <name name-style="western"><surname>Bai</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Tighe</surname> <given-names>SW</given-names></name>. <article-title>Real-Time DNA Sequencing in the Antarctic Dry Valleys Using the Oxford Nanopore Sequencer</article-title>. <source>J Biomol Tech</source>. <year>2017</year>;<volume>28</volume>: <fpage>2</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7171/jbt.17-2801-009" xlink:type="simple">10.7171/jbt.17-2801-009</ext-link></comment> <object-id pub-id-type="pmid">28337073</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hoenen</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Groseth</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Rosenke</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Fischer</surname> <given-names>RJ</given-names></name>, <name name-style="western"><surname>Hoenen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Judson</surname> <given-names>SD</given-names></name>, <etal>et al</etal>. <article-title>Nanopore Sequencing as a Rapidly Deployable Ebola Outbreak Tool</article-title>. <source>Emerg Infect Dis</source>. <year>2016</year>;<volume>22</volume>: <fpage>331</fpage>–<lpage>4</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3201/eid2202.151796" xlink:type="simple">10.3201/eid2202.151796</ext-link></comment> <object-id pub-id-type="pmid">26812583</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Castro-Wallace</surname> <given-names>SL</given-names></name>, <name name-style="western"><surname>Chiu</surname> <given-names>CY</given-names></name>, <name name-style="western"><surname>John</surname> <given-names>KK</given-names></name>, <name name-style="western"><surname>Stahl</surname> <given-names>SE</given-names></name>, <name name-style="western"><surname>Rubins</surname> <given-names>KH</given-names></name>, <name name-style="western"><surname>McIntyre</surname> <given-names>ABR</given-names></name>, <etal>et al</etal>. <article-title>Nanopore DNA Sequencing and Genome Assembly on the International Space Station</article-title>. <source>Sci Rep</source>. <year>2017</year>;<volume>7</volume>: <fpage>18022</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41598-017-18364-0" xlink:type="simple">10.1038/s41598-017-18364-0</ext-link></comment> <object-id pub-id-type="pmid">29269933</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wick</surname> <given-names>RR</given-names></name>, <name name-style="western"><surname>Judd</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Holt</surname> <given-names>KE</given-names></name>. <article-title>Deepbinner: Demultiplexing barcoded Oxford Nanopore reads with deep convolutional neural networks</article-title>. <source>PLoS Comput Biol</source>. <year>2018</year>;<volume>14</volume>: <fpage>1</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1006583" xlink:type="simple">10.1371/journal.pcbi.1006583</ext-link></comment> <object-id pub-id-type="pmid">30458005</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>De Coster</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>D’Hert</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Schultz</surname> <given-names>DT</given-names></name>, <name name-style="western"><surname>Cruts</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Van Broeckhoven</surname> <given-names>C</given-names></name>. <article-title>NanoPack: visualizing and processing long-read sequencing data</article-title>. <source>Bioinformatics</source>. <year>2018</year>;<volume>34</volume>: <fpage>2666</fpage>–<lpage>2669</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/bty149" xlink:type="simple">10.1093/bioinformatics/bty149</ext-link></comment> <object-id pub-id-type="pmid">29547981</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Koren</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Walenz</surname> <given-names>BP</given-names></name>, <name name-style="western"><surname>Berlin</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Miller</surname> <given-names>JR</given-names></name>, <name name-style="western"><surname>Bergman</surname> <given-names>NH</given-names></name>, <name name-style="western"><surname>Phillippy</surname> <given-names>AM</given-names></name>. <source>Canu: scalable and accurate long-read assembly via adaptive k-mer weighting and repeat separation</source>. <year>2016</year>; <fpage>1</fpage>–<lpage>35</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/gr.215087.116" xlink:type="simple">10.1101/gr.215087.116</ext-link></comment></mixed-citation></ref>
<ref id="pcbi.1007314.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Li</surname> <given-names>H.</given-names></name> <article-title>Minimap2: pairwise alignment for nucleotide sequences</article-title>. <source>Bioinformatics</source>. <year>2018</year>;<volume>34</volume>: <fpage>3094</fpage>–<lpage>3100</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/bty191" xlink:type="simple">10.1093/bioinformatics/bty191</ext-link></comment> <object-id pub-id-type="pmid">29750242</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Loman</surname> <given-names>NJ</given-names></name>, <name name-style="western"><surname>Quick</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Simpson</surname> <given-names>JT</given-names></name>. <article-title>A complete bacterial genome assembled de novo using only nanopore sequencing data</article-title>. <source>Nat Methods</source>. <year>2015</year>;<volume>12</volume>: <fpage>733</fpage>–<lpage>735</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nmeth.3444" xlink:type="simple">10.1038/nmeth.3444</ext-link></comment> <object-id pub-id-type="pmid">26076426</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hunt</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>De Silva</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Otto</surname> <given-names>TD</given-names></name>, <name name-style="western"><surname>Parkhill</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Keane</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Harris</surname> <given-names>SR</given-names></name>. <article-title>Circlator: automated circularization of genome assemblies using long sequencing reads</article-title>. <source>Genome Biol</source>. <year>2015</year>;<volume>16</volume>: <fpage>294</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-015-0849-0" xlink:type="simple">10.1186/s13059-015-0849-0</ext-link></comment> <object-id pub-id-type="pmid">26714481</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Seemann</surname> <given-names>T.</given-names></name> <article-title>Prokka: rapid prokaryotic genome annotation</article-title>. <source>Bioinformatics</source>. <year>2014</year>;<volume>30</volume>: <fpage>2068</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btu153" xlink:type="simple">10.1093/bioinformatics/btu153</ext-link></comment> <object-id pub-id-type="pmid">24642063</object-id></mixed-citation></ref>
<ref id="pcbi.1007314.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kim</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Song</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Breitwieser</surname> <given-names>FP</given-names></name>, <name name-style="western"><surname>Salzberg</surname> <given-names>SL</given-names></name>. <article-title>Centrifuge: rapid and sensitive classification of metagenomic sequences</article-title>. <source>Genome Res</source>. <year>2016</year>;<volume>26</volume>: <fpage>1721</fpage>–<lpage>1729</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/gr.210641.116" xlink:type="simple">10.1101/gr.210641.116</ext-link></comment> <object-id pub-id-type="pmid">27852649</object-id></mixed-citation></ref>
</ref-list>
</back>
</article>