<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PONE-D-24-06867</article-id>
<article-id pub-id-type="doi">10.1371/journal.pone.0311038</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Birds</subject><subj-group><subject>Seabirds</subject><subj-group><subject>Penguins</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Zoology</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Birds</subject><subj-group><subject>Seabirds</subject><subj-group><subject>Penguins</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal sociality</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal sociality</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Zoology</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal sociality</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Engineering and technology</subject><subj-group><subject>Equipment</subject><subj-group><subject>Optical equipment</subject><subj-group><subject>Cameras</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Earth sciences</subject><subj-group><subject>Geomorphology</subject><subj-group><subject>Topography</subject><subj-group><subject>Landforms</subject><subj-group><subject>Islands</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Imaging techniques</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Geographical locations</subject><subj-group><subject>Antarctica</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Imaging techniques</subject><subj-group><subject>Photography</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Artificial gene amplification and extension</subject><subj-group><subject>Recombinase polymerase amplification</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Artificial gene amplification and extension</subject><subj-group><subject>Recombinase polymerase amplification</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Penguin colony georegistration using camera pose estimation and phototourism</article-title>
<alt-title alt-title-type="running-head">Penguin colony georegistration using camera pose estimation and phototourism</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0009-0008-5513-7553</contrib-id>
<name name-style="western">
<surname>Wu</surname> <given-names>Haoyu</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Flynn</surname> <given-names>Clare</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0009-0007-1245-4631</contrib-id>
<name name-style="western">
<surname>Hall</surname> <given-names>Carole</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Che-Castaldo</surname> <given-names>Christian</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Samaras</surname> <given-names>Dimitris</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-5018-8249</contrib-id>
<name name-style="western">
<surname>Schwaller</surname> <given-names>Mathew</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lynch</surname> <given-names>Heather J.</given-names></name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001">
<label>1</label>
<addr-line>Department of Computer Science, Stony Brook University, Stony Brook, New York, United States of America</addr-line>
</aff>
<aff id="aff002">
<label>2</label>
<addr-line>Department of Ecology &amp; Evolution, Stony Brook University, Stony Brook, New York, United States of America</addr-line>
</aff>
<aff id="aff003">
<label>3</label>
<addr-line>Department of Applied Mathematics and Statistics, Stony Brook University, Stony Brook, New York, United States of America</addr-line>
</aff>
<aff id="aff004">
<label>4</label>
<addr-line>U.S. Geological Survey, Wisconsin Cooperative Wildlife Research Unit, Department of Forest and Wildlife Ecology, University of Wisconsin-Madison, Madison, WI, United States of America</addr-line>
</aff>
<aff id="aff005">
<label>5</label>
<addr-line>Institute for Advanced Computational Science, Stony Brook University, Stony Brook, New York, United States of America</addr-line>
</aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>VishnuRadhan</surname> <given-names>Renjith</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1">
<addr-line>Amity University Amity Institute of Biotechnology, INDIA</addr-line>
</aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">haoyuwu@cs.stonybrook.edu</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<pub-date pub-type="epub">
<day>30</day>
<month>10</month>
<year>2024</year>
</pub-date>
<volume>19</volume>
<issue>10</issue>
<elocation-id>e0311038</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>2</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>9</month>
<year>2024</year>
</date>
</history>
<permissions>
<license xlink:href="https://creativecommons.org/publicdomain/zero/1.0/" xlink:type="simple">
<license-p>This is an open access article, free of all copyright, and may be freely reproduced, distributed, transmitted, modified, built upon, or otherwise used by anyone for any lawful purpose. The work is made available under the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/publicdomain/zero/1.0/" xlink:type="simple">Creative Commons CC0</ext-link> public domain dedication.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0311038"/>
<abstract>
<p>Satellite-based remote sensing and uncrewed aerial imagery play increasingly important roles in the mapping of wildlife populations and wildlife habitat, but the availability of imagery has been limited in remote areas. At the same time, ecotourism is a rapidly growing industry and can yield a vast catalog of photographs that could be harnessed for monitoring purposes, but the inherently ad-hoc and unstructured nature of these images makes them difficult to use. To help address this, a subfield of computer vision known as phototourism has been developed to leverage a diverse collection of unstructured photographs to reconstruct a georeferenced three-dimensional scene capturing the environment at that location. Here we demonstrate the use of phototourism in an application involving Antarctic penguins, sentinel species whose dynamics are closely tracked as a measure of ecosystem functioning, and introduce a semi-automated pipeline for aligning and registering ground photographs using a digital elevation model (DEM) and satellite imagery. We employ the Segment Anything Model (SAM) for the interactive identification and segmentation of penguin colonies in these photographs. By creating a textured 3D mesh from the DEM and satellite imagery, we estimate camera poses to align ground photographs with the mesh and register the segmented penguin colony area to the mesh, achieving a detailed representation of the colony. Our approach has demonstrated promising performance, though challenges persist due to variations in image quality and the dynamic nature of natural landscapes. Nevertheless, our method offers a straightforward and effective tool for the georegistration of ad-hoc photographs in natural landscapes, with additional applications such as monitoring glacial retreat.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>NSF</institution>
</funding-source>
<award-id>IIS-2212046</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Samaras</surname> <given-names>Dimitris</given-names></name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution>NASA Biodiversity Program</institution>
</funding-source>
<award-id>Award 317 80NSSC21K1027</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Lynch</surname> <given-names>Heather J.</given-names></name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was supported in part by the NASA Biodiversity Program (Award 317 80NSSC21K1027), and NSF Grant IIS-2212046. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="4"/>
<page-count count="18"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All data are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/hao-yu-wu/penguin_colony_registration" xlink:type="simple">https://github.com/hao-yu-wu/penguin_colony_registration</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Phototourism [<xref ref-type="bibr" rid="pone.0311038.ref001">1</xref>–<xref ref-type="bibr" rid="pone.0311038.ref003">3</xref>] is an emerging concept that harnesses the power of unstructured collections of photographs, often sourced from online platforms. It includes not only professional photographs but also images taken by tourists, explorers, research scientists, and others. The merit of this concept lies in its ability to pool together these disorganized images to reconstruct the three-dimensional details of a given scene via Structure from Motion (SfM) [<xref ref-type="bibr" rid="pone.0311038.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0311038.ref004">4</xref>–<xref ref-type="bibr" rid="pone.0311038.ref006">6</xref>]. SfM starts with feature extraction and matching key points across images, followed by geometric verification. It then leverages these key points to estimate geometric relations (camera poses) between images, and applies triangulation to determine the three-dimensional (3D) coordinates of the points. SfM iteratively processes multiple images using the aforementioned steps to build a detailed 3D scene model. The methodology of phototourism has been most well-developed in the context of urban landscapes [<xref ref-type="bibr" rid="pone.0311038.ref003">3</xref>], since the defined edges of buildings and streets provide firm markers with which to match points across images. Three-dimensional reconstructions using ad-hoc photographs are far more difficult in natural contexts because these natural landscapes are highly dynamic and often lack sharp features that easily match across multiple images. Despite the computational challenges involved, the proliferation of cameras coupled with the growing affordability of ecotourism generates a massive influx of nature-based photography that might be harnessed for ecological monitoring [<xref ref-type="bibr" rid="pone.0311038.ref007">7</xref>].</p>
<p>While aerial imagery from remotely piloted aircraft systems (RPAS) is growing rapidly as a tool for environmental monitoring [<xref ref-type="bibr" rid="pone.0311038.ref008">8</xref>–<xref ref-type="bibr" rid="pone.0311038.ref011">11</xref>], there are many scenarios in which aerial imagery is unavailable. For one, an RPAS requires an experienced pilot and suitable conditions, which unavoidably limits the use of such equipment in surveying large areas. Secondly, current conditions are usually being compared against some measure of past conditions, and we cannot rely on RPAS imagery to establish a historical baseline against which more recent changes can be assessed. In these cases, historical photographs may be the only evidence available for past conditions. In fact, historical photos have been critical to our understanding of processes like glacial retreat, even when exact georeferencing of the photographs being compared is not possible [<xref ref-type="bibr" rid="pone.0311038.ref012">12</xref>, <xref ref-type="bibr" rid="pone.0311038.ref013">13</xref>]. Our goal is to extend the utility of photographs for a wider suite of applications, including those in which georeferencing of the images is required for interpretation. We use photographs of Antarctic penguin colonies—appearing as clusters of nesting penguins—to provide information on the abundance of these sentinel species from photographs that are already being collected and thus involve no additional disturbance to the species being monitored. In doing so we also demonstrate a general technique that may be employed for ecological monitoring in contexts where the spatial expanse of a landscape feature is of interest but where regular aerial mapping by RPAS is unavailable.</p>
<sec id="sec002">
<title>2D segmentation</title>
<p>Advances in computer vision have led to the development of sophisticated segmentation techniques [<xref ref-type="bibr" rid="pone.0311038.ref014">14</xref>–<xref ref-type="bibr" rid="pone.0311038.ref018">18</xref>]. These techniques include semantic segmentation, which assigns labels to each pixel based on semantic class [<xref ref-type="bibr" rid="pone.0311038.ref019">19</xref>–<xref ref-type="bibr" rid="pone.0311038.ref022">22</xref>], and instance segmentation, which goes further by grouping pixels into separate object instances [<xref ref-type="bibr" rid="pone.0311038.ref023">23</xref>–<xref ref-type="bibr" rid="pone.0311038.ref025">25</xref>]. Recently, models like detection transformer (DETR) [<xref ref-type="bibr" rid="pone.0311038.ref026">26</xref>] have shown significant progress in 2D segmentation [<xref ref-type="bibr" rid="pone.0311038.ref021">21</xref>, <xref ref-type="bibr" rid="pone.0311038.ref025">25</xref>, <xref ref-type="bibr" rid="pone.0311038.ref027">27</xref>–<xref ref-type="bibr" rid="pone.0311038.ref033">33</xref>], leveraging the Transformer architecture [<xref ref-type="bibr" rid="pone.0311038.ref034">34</xref>] for enhanced performance. In the realm of interactive segmentation [<xref ref-type="bibr" rid="pone.0311038.ref035">35</xref>–<xref ref-type="bibr" rid="pone.0311038.ref040">40</xref>], where user input guides the segmentation process, a variety of innovations have emerged. A notable example is the Segment Anything Model (SAM) [<xref ref-type="bibr" rid="pone.0311038.ref037">37</xref>], which has a prompt-based approach. SAM operates by receiving an input image and a collection of prompts, the latter of which is optional and could be comprised of single points, bounding boxes, textual descriptions, or even entire masks [<xref ref-type="bibr" rid="pone.0311038.ref037">37</xref>]. 
SAM capitalizes on its object recognition capabilities, developed through rigorous training on the extensive SA-1B dataset with 1 billion masks and 11 million images; this extensive training provides an intricate understanding of object structures and boundaries, allowing SAM to generate a predicted segmentation mask based on minimal prompts. This adeptness allows SAM to segment objects it has never encountered in its training, showcasing its zero-shot learning and ability to generalize beyond its training examples. It supports various forms of user interaction (prompts) like clicks or boxes. Segment-Everything-Everywhere-All-at-Once (SEEM) [<xref ref-type="bibr" rid="pone.0311038.ref041">41</xref>] further expands SAM’s scope by incorporating visual and audio prompts into a joint visual-semantic space, allowing for diverse prompt compositions.</p>
<p>In our endeavor, we have strategically adopted SAM for its ease of use since our goal was to develop a pipeline for georeferencing ground photographs that could be adopted by the ecological community. SAM’s inherent flexibility and user-friendly interface have proven to be particularly well-suited for dealing with unstructured images, a common challenge for phototourism-based projects. The segmentation of the colonies from satellite images is a long-standing challenge; initial efforts required labor-intensive manual annotations [<xref ref-type="bibr" rid="pone.0311038.ref042">42</xref>], and efforts to accelerate the process with convolutional neural networks (CNNs) have been challenged by the limited availability of training data [<xref ref-type="bibr" rid="pone.0311038.ref043">43</xref>]. Le et al. [<xref ref-type="bibr" rid="pone.0311038.ref044">44</xref>] were able to achieve good performance for penguin colony semantic segmentation using a weakly-supervised deep learning framework, but did so by leveraging segmentation annotations in the form of medium-resolution Landsat imagery [<xref ref-type="bibr" rid="pone.0311038.ref042">42</xref>] and commercial satellite imagery from prior years (e.g., from [<xref ref-type="bibr" rid="pone.0311038.ref045">45</xref>]), the latter of which can harness the fact that penguins are highly site faithful and colony shape changes only slowly in time. Here we seek a solution to the segmentation of penguin colonies in ground-based photography, which offers the same challenges faced in interpreting satellite imagery, most notably that the boundary between the colony and the surrounding landscape can be fuzzy. 
Our use of SAM in the task of penguin colony segmentation is novel, but we anticipate that its ease of use could make it an attractive option for a variety of segmentation tasks in ecological applications, such as environmental monitoring [<xref ref-type="bibr" rid="pone.0311038.ref046">46</xref>] and ecotope segmentation (the classification of habitat types into distinct ecological zones) [<xref ref-type="bibr" rid="pone.0311038.ref047">47</xref>].</p>
</sec>
<sec id="sec003">
<title>Visual localization</title>
<p>In the domain of visual localization (camera pose estimation), state-of-the-art methods usually require the use of local features to represent scenes [<xref ref-type="bibr" rid="pone.0311038.ref048">48</xref>–<xref ref-type="bibr" rid="pone.0311038.ref061">61</xref>]. These methods typically involve creating SfM point clouds where each 3D point is linked with 2D image features from database images. The pose of a query image is estimated by matching its features to the 3D points in the scene model, often employing a random sample consensus (RANSAC) scheme for optimization [<xref ref-type="bibr" rid="pone.0311038.ref062">62</xref>–<xref ref-type="bibr" rid="pone.0311038.ref069">69</xref>]. To enhance scalability and performance, hierarchical localization approaches have been employed, incorporating an initial image retrieval phase [<xref ref-type="bibr" rid="pone.0311038.ref049">49</xref>, <xref ref-type="bibr" rid="pone.0311038.ref059">59</xref>, <xref ref-type="bibr" rid="pone.0311038.ref060">60</xref>, <xref ref-type="bibr" rid="pone.0311038.ref070">70</xref>–<xref ref-type="bibr" rid="pone.0311038.ref072">72</xref>]. This step narrows down the search area for 2D-3D matching, allowing for more focused and efficient processing. While sparse SfM point clouds are common, some methods also explore the use of dense meshes as a scene representation [<xref ref-type="bibr" rid="pone.0311038.ref048">48</xref>, <xref ref-type="bibr" rid="pone.0311038.ref073">73</xref>–<xref ref-type="bibr" rid="pone.0311038.ref075">75</xref>], potentially providing a more detailed view of the environment.</p>
<p>Our work diverges significantly from existing approaches by focusing on localizing 2D ground photographs to a 3D mesh at the scale of satellite images, presenting a challenge far greater than the day-night variations considered challenging in the prior studies. The resolution discrepancy between the mesh and the 2D ground photograph is vast, diminishing the comparability with previous methods. We experimented with local feature matching using SuperGlue [<xref ref-type="bibr" rid="pone.0311038.ref053">53</xref>] and the dense feature matching algorithm GLU-Net [<xref ref-type="bibr" rid="pone.0311038.ref076">76</xref>], but these methods proved to be inadequate due to the exceptionally challenging nature of our problem. Instead, our approach relies on manual alignment for camera pose estimation, navigating through challenges scarcely addressed in conventional visual localization frameworks.</p>
</sec>
</sec>
<sec id="sec004" sec-type="materials|methods">
<title>Materials and methods</title>
<p>In this paper, we present a semi-automated pipeline that leverages a 2-meter digital elevation model (DEM) from the Reference Elevation Model of Antarctica (REMA) [<xref ref-type="bibr" rid="pone.0311038.ref077">77</xref>, <xref ref-type="bibr" rid="pone.0311038.ref078">78</xref>] and medium-resolution (10-meter) satellite imagery (Sentinel Hub services, Sentinel-2 L2A) [<xref ref-type="bibr" rid="pone.0311038.ref079">79</xref>] to align and georegister ground photographs. Ground photographs were collected from our collection of photographs taken in the field as well as photographs that were posted online. To find photographs available online, we used an online image search engine (Google Image) and downloaded photographs that we could confirm based on personal experience were taken at the target location. Importantly, we did not require that the photograph contain geographic metadata as to the location where the photo was taken. In our experience (see, for example, [<xref ref-type="bibr" rid="pone.0311038.ref007">7</xref>]), geographic metadata are often stripped from photographs posted online even when the camera is capable of recording location, and geographic data retained are often inaccurate in the Antarctic. Moreover, as our goal was to develop a pipeline that could work equally well for historic imagery, we did not want to rely only on photographs for which location data were available. Photographs used in this study were collected on several expeditions permitted by the US National Science Foundation under the Antarctic Conservation Act (Permit ACA 2005-005, 2009-015, 2014-0001, 2019-001). All research was conducted with approval from Stony Brook University’s Institutional Animal Care and Use Committee (237420). Links to all data sources including licenses for internet photos are available in <xref ref-type="supplementary-material" rid="pone.0311038.s001">S1 Appendix</xref>.</p>
<p>Our goal is to develop a method that detects and segments the penguin colony in each high-resolution ground photograph and georegisters it to a textured 3D mesh derived from the DEM and satellite imagery, as depicted in <xref ref-type="fig" rid="pone.0311038.g001">Fig 1</xref>. Initially, human operators provide minimal input through a few key annotations to guide SAM [<xref ref-type="bibr" rid="pone.0311038.ref037">37</xref>], which then proceeds to identify and segment penguin colonies in ground photographs. This minimal intervention significantly enhances processing speed and ensures accuracy that is comparable to manual human annotations. Following this, the pipeline autonomously generates a textured 3D mesh by overlaying the satellite image on the DEM. Human experts align the rendering of the 3D model with the ground photograph to obtain the camera pose. Finally, our automated process registers the segmented penguin area to the 3D mesh, offering a highly detailed view of the colony’s location and an estimate of its area.</p>
<fig id="pone.0311038.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Overview of penguin colony registration on Devil Island, Antarctica.</title>
<p>First (panel a), we segment the penguin colony area in the ground photograph. The green dots represent prompts provided by a human annotator and the red polygons represent the segmentation results of the Segment Anything Model (SAM) [<xref ref-type="bibr" rid="pone.0311038.ref037">37</xref>]. Next (panel b), we estimate the ground photo’s camera pose by matching it with a rendered image from the colorized 3D mesh derived from the digital elevation map (DEM) and satellite imagery from Sentinel Hub [<xref ref-type="bibr" rid="pone.0311038.ref079">79</xref>]. Finally (panel c), we register the penguin colony to the 3D mesh and visualize it from an aerial view.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g001" xlink:type="simple"/>
</fig>
<sec id="sec005">
<title>Semi-automated georegistration</title>
<p>Our proposed semi-automated pipeline for accurate ground photograph alignment and georegistration encompasses the following steps.</p>
<list list-type="simple">
<list-item>
<p><bold>Step 1: Segmentation of the penguin colony.</bold> We use SAM with the human annotator providing prompts in the form of positive pixels (colony) and negative pixels (non-colony). These annotations harness the potential of prompt engineering for the segmentation task [<xref ref-type="bibr" rid="pone.0311038.ref080">80</xref>], enabling precise delineation of the penguin colony in the ground photograph. The entire process of segmentation for a single image, including the creation of 10-to-15-pixel prompts, is accomplished in approximately 5 to 10 seconds. This showcases the efficiency of SAM in handling this task, particularly given that manual segmentation requires considerably more time (at least 1–2 minutes and potentially much longer) given the intricate and highly crenulated structure of a penguin colony.</p>
</list-item>
<list-item>
<p><bold>Step 2: Colored 3D mesh generation.</bold> Integrating the texture from a 10-meter satellite image with a 2-meter DEM, which can be perceived as a depth map, we generate an RGB-Depth image. This essentially transforms the elevation data and satellite imagery into a colorized point cloud. We then linked adjacent pixels based on their depth values to construct a colored 3D triangle mesh using Trimesh [<xref ref-type="bibr" rid="pone.0311038.ref081">81</xref>], which is used in later steps to render images from different camera poses.</p>
</list-item>
<list-item>
<p><bold>Step 3: Camera pose estimation for ground photograph.</bold> In order to determine the camera pose for a high-resolution ground photograph, we use a manual annotation process with the aid of Meshlab software [<xref ref-type="bibr" rid="pone.0311038.ref082">82</xref>], an open-source tool for processing and editing 3D triangular meshes. We begin by importing both the 3D mesh and the high-resolution ground photograph into Meshlab, which then renders a 2D image based on the 3D mesh. By carefully examining the differences between this rendered image and the original ground photograph, human annotators continuously adjust the camera pose of the 3D mesh until the two images roughly align.</p>
</list-item>
<list-item>
<p><bold>Step 4 (Optional): Camera pose refinement using feature matching.</bold> Similar to the manual annotation process in the third step, we use the feature matching algorithm GLU-Net [<xref ref-type="bibr" rid="pone.0311038.ref076">76</xref>] to estimate pixel-wise correspondences between the rendered 2D image and the ground photograph. Using the rendered depth map alongside the pixel correspondences in the rendered 2D image, we derive corresponding points in the 3D space. This forms a set of 2D-3D correspondences between the 3D mesh and the ground photograph. Then, we solve the Perspective-n-Point (PnP) problem [<xref ref-type="bibr" rid="pone.0311038.ref067">67</xref>] using the Levenberg-Marquardt optimization method [<xref ref-type="bibr" rid="pone.0311038.ref083">83</xref>, <xref ref-type="bibr" rid="pone.0311038.ref084">84</xref>] to obtain a more precise camera pose. This algorithm determines the camera pose by minimizing the re-projection error between the observed 2D points in the image and the projected 3D points using a non-linear least squares method.</p>
</list-item>
<list-item>
<p><bold>Step 5: Registration of the penguin colony to the 3D model.</bold> Based on the estimated camera pose of the ground photograph, we register the segmented area of the penguin colony to the 3D mesh. Specifically, using the camera pose, we project the segmented area into the view of the medium-resolution satellite image, effectively giving us a 3D reconstruction of the penguin colony area. It is important to note that the projected penguin colony area still maintains its high-resolution shape, as shown in <xref ref-type="fig" rid="pone.0311038.g001">Fig 1</xref>.</p>
</list-item>
</list>
</sec>
<sec id="sec006">
<title>Experimental evaluation</title>
<p>We demonstrated our pipeline using data at two penguin colonies on the Antarctic Peninsula—Devil Island, which contains an Adélie penguin (<italic>Pygoscelis adeliae</italic>) colony, and Brown Bluff, which contains a mixed Adélie and gentoo penguin (<italic>P. papua</italic>) colony. We georegistered eight ground-level photographs from Devil Island and nine ground-level photographs from Brown Bluff (details in <xref ref-type="table" rid="pone.0311038.t001">Table 1</xref>). The dates on which these photos were taken were not available.</p>
<table-wrap id="pone.0311038.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.t001</object-id>
<label>Table 1</label>
<caption>
<title>Photograph sources for Devil Island and Brown Bluff Antarctic penguin colonies.</title>
<p>This table enumerates the selected photographs from an initial pool of over 70 images, filtered based on criteria detailed in the discussion of ‘the appropriateness of ground photos’ (see <xref ref-type="sec" rid="sec007">Results and discussion</xref> section).</p>
</caption>
<alternatives>
<graphic id="pone.0311038.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.t001" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Colonies</th>
<th align="left">Sources</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Devil Island</td>
<td align="left">Our team, and Dreamstime (<ext-link ext-link-type="uri" xlink:href="https://www.dreamstime.com" xlink:type="simple">www.dreamstime.com</ext-link>)</td>
</tr>
<tr>
<td align="left">Brown Bluff</td>
<td align="left">Our team, Flickr users Outward_bound and Delphinidaesy, Alek Komarnitsky (<ext-link ext-link-type="uri" xlink:href="https://www.komar.org" xlink:type="simple">www.komar.org</ext-link>), and Antarctic Treaty Secretariat (<ext-link ext-link-type="uri" xlink:href="https://www.ats.aq" xlink:type="simple">www.ats.aq</ext-link>)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>For evaluating our penguin colony segmentation results, we employed the following metrics: mean intersection-over-union (mean IoU), pixel accuracy, perimeter-area ratio, and area error. Mean IoU, a common metric for segmentation tasks, is calculated as:
<disp-formula id="pone.0311038.e001"><alternatives><graphic id="pone.0311038.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0311038.e001" xlink:type="simple"/><mml:math display="block" id="M1"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mtext>mean</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>IoU</mml:mtext> <mml:mo>=</mml:mo> <mml:mfrac><mml:mrow><mml:mtext>True</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Positives</mml:mtext></mml:mrow> <mml:mrow><mml:mtext>False</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Negatives</mml:mtext> <mml:mo>+</mml:mo> <mml:mtext>True</mml:mtext> <mml:mspace width="4pt"/><mml:mo>&amp;</mml:mo><mml:mspace width="4pt"/><mml:mtext>False</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Positives</mml:mtext></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(1)</label></disp-formula></p>
<p>This metric specifically measures the overlap between our predicted segmentation (colony or non-colony) and the ground truth.</p>
<p>Pixel accuracy is a simpler and more intuitive metric defined as the ratio of correctly predicted pixels to the total number of pixels:
<disp-formula id="pone.0311038.e002"><alternatives><graphic id="pone.0311038.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0311038.e002" xlink:type="simple"/><mml:math display="block" id="M2"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mtext>Pixel</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Accuracy</mml:mtext> <mml:mo>=</mml:mo> <mml:mfrac><mml:mrow><mml:mtext>True</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Positives</mml:mtext> <mml:mo>+</mml:mo> <mml:mtext>True</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Negatives</mml:mtext></mml:mrow> <mml:mrow><mml:mtext>Total</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Number</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>of</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Pixels</mml:mtext></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(2)</label></disp-formula></p>
<p>Perimeter-area ratio (PAR)—a region’s perimeter divided by its area—is a simple shape complexity metric, often used in studying landscapes and wilderness areas [<xref ref-type="bibr" rid="pone.0311038.ref085">85</xref>]. Here, we use PAR to estimate the level of shape complexity captured by our colony registration procedure, as colonies with excessive perimeter extents can imply a greater risk of predation to nesting penguins [<xref ref-type="bibr" rid="pone.0311038.ref086">86</xref>]. For a shape with multiple components, we calculate PAR as the total perimeter divided by the total area. Note that for a shape with holes (i.e. areas within a colony that do not contain nesting penguins), we take the perimeter to be the combined perimeters of the boundary and holes.</p>
<p>Area prediction error is a measure comparing the predicted area (in this case, the penguin colony) to its actual area, expressed as the ratio of the absolute error in the predicted area to the actual area. Formally, it is expressed as:
<disp-formula id="pone.0311038.e003"><alternatives><graphic id="pone.0311038.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0311038.e003" xlink:type="simple"/><mml:math display="block" id="M3"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mtext>Area</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Error</mml:mtext> <mml:mo>=</mml:mo> <mml:mfrac><mml:mrow><mml:mo>|</mml:mo> <mml:mtext>Predicted</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Area</mml:mtext> <mml:mo>-</mml:mo> <mml:mtext>Actual</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Area</mml:mtext> <mml:mo>|</mml:mo></mml:mrow> <mml:mrow><mml:mtext>Actual</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Area</mml:mtext></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(3)</label></disp-formula></p>
<p>This metric is vital in our application because the area of these segmented penguin colonies is directly related to the number of penguins estimated to be breeding within each colony [<xref ref-type="bibr" rid="pone.0311038.ref087">87</xref>], but may be valuable for a range of ecological applications (e.g., patch area for vegetation monitoring, herd area in a study of grazers, pond area in hydrology, etc.).</p>
</sec>
</sec>
<sec id="sec007" sec-type="conclusions">
<title>Results and discussion</title>
<p>Our method, illustrated schematically in <xref ref-type="fig" rid="pone.0311038.g002">Fig 2</xref>, successfully segments and georegisters penguin colonies in complex environments, solving the challenge of the heterogeneous nature of assembling preexisting photos and the highly dynamic surface dominated by shifting snow (Figs <xref ref-type="fig" rid="pone.0311038.g003">3</xref> and <xref ref-type="fig" rid="pone.0311038.g004">4</xref>, Tables <xref ref-type="table" rid="pone.0311038.t002">2</xref> and <xref ref-type="table" rid="pone.0311038.t003">3</xref>).</p>
<fig id="pone.0311038.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Diagram with results of each step.</title>
<p>We show the sequential outputs for our pipeline: penguin colony segmentation (panels a, d), camera pose estimation for ground photographs (panels b, e), georegistrations via projection (panels c, f), and the final combined georegistration result (panel g).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g002" xlink:type="simple"/>
</fig>
<fig id="pone.0311038.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Segmentation and registration.</title>
<p>Visualization of segmentation (a-c) and registration (d-f) of penguin colonies at Devil Island and Brown Bluff in Antarctica.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g003" xlink:type="simple"/>
</fig>
<fig id="pone.0311038.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Segmentation and registration at Devil Island.</title>
<p>Additional visualization of segmentation (a-c) and registration (d-f) of penguin colonies at Devil Island, Antarctica.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g004" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0311038.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.t002</object-id>
<label>Table 2</label>
<caption>
<title>Segmentation evaluation.</title>
<p>Evaluation of the Segment Anything Model (SAM) for penguin colony segmentation using mean intersection over union (mIoU), difference in perimeter to area ratio (PAR), area error, and accuracy (i.e. panels a-c in Figs <xref ref-type="fig" rid="pone.0311038.g003">3</xref> and <xref ref-type="fig" rid="pone.0311038.g004">4</xref> vs. ground truth). 95% confidence intervals are shown. An up (down) arrow indicates a measure where a larger (smaller) number is preferred.</p>
</caption>
<alternatives>
<graphic id="pone.0311038.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.t002" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Colonies</th>
<th align="center">mIoU (%) ↑</th>
<th align="center">PAR Difference ↓</th>
<th align="center">Area Error (%) ↓</th>
<th align="center">Accuracy (%) ↑</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Devil Island</td>
<td align="center">76.8 ±0.4</td>
<td align="center">0.004 ±0.001</td>
<td align="center">7.8 ±0.6</td>
<td align="center">98.2 ±0.1</td>
</tr>
<tr>
<td align="left">Brown Bluff</td>
<td align="center">76.1 ±0.8</td>
<td align="center">0.012 ±0.001</td>
<td align="center">12.6 ±0.7</td>
<td align="center">97.1 ±0.1</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<table-wrap id="pone.0311038.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.t003</object-id>
<label>Table 3</label>
<caption>
<title>Model evaluation.</title>
<p>Evaluation of final predicted penguin colony areas at Devil Island using mean intersection over union (mIoU), difference in perimeter to area ratio (PAR), area error, and accuracy (i.e. <xref ref-type="fig" rid="pone.0311038.g005">Fig 5</xref> vs. ground truth). 95% confidence intervals are shown. We also show the evaluation of a fully manual approach. An up (down) arrow indicates a measure where a larger (smaller) number is preferred.</p>
</caption>
<alternatives>
<graphic id="pone.0311038.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.t003" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center">mIoU (%) ↑</th>
<th align="center">PAR Difference ↓</th>
<th align="center">Area Error (%) ↓</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Ours</td>
<td align="center">45.3 ±0.1</td>
<td align="center">0.017 ±0.001</td>
<td align="center"><bold>20.4 ±0.3</bold></td>
</tr>
<tr>
<td align="left">Manual Method</td>
<td align="center"><bold>45.6</bold></td>
<td align="center"><bold>0.015</bold></td>
<td align="center">20.5</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Inside our pipeline, SAM does an excellent job tracing the irregular contours of the colony (<xref ref-type="table" rid="pone.0311038.t002">Table 2</xref>, Figs <xref ref-type="fig" rid="pone.0311038.g003">3</xref> and <xref ref-type="fig" rid="pone.0311038.g004">4</xref>), and it can represent the detailed and high-resolution structures of the penguin nesting area. Notably, when compared with the ground truth segmentation, our method achieves a mean IoU of over 70%, an area error of approximately 7–12%, and performs well in terms of the perimeter-area ratio difference and accuracy for both the Devil Island and Brown Bluff colonies.</p>
<p>In <xref ref-type="table" rid="pone.0311038.t003">Table 3</xref> and <xref ref-type="fig" rid="pone.0311038.g005">Fig 5</xref>, we show the final georegistration results, including a composite of the segmented areas of penguin colonies from an aerial view (<xref ref-type="fig" rid="pone.0311038.g005">Fig 5</xref>). The availability of high-resolution satellite image annotations for Devil Island provides the opportunity to directly compare the georegistered composite to high-resolution satellite imagery (<xref ref-type="table" rid="pone.0311038.t003">Table 3</xref>). Compared with a fully manual approach, we show good mean IoU and even better area error. Although the accuracy of the composite colony area leaves room for improvement, in this particular application where inter-annual variability in abundance is substantial and greater than 20%, estimates of area with this level of precision can be highly informative when modelling population change through time (see Fig 3d in [<xref ref-type="bibr" rid="pone.0311038.ref088">88</xref>]). The precision is limited by the challenges of projecting ground photographs to an aerial view using a DEM, particularly because the 2-meter resolution of the DEM available is at least 10 times coarser in resolution than the photographs (typically 4K) taken by tourists. In other words, there may be over 100 pixels in the photograph that get mapped to a single pixel in the DEM. Despite these challenges, our overall results illustrate the effectiveness of the method even under challenging environmental conditions (<xref ref-type="fig" rid="pone.0311038.g005">Fig 5</xref>).</p>
<fig id="pone.0311038.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Final composite.</title>
<p>The final composite penguin colony areas at Devil Island (a) and Brown Bluff (b) in Antarctica from an aerial view.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g005" xlink:type="simple"/>
</fig>
<p>In Tables <xref ref-type="table" rid="pone.0311038.t002">2</xref> and <xref ref-type="table" rid="pone.0311038.t003">3</xref>, we also present 95% confidence intervals for all metrics, calculated by running our method 30 times. Our method yields only small variance across different experimental runs. In <xref ref-type="table" rid="pone.0311038.t004">Table 4</xref>, we perform a sensitivity analysis on the Devil Island dataset to determine the optimal number of pixel prompts for an image. Our evaluation shows that using only 3 pixel prompts is inadequate. In contrast, using 9-to-15-pixel prompts yields comparable results, indicating a plateau in performance. This confirms that our approach is robust with a reasonably small number of pixel prompts. In practice, we use 10–15 pixel prompts per image.</p>
<table-wrap id="pone.0311038.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.t004</object-id>
<label>Table 4</label>
<caption>
<title>Sensitivity analysis.</title>
<p>We use the Devil Island dataset to conduct a sensitivity analysis for the number of pixel prompts needed using mean intersection over union (mIoU), difference in perimeter to area ratio (PAR), and area error. An up (down) arrow indicates a measure where a larger (smaller) number is preferred.</p>
</caption>
<alternatives>
<graphic id="pone.0311038.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.t004" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Number of Pixel Prompts</th>
<th align="center">mIoU (%) ↑</th>
<th align="center">PAR Difference ↓</th>
<th align="center">Area Error (%) ↓</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">3</td>
<td align="char" char=".">37.0</td>
<td align="char" char=".">0.044</td>
<td align="char" char=".">36.1</td>
</tr>
<tr>
<td align="left">9</td>
<td align="char" char="."><bold>46.2</bold></td>
<td align="char" char=".">0.022</td>
<td align="char" char="."><bold>19.1</bold></td>
</tr>
<tr>
<td align="left">12</td>
<td align="char" char=".">45.6</td>
<td align="char" char=".">0.023</td>
<td align="char" char=".">20.4</td>
</tr>
<tr>
<td align="left">15</td>
<td align="char" char=".">45.3</td>
<td align="char" char=".">0.017</td>
<td align="char" char=".">20.4</td>
</tr>
<tr>
<td align="left">Manual Method</td>
<td align="char" char=".">45.6</td>
<td align="char" char="."><bold>0.015</bold></td>
<td align="char" char=".">20.5</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Citizen science is a growing area of interest for ecologists looking to study large or remote areas, and photographs have been harnessed in a large number of these citizen scientist applications [<xref ref-type="bibr" rid="pone.0311038.ref089">89</xref>]. However, the vast majority of these photograph-based projects have actively solicited photographs from tourists or have set up dedicated portals for image submission. The alternative approach, to gather images placed online for other purposes, is less common. Some examples of this ‘passive’ approach to citizen science include studies of whale sharks (<italic>Rhincodon typus</italic>) [<xref ref-type="bibr" rid="pone.0311038.ref090">90</xref>, <xref ref-type="bibr" rid="pone.0311038.ref091">91</xref>] and Weddell seals (<italic>Leptonychotes weddellii</italic>) [<xref ref-type="bibr" rid="pone.0311038.ref007">7</xref>], two species that can be individually identified in photographs by their spotted coloration. Though most cameras now capture geographic metadata, our experience has been that such data are typically unavailable by the time an image is posted online. Here we present an alternative approach for geolocating photographs sourced from the internet that does not require the camera to record its location. This method greatly expands the possible applications of passively sourced photographs for monitoring environmental conditions or, as we have demonstrated in our application, populations of wildlife. Antarctica is difficult to survey because of its remoteness, so harnessing tourists’ photos of penguin colonies can appreciably add to the robustness of datasets of population size, colony shape, and phenology.</p>
<p>We found GLU-Net [<xref ref-type="bibr" rid="pone.0311038.ref076">76</xref>] was capable of successful feature matching in the pose refinement process (step 4 in method section; <xref ref-type="fig" rid="pone.0311038.g006">Fig 6</xref>) whereas the correspondences across images were found to be too sparse for SuperGlue [<xref ref-type="bibr" rid="pone.0311038.ref053">53</xref>] and this led to unsuccessful pose refinement (<xref ref-type="fig" rid="pone.0311038.g006">Fig 6</xref>). While pose refinement offers improved results in some cases, the relatively coarse resolution of the satellite imagery we were using limited its benefit for our application. Consequently, the segmentation results used for computing our metrics omit the pose refinement step. Though we anticipate that future developments in the area of feature matching may help mitigate this issue, the use of the highest resolution satellite imagery for a given location is likely to provide the best opportunities for feature matching.</p>
<fig id="pone.0311038.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g006</object-id>
<label>Fig 6</label>
<caption>
<title>Feature matching.</title>
<p>Comparative visualization of feature matching: (a) Dense pixel-wise correspondences between the rendered and ground photographs using GLU-Net [<xref ref-type="bibr" rid="pone.0311038.ref076">76</xref>], indicating successful matching; (b) Sparse and incorrect pixel-wise correspondences using SuperGlue [<xref ref-type="bibr" rid="pone.0311038.ref053">53</xref>], reflecting poor matching performance in the challenging scenario.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g006" xlink:type="simple"/>
</fig>
<p>When considering the appropriateness of ground photographs for alignment with 3D mesh, it is essential to prioritize those captured from a relatively distant viewpoint, as shown in the bottom row of <xref ref-type="fig" rid="pone.0311038.g007">Fig 7</xref>. Images that provide sufficient context for georegistration offer clear and easily recognizable features that can be used for alignment. In contrast, close-up images or images that do not provide any sense of the larger landscape do not provide enough context for the alignment procedure that we have developed and tested. The use of telephoto lenses, while impacting the determination of the camera’s location due to their parallel projection characteristics, should not be overly concerning. This is because the primary limitations in the accuracy of our method currently stem from the resolution constraints of available satellite imagery and DEM. Though our primary goal was to develop the tools needed to georeference ‘found’ images, there are contexts in which photographs might be explicitly solicited for a scientific purpose. In particular, photography provides a straightforward way for travelers to remote regions to get involved as ‘citizen scientists’ and in that light, <xref ref-type="fig" rid="pone.0311038.g007">Fig 7</xref> provides some guidance for photographers.</p>
<fig id="pone.0311038.g007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0311038.g007</object-id>
<label>Fig 7</label>
<caption>
<title>A visual guide for selecting appropriate ground photographs.</title>
<p>Photos by Heather Lynch / Creative Commons CC-BY, Liam Quinn / Creative Commons CC-BY-SA, and Flickr user Outward_bound / Creative Commons CC-BY-NC-ND.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.g007" xlink:type="simple"/>
</fig>
<p>For 2D to 3D colony registration, working within entirely natural environments presents distinct challenges. One predominant issue is the lack of stable landmarks like buildings which, with their well-defined shapes, straight edges, and 90-degree angles, provide clear reference points that facilitate the alignment process [<xref ref-type="bibr" rid="pone.0311038.ref092">92</xref>]. Moreover, there exists an abundance of training data specifically designed to identify such man-made structures, making them even more advantageous for registration tasks [<xref ref-type="bibr" rid="pone.0311038.ref093">93</xref>–<xref ref-type="bibr" rid="pone.0311038.ref095">95</xref>]. In contrast, natural environments lack these distinct, consistent features, complicating the alignment process. Furthermore, changing snow conditions can introduce additional complexities; as snow accumulates, melts, or shifts, the physical terrain and its visual representation can change substantially. Though not all applications will be as heavily impacted by snow accumulation, more dynamic landscapes are unavoidably challenging and represent an area for continued technical development.</p>
<p>Our general schema for using georeferenced ground photos for ecological monitoring is not specific to penguins. In fact, this technique could be used anytime there is a feature of interest on the landscape that can be segmented and where the landscape contains enough topography for a digital elevation model to be useful for alignment. Though its utility in any specific application would need to be rigorously tested, potential applications include the tracking of marsh grasses through time [<xref ref-type="bibr" rid="pone.0311038.ref096">96</xref>], flowering phenology [<xref ref-type="bibr" rid="pone.0311038.ref097">97</xref>], and the mapping of vernal pools [<xref ref-type="bibr" rid="pone.0311038.ref098">98</xref>]. Though it was not the focus of our study, one natural application for this technique would be in the study of glacial retreat, since glaciers are a natural focus for ground photography and changes in their size and shape are of interest for studying the impacts of climate change. Though 3D data are now commonly available to researchers through techniques such as lidar and photogrammetry, our approach offers an alternative that can incorporate older images and those taken without special equipment or a specific monitoring aim in mind. It proves particularly valuable in scenarios where manual data annotation might otherwise be required, providing a more intuitive solution through the use of colored mesh rendering.</p>
<p>One limitation of our method is the dependency on a DEM to generate images that can be used to align with ground photographs. Obtaining high-precision DEMs, especially those finer than 2-meter resolution, can be particularly challenging. Such granular DEMs are essential for accurate alignment, yet they are not always readily available or accessible for every location of interest. Another limitation of our approach is the requirement of manual alignment, which can introduce errors. It is worth noting that while some landscapes are inherently more straightforward to align, thereby reducing the propensity for alignment errors, the complexity of the landscape remains a significant factor in alignment quality. Drawing upon literature in computational anatomy [<xref ref-type="bibr" rid="pone.0311038.ref099">99</xref>, <xref ref-type="bibr" rid="pone.0311038.ref100">100</xref>], certain geometric primitives, including spheres, cylinders, and rectangular prisms, are more readily identifiable by the human eye, facilitating easier registration and matching. Artificial structures or prominent landmarks, like architectural features in satellite images, can act as useful reference points during the alignment process. However, manual interventions from human operators not only introduce potential inaccuracies but also result in increased time and cost implications.</p>
<p>While we explored state-of-the-art deep learning and feature matching algorithms for camera pose estimation, such as SuperGlue [<xref ref-type="bibr" rid="pone.0311038.ref053">53</xref>] and GLU-Net [<xref ref-type="bibr" rid="pone.0311038.ref076">76</xref>], these methods demonstrated sub-optimal performance in identifying correspondences between images. The difference between high-resolution ground photographs and medium-resolution images rendered from 3D mesh is substantial, posing significant challenges even for human experts. Future advancements, such as feature enhancement techniques, may help address these challenges. Additionally, incorporating machine learning models to predict and adapt to dynamic changes in colony boundaries could complement feature-matching processes, potentially improving georegistration accuracy over time.</p>
</sec>
<sec id="sec008" sec-type="conclusions">
<title>Conclusion</title>
<p>Though satellites and uncrewed aerial vehicles are now routinely used for tracking changes on the landscape through time, there are many applications in which neither type of data are readily available. The proliferation of cameras in mobile phones now greatly expands the volume of data potentially available for long-term environmental monitoring. Thus, creative approaches for georeferencing these photos are essential to fully harness their value. Our proposed pipeline combines state-of-the-art segmentation tools with an alignment technique that does not require a priori information on the position of the camera, and paves the way for expanded use of crowd-sourced or historical photography.</p>
</sec>
<sec id="sec009" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pone.0311038.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pone.0311038.s001" xlink:type="simple">
<label>S1 Appendix</label>
<caption>
<title>Links to all data sources are available in S1 Appendix.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0311038.ref001">
<label>1</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Agarwal</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Furukawa</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Snavely</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Simon</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Curless</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Seitz</surname> <given-names>SM</given-names></name>, <etal>et al</etal>. <article-title>Building Rome in a day</article-title>. <source>Commun ACM</source>. <year>2011</year>;<volume>54</volume>(<issue>10</issue>):<fpage>105</fpage>–<lpage>112</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/2001269.2001293" xlink:type="simple">10.1145/2001269.2001293</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref002">
<label>2</label>
<mixed-citation publication-type="other" xlink:type="simple">Snavely N, Seitz SM, Szeliski R. Photo Tourism: Exploring Photo Collections in 3D. In: ACM SIGGRAPH 2006 Papers. SIGGRAPH’06. New York, NY, USA: Association for Computing Machinery; 2006. p. 835–846.</mixed-citation>
</ref>
<ref id="pone.0311038.ref003">
<label>3</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Snavely</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Garg</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Seitz</surname> <given-names>SM</given-names></name>, <name name-style="western"><surname>Szeliski</surname> <given-names>R</given-names></name>. <article-title>Finding Paths through the World’s Photos</article-title>. <source>ACM Trans Graph</source>. <year>2008</year>;<volume>27</volume>(<issue>3</issue>):<fpage>1</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/1360612.1360614" xlink:type="simple">10.1145/1360612.1360614</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref004">
<label>4</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Pollefeys</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Nistér</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Frahm</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Akbarzadeh</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Mordohai</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Clipp</surname> <given-names>B</given-names></name>, <etal>et al</etal>. <article-title>Detailed real-time urban 3D reconstruction from video</article-title>. <source>International Journal of Computer Vision</source>. <year>2008</year>;<volume>78</volume>:<fpage>143</fpage>–<lpage>167</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s11263-007-0086-4" xlink:type="simple">10.1007/s11263-007-0086-4</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref005">
<label>5</label>
<mixed-citation publication-type="other" xlink:type="simple">Schaffalitzky F, Zisserman A. Multi-View Matching for Unordered Image Sets, or “How do I organize my holiday snaps?”. In: Proceedings of the 7th European Conference on Computer Vision-Part I. ECCV’02. Berlin, Heidelberg: Springer-Verlag; 2002. p. 414–431.</mixed-citation>
</ref>
<ref id="pone.0311038.ref006">
<label>6</label>
<mixed-citation publication-type="other" xlink:type="simple">Schönberger JL, Frahm JM. Structure-from-Motion Revisited. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR); 2016. p. 4104–4113.</mixed-citation>
</ref>
<ref id="pone.0311038.ref007">
<label>7</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Borowicz</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Lynch</surname> <given-names>HJ</given-names></name>, <name name-style="western"><surname>Estro</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Foley</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Gonçalves</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Herman</surname> <given-names>KB</given-names></name>, <etal>et al</etal>. <article-title>Social Sensors for Wildlife: Ecological Opportunities in the Era of Camera Ubiquity</article-title>. <source>Frontiers in Marine Science</source>. <year>2021</year>;<volume>8</volume>:<fpage>645288</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmars.2021.645288" xlink:type="simple">10.3389/fmars.2021.645288</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref008">
<label>8</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Klosterman</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Melaas</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Martinez</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Frederick</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>O’Keefe</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Fine-scale perspectives on landscape phenology from unmanned aerial vehicle (UAV) photography</article-title>. <source>Agricultural and Forest Meteorology</source>. <year>2018</year>;<volume>248</volume>:<fpage>397</fpage>–<lpage>407</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.agrformet.2017.10.015" xlink:type="simple">10.1016/j.agrformet.2017.10.015</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref009">
<label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Manfreda</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>McCabe</surname> <given-names>MF</given-names></name>, <name name-style="western"><surname>Miller</surname> <given-names>PE</given-names></name>, <name name-style="western"><surname>Lucas</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Pajuelo Madrigal</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Mallinis</surname> <given-names>G</given-names></name>, <etal>et al</etal>. <article-title>On the use of unmanned aerial systems for environmental monitoring</article-title>. <source>Remote Sensing</source>. <year>2018</year>;<volume>10</volume>(<issue>4</issue>):<fpage>641</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/rs10040641" xlink:type="simple">10.3390/rs10040641</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref010">
<label>10</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Pfeifer</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Barbosa</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Mustafa</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Peter</surname> <given-names>HU</given-names></name>, <name name-style="western"><surname>Rümmler</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Brenning</surname> <given-names>A</given-names></name>. <article-title>Using fixed-wing UAV for detecting and mapping the distribution and abundance of penguins on the South Shetlands Islands, Antarctica</article-title>. <source>Drones</source>. <year>2019</year>;<volume>3</volume>(<issue>2</issue>):<fpage>39</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/drones3020039" xlink:type="simple">10.3390/drones3020039</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref011">
<label>11</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zmarz</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Rodzewicz</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Dąbski</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Karsznia</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Korczak-Abshire</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Chwedorzewska</surname> <given-names>KJ</given-names></name>. <article-title>Application of UAV BVLOS remote sensing data for multi-faceted analysis of Antarctic ecosystem</article-title>. <source>Remote Sensing of Environment</source>. <year>2018</year>;<volume>217</volume>:<fpage>375</fpage>–<lpage>388</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.rse.2018.08.031" xlink:type="simple">10.1016/j.rse.2018.08.031</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref012">
<label>12</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Kamp</surname> <given-names>U</given-names></name>, <name name-style="western"><surname>McManigal</surname> <given-names>KG</given-names></name>, <name name-style="western"><surname>Dashtseren</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Walther</surname> <given-names>M</given-names></name>. <article-title>Documenting glacial changes between 1910, 1970, 1992 and 2010 in the Turgen Mountains, Mongolian Altai, using repeat photographs, topographic maps, and satellite imagery</article-title>. <source>The Geographical Journal</source>. <year>2013</year>;<volume>179</volume>(<issue>3</issue>):<fpage>248</fpage>–<lpage>263</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1475-4959.2012.00486.x" xlink:type="simple">10.1111/j.1475-4959.2012.00486.x</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref013">
<label>13</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Kavan</surname> <given-names>J</given-names></name>. <article-title>Early twentieth century evolution of Ferdinand Glacier, Svalbard, based on historic photographs and structure-from-motion technique</article-title>. <source>Geografiska Annaler: Series A, Physical Geography</source>. <year>2020</year>;<volume>102</volume>(<issue>1</issue>):<fpage>57</fpage>–<lpage>67</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/04353676.2020.1715124" xlink:type="simple">10.1080/04353676.2020.1715124</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref014">
<label>14</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Felzenszwalb</surname> <given-names>PF</given-names></name>, <name name-style="western"><surname>Girshick</surname> <given-names>RB</given-names></name>, <name name-style="western"><surname>McAllester</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Ramanan</surname> <given-names>D</given-names></name>. <article-title>Object detection with discriminatively trained part-based models</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2009</year>;<volume>32</volume>(<issue>9</issue>):<fpage>1627</fpage>–<lpage>1645</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/TPAMI.2009.167" xlink:type="simple">10.1109/TPAMI.2009.167</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref015">
<label>15</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Fu</surname> <given-names>KS</given-names></name>, <name name-style="western"><surname>Mui</surname> <given-names>J</given-names></name>. <article-title>A survey on image segmentation</article-title>. <source>Pattern Recognition</source>. <year>1981</year>;<volume>13</volume>(<issue>1</issue>):<fpage>3</fpage>–<lpage>16</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/0031-3203(81)90028-5" xlink:type="simple">10.1016/0031-3203(81)90028-5</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref016">
<label>16</label>
<mixed-citation publication-type="other" xlink:type="simple">Kirillov A, He K, Girshick R, Rother C, Dollár P. Panoptic segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2019. p. 9404–9413.</mixed-citation>
</ref>
<ref id="pone.0311038.ref017">
<label>17</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Minaee</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Boykov</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Porikli</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Plaza</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Kehtarnavaz</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Terzopoulos</surname> <given-names>D</given-names></name>. <article-title>Image segmentation using deep learning: A survey</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2021</year>;<volume>44</volume>(<issue>7</issue>):<fpage>3523</fpage>–<lpage>3542</lpage>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref018">
<label>18</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zou</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Shi</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Guo</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Ye</surname> <given-names>J</given-names></name>. <article-title>Object Detection in 20 Years: A Survey</article-title>. <source>Proceedings of the IEEE</source>. <year>2023</year>;<volume>111</volume>(<issue>3</issue>):<fpage>257</fpage>–<lpage>276</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/JPROC.2023.3238524" xlink:type="simple">10.1109/JPROC.2023.3238524</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref019">
<label>19</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Chen</surname> <given-names>LC</given-names></name>, <name name-style="western"><surname>Papandreou</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Kokkinos</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Murphy</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Yuille</surname> <given-names>AL</given-names></name>. <article-title>Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2017</year>;<volume>40</volume>(<issue>4</issue>):<fpage>834</fpage>–<lpage>848</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/TPAMI.2017.2699184" xlink:type="simple">10.1109/TPAMI.2017.2699184</ext-link></comment> <object-id pub-id-type="pmid">28463186</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref020">
<label>20</label>
<mixed-citation publication-type="other" xlink:type="simple">Chen LC, Papandreou G, Schroff F, Adam H. Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:170605587. 2017.</mixed-citation>
</ref>
<ref id="pone.0311038.ref021">
<label>21</label>
<mixed-citation publication-type="other" xlink:type="simple">Cheng B, Misra I, Schwing AG, Kirillov A, Girdhar R. Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2022. p. 1290–1299.</mixed-citation>
</ref>
<ref id="pone.0311038.ref022">
<label>22</label>
<mixed-citation publication-type="other" xlink:type="simple">Long J, Shelhamer E, Darrell T. Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2015. p. 3431–3440.</mixed-citation>
</ref>
<ref id="pone.0311038.ref023">
<label>23</label>
<mixed-citation publication-type="other" xlink:type="simple">Bolya D, Zhou C, Xiao F, Lee YJ. YOLACT: Real-time instance segmentation. In: Proceedings of the IEEE/CVF International Conference on Computer Vision; 2019. p. 9157–9166.</mixed-citation>
</ref>
<ref id="pone.0311038.ref024">
<label>24</label>
<mixed-citation publication-type="other" xlink:type="simple">He K, Gkioxari G, Dollár P, Girshick R. Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision; 2017. p. 2961–2969.</mixed-citation>
</ref>
<ref id="pone.0311038.ref025">
<label>25</label>
<mixed-citation publication-type="other" xlink:type="simple">Li F, Zhang H, Xu H, Liu S, Zhang L, Ni LM, et al. Mask DINO: Towards a unified transformer-based framework for object detection and segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2023. p. 3041–3050.</mixed-citation>
</ref>
<ref id="pone.0311038.ref026">
<label>26</label>
<mixed-citation publication-type="other" xlink:type="simple">Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S. End-to-end object detection with transformers. In: European Conference on Computer Vision. Springer; 2020. p. 213–229.</mixed-citation>
</ref>
<ref id="pone.0311038.ref027">
<label>27</label>
<mixed-citation publication-type="other" xlink:type="simple">Chen Q, Wang J, Han C, Zhang S, Li Z, Chen X, et al. Group DETR v2: Strong object detector with encoder-decoder pretraining. arXiv preprint arXiv:221103594. 2022.</mixed-citation>
</ref>
<ref id="pone.0311038.ref028">
<label>28</label>
<mixed-citation publication-type="other" xlink:type="simple">Chen Q, Chen X, Wang J, Zhang S, Yao K, Feng H, et al. Group DETR: Fast DETR training with group-wise one-to-many assignment. In: Proceedings of the IEEE/CVF International Conference on Computer Vision; 2023. p. 6633–6642.</mixed-citation>
</ref>
<ref id="pone.0311038.ref029">
<label>29</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Chen</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Ding</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Xin</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Mo</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>Context autoencoder for self-supervised representation learning</article-title>. <source>International Journal of Computer Vision</source>. <year>2023</year>; p. <fpage>1</fpage>–<lpage>16</lpage>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref030">
<label>30</label>
<mixed-citation publication-type="other" xlink:type="simple">Jain J, Li J, Chiu MT, Hassani A, Orlov N, Shi H. OneFormer: One transformer to rule universal image segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2023. p. 2989–2998.</mixed-citation>
</ref>
<ref id="pone.0311038.ref031">
<label>31</label>
<mixed-citation publication-type="other" xlink:type="simple">Li Z, Wang W, Xie E, Yu Z, Anandkumar A, Alvarez JM, et al. Panoptic SegFormer: Delving deeper into panoptic segmentation with transformers. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2022. p. 1280–1289.</mixed-citation>
</ref>
<ref id="pone.0311038.ref032">
<label>32</label>
<mixed-citation publication-type="other" xlink:type="simple">Meng D, Chen X, Fan Z, Zeng G, Li H, Yuan Y, et al. Conditional DETR for fast training convergence. In: Proceedings of the IEEE/CVF International Conference on Computer Vision; 2021. p. 3651–3660.</mixed-citation>
</ref>
<ref id="pone.0311038.ref033">
<label>33</label>
<mixed-citation publication-type="other" xlink:type="simple">Zhang H, Li F, Xu H, Huang S, Liu S, Ni LM, et al. MP-Former: Mask-piloted transformer for image segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2023. p. 18074–18083.</mixed-citation>
</ref>
<ref id="pone.0311038.ref034">
<label>34</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Vaswani</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Shazeer</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Parmar</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Uszkoreit</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Jones</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Gomez</surname> <given-names>AN</given-names></name>, <etal>et al</etal>. <article-title>Attention is all you need</article-title>. <source>Advances in Neural Information Processing Systems</source>. <year>2017</year>;<volume>30</volume>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref035">
<label>35</label>
<mixed-citation publication-type="other" xlink:type="simple">Chen X, Zhao Z, Zhang Y, Duan M, Qi D, Zhao H. FocalClick: Towards practical interactive image segmentation. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2022. p. 1300–1309.</mixed-citation>
</ref>
<ref id="pone.0311038.ref036">
<label>36</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Grady</surname> <given-names>L</given-names></name>. <article-title>Random walks for image segmentation</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2006</year>;<volume>28</volume>(<issue>11</issue>):<fpage>1768</fpage>–<lpage>1783</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/TPAMI.2006.233" xlink:type="simple">10.1109/TPAMI.2006.233</ext-link></comment> <object-id pub-id-type="pmid">17063682</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref037">
<label>37</label>
<mixed-citation publication-type="other" xlink:type="simple">Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, et al. Segment anything. arXiv preprint arXiv:230402643. 2023.</mixed-citation>
</ref>
<ref id="pone.0311038.ref038">
<label>38</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Li</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Sun</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Tang</surname> <given-names>CK</given-names></name>, <name name-style="western"><surname>Shum</surname> <given-names>HY</given-names></name>. <article-title>Lazy snapping</article-title>. <source>ACM Transactions on Graphics (ToG)</source>. <year>2004</year>;<volume>23</volume>(<issue>3</issue>):<fpage>303</fpage>–<lpage>308</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/1015706.1015719" xlink:type="simple">10.1145/1015706.1015719</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref039">
<label>39</label>
<mixed-citation publication-type="other" xlink:type="simple">Liu Q, Xu Z, Bertasius G, Niethammer M. SimpleClick: Interactive image segmentation with simple vision transformers. In: Proceedings of the IEEE/CVF International Conference on Computer Vision; 2023. p. 22290–22300.</mixed-citation>
</ref>
<ref id="pone.0311038.ref040">
<label>40</label>
<mixed-citation publication-type="other" xlink:type="simple">Xu N, Price B, Cohen S, Yang J, Huang TS. Deep interactive object selection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2016. p. 373–381.</mixed-citation>
</ref>
<ref id="pone.0311038.ref041">
<label>41</label>
<mixed-citation publication-type="other" xlink:type="simple">Zou X, Yang J, Zhang H, Li F, Li L, Gao J, et al. Segment everything everywhere all at once. arXiv preprint arXiv:230406718. 2023.</mixed-citation>
</ref>
<ref id="pone.0311038.ref042">
<label>42</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Lynch</surname> <given-names>HJ</given-names></name>, <name name-style="western"><surname>LaRue</surname> <given-names>MA</given-names></name>. <article-title>First global census of the Adélie Penguin</article-title>. <source>The Auk: Ornithological Advances</source>. <year>2014</year>;<volume>131</volume>(<issue>4</issue>):<fpage>457</fpage>–<lpage>466</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1642/AUK-14-31.1" xlink:type="simple">10.1642/AUK-14-31.1</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref043">
<label>43</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Le</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Samaras</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lynch</surname> <given-names>HJ</given-names></name>. <article-title>A convolutional neural network architecture designed for the automated survey of seabird colonies</article-title>. <source>Remote Sensing in Ecology and Conservation</source>. <year>2022</year>;<volume>8</volume>(<issue>2</issue>):<fpage>251</fpage>–<lpage>262</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/rse2.240" xlink:type="simple">10.1002/rse2.240</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref044">
<label>44</label>
<mixed-citation publication-type="other" xlink:type="simple">Le H, Goncalves B, Samaras D, Lynch H. Weakly labeling the Antarctic: The penguin colony case. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops; 2019. p. 18–25.</mixed-citation>
</ref>
<ref id="pone.0311038.ref045">
<label>45</label>
<mixed-citation publication-type="other" xlink:type="simple">Maxar Technologies. Maxar Technologies; 2023. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.maxar.com/" xlink:type="simple">https://www.maxar.com/</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref046">
<label>46</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Johnson</surname> <given-names>BA</given-names></name>, <name name-style="western"><surname>Ma</surname> <given-names>L</given-names></name>. <article-title>Image segmentation and object-based image analysis for environmental monitoring: Recent areas of interest, researchers’ views on the future priorities</article-title>. <source>Remote Sensing</source>. <year>2020</year>;<volume>12</volume>(<issue>11</issue>):<fpage>1772</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/rs12111772" xlink:type="simple">10.3390/rs12111772</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref047">
<label>47</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Radoux</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Bourdouxhe</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Coos</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Dufrêne</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Defourny</surname> <given-names>P</given-names></name>. <article-title>Improving ecotope segmentation by combining topographic and spectral data</article-title>. <source>Remote Sensing</source>. <year>2019</year>;<volume>11</volume>(<issue>3</issue>):<fpage>354</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/rs11030354" xlink:type="simple">10.3390/rs11030354</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref048">
<label>48</label>
<mixed-citation publication-type="other" xlink:type="simple">Brejcha J, Lukáč M, Hold-Geoffroy Y, Wang O, Čadík M. LandscapeAR: Large Scale Outdoor Augmented Reality by Matching Photographs with Terrain Models Using Learned Descriptors. In: European Conference on Computer Vision. Springer; 2020. p. 295–312.</mixed-citation>
</ref>
<ref id="pone.0311038.ref049">
<label>49</label>
<mixed-citation publication-type="other" xlink:type="simple">Humenberger M, Cabon Y, Guerin N, Morat J, Leroy V, Revaud J, et al. Robust image retrieval-based visual localization using Kapture. arXiv preprint arXiv:200713867. 2020.</mixed-citation>
</ref>
<ref id="pone.0311038.ref050">
<label>50</label>
<mixed-citation publication-type="other" xlink:type="simple">Li Y, Snavely N, Huttenlocher D, Fua P. Worldwide Pose Estimation Using 3D Point Clouds. In: European Conference on Computer Vision. Springer; 2012. p. 15–29.</mixed-citation>
</ref>
<ref id="pone.0311038.ref051">
<label>51</label>
<mixed-citation publication-type="other" xlink:type="simple">Peng S, He Z, Zhang H, Yan R, Wang C, Zhu Q, et al. MegLoc: A robust and accurate visual localization pipeline. arXiv preprint arXiv:211113063. 2021.</mixed-citation>
</ref>
<ref id="pone.0311038.ref052">
<label>52</label>
<mixed-citation publication-type="other" xlink:type="simple">Sarlin PE, Cadena C, Siegwart R, Dymczyk M. From Coarse to Fine: Robust Hierarchical Localization at Large Scale. In: 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR); 2019. p. 12708–12717.</mixed-citation>
</ref>
<ref id="pone.0311038.ref053">
<label>53</label>
<mixed-citation publication-type="other" xlink:type="simple">Sarlin PE, DeTone D, Malisiewicz T, Rabinovich A. SuperGlue: Learning Feature Matching With Graph Neural Networks. In: 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR); 2020. p. 4937–4946.</mixed-citation>
</ref>
<ref id="pone.0311038.ref054">
<label>54</label>
<mixed-citation publication-type="other" xlink:type="simple">Sattler T, Havlena M, Radenovic F, Schindler K, Pollefeys M. Hyperpoints and Fine Vocabularies for Large-Scale Location Recognition. In: 2015 IEEE International Conference on Computer Vision (ICCV); 2015. p. 2102–2110.</mixed-citation>
</ref>
<ref id="pone.0311038.ref055">
<label>55</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Sattler</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Leibe</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Kobbelt</surname> <given-names>L</given-names></name>. <article-title>Efficient &amp; Effective Prioritized Matching for Large-Scale Image-Based Localization</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2017</year>;<volume>39</volume>(<issue>9</issue>):<fpage>1744</fpage>–<lpage>1756</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/TPAMI.2016.2611662" xlink:type="simple">10.1109/TPAMI.2016.2611662</ext-link></comment> <object-id pub-id-type="pmid">27662671</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref056">
<label>56</label>
<mixed-citation publication-type="other" xlink:type="simple">Schönberger JL, Pollefeys M, Geiger A, Sattler T. Semantic visual localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. p. 6896–6906.</mixed-citation>
</ref>
<ref id="pone.0311038.ref057">
<label>57</label>
<mixed-citation publication-type="other" xlink:type="simple">Shan Q, Wu C, Curless B, Furukawa Y, Hernandez C, Seitz SM. Accurate geo-registration by ground-to-aerial image matching. In: 2014 2nd International Conference on 3D Vision. vol. 1. IEEE; 2014. p. 525–532.</mixed-citation>
</ref>
<ref id="pone.0311038.ref058">
<label>58</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Svärm</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Enqvist</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Kahl</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Oskarsson</surname> <given-names>M</given-names></name>. <article-title>City-scale localization for cameras with known vertical direction</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2016</year>;<volume>39</volume>(<issue>7</issue>):<fpage>1455</fpage>–<lpage>1461</lpage>. <object-id pub-id-type="pmid">27514034</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref059">
<label>59</label>
<mixed-citation publication-type="other" xlink:type="simple">Taira H, Okutomi M, Sattler T, Cimpoi M, Pollefeys M, Sivic J, et al. InLoc: Indoor visual localization with dense matching and view synthesis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. p. 7199–7209.</mixed-citation>
</ref>
<ref id="pone.0311038.ref060">
<label>60</label>
<mixed-citation publication-type="other" xlink:type="simple">Taira H, Rocco I, Sedlar J, Okutomi M, Sivic J, Pajdla T, et al. Is this the right place? Geometric-semantic pose verification for indoor visual localization. In: Proceedings of the IEEE/CVF International Conference on Computer Vision; 2019. p. 4373–4383.</mixed-citation>
</ref>
<ref id="pone.0311038.ref061">
<label>61</label>
<mixed-citation publication-type="other" xlink:type="simple">Zeisl B, Sattler T, Pollefeys M. Camera pose voting for large-scale image-based localization. In: Proceedings of the IEEE International Conference on Computer Vision; 2015. p. 2704–2712.</mixed-citation>
</ref>
<ref id="pone.0311038.ref062">
<label>62</label>
<mixed-citation publication-type="other" xlink:type="simple">Barath D, Matas J. Graph-cut RANSAC. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2018. p. 6733–6741.</mixed-citation>
</ref>
<ref id="pone.0311038.ref063">
<label>63</label>
<mixed-citation publication-type="other" xlink:type="simple">Barath D, Matas J, Noskova J. MAGSAC: marginalizing sample consensus. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2019. p. 10197–10205.</mixed-citation>
</ref>
<ref id="pone.0311038.ref064">
<label>64</label>
<mixed-citation publication-type="other" xlink:type="simple">Barath D, Ivashechkin M, Matas J. Progressive NAPSAC: sampling from gradually growing neighborhoods. arXiv preprint arXiv:190602295. 2019;.</mixed-citation>
</ref>
<ref id="pone.0311038.ref065">
<label>65</label>
<mixed-citation publication-type="other" xlink:type="simple">Barath D, Noskova J, Ivashechkin M, Matas J. MAGSAC++, a fast, reliable and accurate robust estimator. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2020. p. 1304–1312.</mixed-citation>
</ref>
<ref id="pone.0311038.ref066">
<label>66</label>
<mixed-citation publication-type="other" xlink:type="simple">Chum O, Perd’och M, Matas J. Geometric min-hashing: Finding a (thick) needle in a haystack. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. IEEE; 2009. p. 17–24.</mixed-citation>
</ref>
<ref id="pone.0311038.ref067">
<label>67</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Fischler</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Bolles</surname> <given-names>RC</given-names></name>. <article-title>Random Sample Consensus: A Paradigm for Model Fitting with Applications to Image Analysis and Automated Cartography</article-title>. <source>Commun ACM</source>. <year>1981</year>;<volume>24</volume>(<issue>6</issue>):<fpage>381</fpage>–<lpage>395</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/358669.358692" xlink:type="simple">10.1145/358669.358692</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref068">
<label>68</label>
<mixed-citation publication-type="other" xlink:type="simple">Lebeda K, Matas J, Chum O. Fixing the Locally Optimized RANSAC. In: Proceedings of the British Machine Vision Conference. BMVA Press; 2012. p. 95.1–95.11.</mixed-citation>
</ref>
<ref id="pone.0311038.ref069">
<label>69</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Raguram</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Chum</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Pollefeys</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Matas</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Frahm</surname> <given-names>JM</given-names></name>. <article-title>USAC: A universal framework for random sample consensus</article-title>. <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source>. <year>2012</year>;<volume>35</volume>(<issue>8</issue>):<fpage>2022</fpage>–<lpage>2038</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/TPAMI.2012.257" xlink:type="simple">10.1109/TPAMI.2012.257</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref070">
<label>70</label>
<mixed-citation publication-type="other" xlink:type="simple">Irschara A, Zach C, Frahm JM, Bischof H. From structure-from-motion point clouds to fast location recognition. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. IEEE; 2009. p. 2599–2606.</mixed-citation>
</ref>
<ref id="pone.0311038.ref071">
<label>71</label>
<mixed-citation publication-type="other" xlink:type="simple">Sarlin PE, Debraine F, Dymczyk M, Siegwart R, Cadena C. Leveraging deep visual descriptors for hierarchical efficient localization. In: Conference on Robot Learning. PMLR; 2018. p. 456–465.</mixed-citation>
</ref>
<ref id="pone.0311038.ref072">
<label>72</label>
<mixed-citation publication-type="other" xlink:type="simple">Sattler T, Weyand T, Leibe B, Kobbelt L. Image Retrieval for Image-Based Localization Revisited. In: British Machine Vision Conference. vol. 1; 2012. p. 4.</mixed-citation>
</ref>
<ref id="pone.0311038.ref073">
<label>73</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Mueller</surname> <given-names>MS</given-names></name>, <name name-style="western"><surname>Sattler</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Pollefeys</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Jutzi</surname> <given-names>B</given-names></name>. <article-title>Image-to-image translation for enhanced feature matching, image retrieval and visual localization</article-title>. <source>ISPRS Annals of the Photogrammetry, Remote Sensing and Spatial Information Sciences</source>. <year>2019</year>;<volume>4</volume>:<fpage>111</fpage>–<lpage>119</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5194/isprs-annals-IV-2-W7-111-2019" xlink:type="simple">10.5194/isprs-annals-IV-2-W7-111-2019</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref074">
<label>74</label>
<mixed-citation publication-type="other" xlink:type="simple">Panek V, Kukelova Z, Sattler T. MeshLoc: Mesh-based visual localization. In: European Conference on Computer Vision. Springer; 2022. p. 589–609.</mixed-citation>
</ref>
<ref id="pone.0311038.ref075">
<label>75</label>
<mixed-citation publication-type="other" xlink:type="simple">Panek V, Kukelova Z, Sattler T. Visual Localization using Imperfect 3D Models from the Internet. In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2023. p. 13175–13186.</mixed-citation>
</ref>
<ref id="pone.0311038.ref076">
<label>76</label>
<mixed-citation publication-type="other" xlink:type="simple">Truong P, Danelljan M, Timofte R. GLU-Net: Global-Local Universal Network for Dense Flow and Correspondences. In: 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR); 2020. p. 6257–6267.</mixed-citation>
</ref>
<ref id="pone.0311038.ref077">
<label>77</label>
<mixed-citation publication-type="other" xlink:type="simple">Howat I, et al. The Reference Elevation Model of Antarctica—Strips, Version 4.1; 2022. Harvard Dataverse. Available from: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7910/DVN/X7NDNY" xlink:type="simple">https://doi.org/10.7910/DVN/X7NDNY</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref078">
<label>78</label>
<mixed-citation publication-type="other" xlink:type="simple">Howat I, et al. The Reference Elevation Model of Antarctica—Mosaics, Version 2; 2022. Harvard Dataverse. Available from: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7910/DVN/EBW8UC" xlink:type="simple">https://doi.org/10.7910/DVN/EBW8UC</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref079">
<label>79</label>
<mixed-citation publication-type="other" xlink:type="simple">Sinergise Ltd. Sentinel Hub; 2023. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.sentinel-hub.com" xlink:type="simple">https://www.sentinel-hub.com</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref080">
<label>80</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Brown</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Mann</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Ryder</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Subbiah</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Kaplan</surname> <given-names>JD</given-names></name>, <name name-style="western"><surname>Dhariwal</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>Language models are few-shot learners</article-title>. <source>Advances in Neural Information Processing Systems</source>. <year>2020</year>;<volume>33</volume>:<fpage>1877</fpage>–<lpage>1901</lpage>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref081">
<label>81</label>
<mixed-citation publication-type="other" xlink:type="simple">Dawson-Haggerty et al. Trimesh; 2019. Available from: <ext-link ext-link-type="uri" xlink:href="https://trimsh.org/" xlink:type="simple">https://trimsh.org/</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0311038.ref082">
<label>82</label>
<mixed-citation publication-type="other" xlink:type="simple">Cignoni P, Callieri M, Corsini M, Dellepiane M, Ganovelli F, Ranzuglia G, et al. Meshlab: an open-source mesh processing tool. In: Eurographics Italian Chapter Conference. vol. 2008. Salerno, Italy; 2008. p. 129–136.</mixed-citation>
</ref>
<ref id="pone.0311038.ref083">
<label>83</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Levenberg</surname> <given-names>K</given-names></name>. <article-title>A method for the solution of certain non-linear problems in least squares</article-title>. <source>Quarterly of Applied Mathematics</source>. <year>1944</year>;<volume>2</volume>(<issue>2</issue>):<fpage>164</fpage>–<lpage>168</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1090/qam/10666" xlink:type="simple">10.1090/qam/10666</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref084">
<label>84</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Marquardt</surname> <given-names>DW</given-names></name>. <article-title>An Algorithm for Least-Squares Estimation of Nonlinear Parameters</article-title>. <source>Journal of the Society for Industrial and Applied Mathematics</source>. <year>1963</year>;<volume>11</volume>(<issue>2</issue>):<fpage>431</fpage>–<lpage>441</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1137/0111030" xlink:type="simple">10.1137/0111030</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref085">
<label>85</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Bhardwaj</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Kumar</surname> <given-names>A</given-names></name>. <article-title>The comparison of shape indices and perimeter interface of selected protected areas especially with reference to Sariska Tiger Reserve, India</article-title>. <source>Global Ecology and Conservation</source>. <year>2019</year>;<volume>17</volume>:<fpage>e00504</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.gecco.2018.e00504" xlink:type="simple">10.1016/j.gecco.2018.e00504</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref086">
<label>86</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Schmidt</surname> <given-names>AE</given-names></name>, <name name-style="western"><surname>Ballard</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Lescroël</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Dugger</surname> <given-names>KM</given-names></name>, <name name-style="western"><surname>Jongsomjit</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Elrod</surname> <given-names>ML</given-names></name>, <etal>et al</etal>. <article-title>The influence of subcolony-scale nesting habitat on the reproductive success of Adélie penguins</article-title>. <source>Scientific Reports</source>. <year>2021</year>;<volume>11</volume>(<issue>1</issue>):<fpage>15380</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41598-021-94861-7" xlink:type="simple">10.1038/s41598-021-94861-7</ext-link></comment> <object-id pub-id-type="pmid">34321573</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref087">
<label>87</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>LaRue</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Lynch</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Lyver</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Barton</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Ainley</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Pollard</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>A method for estimating colony sizes of Adélie penguins using remote sensing imagery</article-title>. <source>Polar Biology</source>. <year>2014</year>;<volume>37</volume>:<fpage>507</fpage>–<lpage>517</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00300-014-1451-8" xlink:type="simple">10.1007/s00300-014-1451-8</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref088">
<label>88</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Che-Castaldo</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Jenouvrier</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Youngflesh</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Shoemaker</surname> <given-names>KT</given-names></name>, <name name-style="western"><surname>Humphries</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>McDowall</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>Pan-Antarctic analysis aggregating spatial estimates of Adélie penguin abundance reveals robust dynamics despite stochastic noise</article-title>. <source>Nature Communications</source>. <year>2017</year>;<volume>8</volume>(<issue>1</issue>):<fpage>832</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41467-017-00890-0" xlink:type="simple">10.1038/s41467-017-00890-0</ext-link></comment> <object-id pub-id-type="pmid">29018199</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref089">
<label>89</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Butler</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Ross</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Beaman</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Hoepner</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Baring</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>da Silva</surname> <given-names>KB</given-names></name>. <article-title>Utilising tourist-generated citizen science data in response to environmental challenges: A systematic literature review</article-title>. <source>Journal of Environmental Management</source>. <year>2023</year>;<volume>339</volume>:<fpage>117889</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jenvman.2023.117889" xlink:type="simple">10.1016/j.jenvman.2023.117889</ext-link></comment> <object-id pub-id-type="pmid">37058928</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref090">
<label>90</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Davies</surname> <given-names>TK</given-names></name>, <name name-style="western"><surname>Stevens</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Meekan</surname> <given-names>MG</given-names></name>, <name name-style="western"><surname>Struve</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Rowcliffe</surname> <given-names>JM</given-names></name>. <article-title>Can citizen science monitor whale-shark aggregations? Investigating bias in mark–recapture modelling using identification photographs sourced from the public</article-title>. <source>Wildlife Research</source>. <year>2012</year>;<volume>39</volume>(<issue>8</issue>):<fpage>696</fpage>–<lpage>704</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1071/WR12092" xlink:type="simple">10.1071/WR12092</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref091">
<label>91</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Magson</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Monacella</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Scott</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Buffat</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Arunrugstichai</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Chuangcharoendee</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Citizen science reveals the population structure and seasonal presence of whale sharks in the Gulf of Thailand</article-title>. <source>Journal of Fish Biology</source>. <year>2022</year>;<volume>101</volume>(<issue>3</issue>):<fpage>540</fpage>–<lpage>549</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/jfb.15121" xlink:type="simple">10.1111/jfb.15121</ext-link></comment> <object-id pub-id-type="pmid">35638311</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref092">
<label>92</label>
<mixed-citation publication-type="other" xlink:type="simple">DeTone D, Malisiewicz T, Rabinovich A. SuperPoint: Self-Supervised Interest Point Detection and Description. In: 2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW); 2018. p. 337–33712.</mixed-citation>
</ref>
<ref id="pone.0311038.ref093">
<label>93</label>
<mixed-citation publication-type="other" xlink:type="simple">Dai A, Chang AX, Savva M, Halber M, Funkhouser T, Nießner M. ScanNet: Richly-Annotated 3D Reconstructions of Indoor Scenes. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR); 2017. p. 2432–2443.</mixed-citation>
</ref>
<ref id="pone.0311038.ref094">
<label>94</label>
<mixed-citation publication-type="other" xlink:type="simple">DeTone D, Malisiewicz T, Rabinovich A. Toward geometric deep SLAM. arXiv preprint arXiv:170707410. 2017;.</mixed-citation>
</ref>
<ref id="pone.0311038.ref095">
<label>95</label>
<mixed-citation publication-type="other" xlink:type="simple">Radenovic F, Iscen A, Tolias G, Avrithis Y, Chum O. Revisiting Oxford and Paris: Large-Scale Image Retrieval Benchmarking. In: 2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition; 2018. p. 5706–5715.</mixed-citation>
</ref>
<ref id="pone.0311038.ref096">
<label>96</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Donnelly</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Bertness</surname> <given-names>MD</given-names></name>. <article-title>Rapid shoreward encroachment of salt marsh cordgrass in response to accelerated sea-level rise</article-title>. <source>Proceedings of the National Academy of Sciences</source>. <year>2001</year>;<volume>98</volume>(<issue>25</issue>):<fpage>14218</fpage>–<lpage>14223</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.251209298" xlink:type="simple">10.1073/pnas.251209298</ext-link></comment> <object-id pub-id-type="pmid">11724926</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref097">
<label>97</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Morisette</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Richardson</surname> <given-names>AD</given-names></name>, <name name-style="western"><surname>Knapp</surname> <given-names>AK</given-names></name>, <name name-style="western"><surname>Fisher</surname> <given-names>JI</given-names></name>, <name name-style="western"><surname>Graham</surname> <given-names>EA</given-names></name>, <name name-style="western"><surname>Abatzoglou</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Tracking the rhythm of the seasons in the face of global change: phenological research in the 21st century</article-title>. <source>Frontiers in Ecology and the Environment</source>. <year>2009</year>;<volume>7</volume>(<issue>5</issue>):<fpage>253</fpage>–<lpage>260</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1890/070217" xlink:type="simple">10.1890/070217</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref098">
<label>98</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<surname>DiBello</surname>">
<name name-style="western"><surname>DiBello</surname> <given-names>FJ</given-names></name>, <name name-style="western"><surname>Calhoun</surname> <given-names>AJ</given-names></name>, <name name-style="western"><surname>Morgan</surname> <given-names>DE</given-names></name>, <name name-style="western"><surname>Sader</surname> <given-names>SA</given-names></name>. <article-title>Efficiency and detection accuracy using print and digital stereo aerial photography for remotely mapping vernal pools in New England landscapes</article-title>. <source>Wetlands</source>. <year>2016</year>;<volume>36</volume>:<fpage>505</fpage>–<lpage>514</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s13157-016-0759-2" xlink:type="simple">10.1007/s13157-016-0759-2</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0311038.ref099">
<label>99</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Biederman</surname> <given-names>I</given-names></name>. <article-title>Recognition-by-components: a theory of human image understanding</article-title>. <source>Psychological Review</source>. <year>1987</year>;<volume>94</volume>(<issue>2</issue>):<fpage>115</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/0033-295X.94.2.115" xlink:type="simple">10.1037/0033-295X.94.2.115</ext-link></comment> <object-id pub-id-type="pmid">3575582</object-id></mixed-citation>
</ref>
<ref id="pone.0311038.ref100">
<label>100</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Hussain Ismail</surname> <given-names>AM</given-names></name>, <name name-style="western"><surname>Solomon</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Hansard</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Mareschal</surname> <given-names>I</given-names></name>. <article-title>A perceptual bias for man-made objects in humans</article-title>. <source>Proceedings of the Royal Society B</source>. <year>2019</year>;<volume>286</volume>(<issue>1914</issue>):<fpage>20191492</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1098/rspb.2019.1492" xlink:type="simple">10.1098/rspb.2019.1492</ext-link></comment> <object-id pub-id-type="pmid">31690239</object-id></mixed-citation>
</ref>
</ref-list>
</back>
</article>