<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0312289</article-id>
<article-id pub-id-type="publisher-id">PONE-D-24-10963</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Semantics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Data management</subject><subj-group><subject>Data visualization</subject><subj-group><subject>Infographics</subject><subj-group><subject>Charts</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Data management</subject><subj-group><subject>Metadata</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Computer software</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Engineering and technology</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Computer software</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Research design</subject><subj-group><subject>Survey research</subject><subj-group><subject>Surveys</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Phenotypes</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Endocrinology</subject><subj-group><subject>Endocrine disorders</subject><subj-group><subject>Diabetes mellitus</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Metabolic disorders</subject><subj-group><subject>Diabetes mellitus</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Value sets and the problem of redundancy in value set repositories</article-title>
<alt-title alt-title-type="running-head">Value sets and the problem of redundancy in value set repositories</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-7853-6137</contrib-id>
<name name-style="western">
<surname>Gold</surname>
<given-names>Sigfried</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/resources/">Resources</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lehmann</surname>
<given-names>Harold P.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-6878-189X</contrib-id>
<name name-style="western">
<surname>Schilling</surname>
<given-names>Lisa M.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-4193-2114</contrib-id>
<name name-style="western">
<surname>Lutters</surname>
<given-names>Wayne G.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>College of Information Studies, University of Maryland, College Park, MD, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Johns Hopkins University, Baltimore, Maryland, United States of America</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Division of General Internal Medicine, University of Colorado, Aurora, CO, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Dey</surname>
<given-names>Avanti</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Public Library of Science, UNITED STATES OF AMERICA</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">sigfried@sigfried.org</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>9</day>
<month>12</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>19</volume>
<issue>12</issue>
<elocation-id>e0312289</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>3</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>4</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-year>2024</copyright-year>
<copyright-holder>Gold et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0312289"/>
<abstract>
<sec id="sec001">
<title>Objective</title>
<p>Crafting high-quality value sets is time-consuming and requires a range of clinical, terminological, and informatics expertise. Despite widespread agreement on the importance of reusing value sets, value set repositories suffer from clutter and redundancy, greatly complicating efforts at reuse. When users encounter multiple value sets with the same name or ostensibly representing the same clinical condition, it can be difficult to choose amongst them or determine if any differences among them are due to error or intentional decision.</p>
</sec>
<sec id="sec002">
<title>Methods</title>
<p>This paper offers a view of value set development and reuse based on a field study of researchers and informaticists. The results emerge from an analysis of relevant literature, reflective practice, and the field research data.</p>
</sec>
<sec id="sec003">
<title>Results</title>
<p>Qualitative analysis of our study data, the relevant literature, and our own professional experience led us to three dichotomous concepts that frame an understanding of diverse practices and perspectives surrounding value set development:</p>
<list list-type="order">
<list-item><p>Permissible values versus analytic value sets;</p></list-item>
<list-item><p>Prescriptive versus descriptive approaches to controlled medical vocabulary use; and</p></list-item>
<list-item><p>Semantic and empirical types of value set development and evaluation practices and the data they rely on.</p></list-item>
</list>
<p>This three-fold framework opens up the redundancy problem, explaining why multiple value sets may or may not be needed and advancing academic understanding of value set development.</p>
</sec>
<sec id="sec004">
<title>Conclusion</title>
<p>In order for value set repositories to become more rather than less useful over time, software must channel user efforts into either improving existing value sets or making new ones only when absolutely necessary. This would require major, innovative changes to value set repository platforms. We believe the most direct path to giving value set developers the ability to leverage prior work is by encouraging them to compare existing value sets using advanced interfaces like VS-Hub, and by collecting and using metadata about code inclusion and exclusion decisions during the authoring process.</p>
</sec>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000001</institution-id>
<institution>National Science Foundation</institution>
</institution-wrap>
</funding-source>
<award-id>DGE-1632976</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-7853-6137</contrib-id>
<name name-style="western">
<surname>Gold</surname>
<given-names>Sigfried</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>SG was partially supported while doing this work by a National Science Foundation (<ext-link ext-link-type="uri" xlink:href="https://www.nsf.gov/" xlink:type="simple">https://www.nsf.gov/</ext-link>) training grant, DGE-1632976. Beyond providing the stipend, NSF had no role or influence on the research or the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="7"/>
<page-count count="20"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>Survey data have been anonymized and made available at Gold S. Value sets and the problem of redundancy in value set repositories. Survey data. OSF. 2024. doi:<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/ABTJU" xlink:type="simple">10.17605/OSF.IO/ABTJU</ext-link>. Interview data cannot be anonymized and are not included to protect participant privacy.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec005" sec-type="intro">
<title>1. Introduction</title>
<p>Controlled medical vocabularies (e.g., ICD10, SNOMED, RxNorm, CPT, LOINC) catalogue clinical concepts and relationships between them. A concept is signified by an entry in a medical vocabulary generally consisting of a definition, one or more synonymous labels, and a <italic>code</italic> to identify the concept in representing specific clinical events in electronic health records (EHR), registries, claims databases, and clinical data warehouses. Value sets are groupings of these identifiers that facilitate data collection, representation, harmonization, and analysis. (We treat the term “value set” as more or less synonymous with “code set”, “concept set”, “code list”, and “enumeration”, which are also used in some contexts.)</p>
<p>This paper focuses on the use of value sets in the context of observational research using real-world data (RWD) [<xref ref-type="bibr" rid="pone.0312289.ref001">1</xref>]. Despite the use of hierarchical classifications and other data structures to signify concepts at different levels of granularity, value sets are almost always needed when querying clinical data sets since a phenomenon of interest can usually be indicated using a variety of different codes. A study algorithm to determine the relative likelihood of outcome O in patients experiencing condition C depending on their receiving treatment T<sub>1</sub> or T<sub>2</sub> will need to define cohort or phenotype algorithms for identifying patient records indicating O, C, T<sub>1</sub>, and T<sub>2</sub>. (Our use of the term electronic phenotype, or just phenotype, follows others in the field of observational research with RWD, e.g., [<xref ref-type="bibr" rid="pone.0312289.ref002">2</xref>–<xref ref-type="bibr" rid="pone.0312289.ref005">5</xref>], and can be confusing for those not accustomed to this usage. See Section 3.3 for a definition.) An essential step in such algorithms is to select patient records containing specified fields whose values are any of the codes in a value set. Though further temporal and conditional logic are often needed beyond the simple presence or absence of matching records in a patient’s digital chart, value sets are usually the starting point for phenotype or cohort algorithms.</p>
<p>Crafting high-quality value sets is time-consuming and requires a range of clinical, terminological, and informatics expertise. Scholarly and practical efforts to address challenges in value set management (i.e., helping RWD researchers identify and select the set of codes best fitted to their hypothesis testing and analysis goals) [<xref ref-type="bibr" rid="pone.0312289.ref003">3</xref>–<xref ref-type="bibr" rid="pone.0312289.ref012">12</xref>] have resulted in value set definition and documentation standards [<xref ref-type="bibr" rid="pone.0312289.ref013">13</xref>–<xref ref-type="bibr" rid="pone.0312289.ref017">17</xref>] and in methods and tools for authoring value sets [<xref ref-type="bibr" rid="pone.0312289.ref018">18</xref>–<xref ref-type="bibr" rid="pone.0312289.ref022">22</xref>], for assessing value set semantics and quality [<xref ref-type="bibr" rid="pone.0312289.ref023">23</xref>–<xref ref-type="bibr" rid="pone.0312289.ref029">29</xref>], and for enabling and promoting value set sharing and reuse [<xref ref-type="bibr" rid="pone.0312289.ref030">30</xref>–<xref ref-type="bibr" rid="pone.0312289.ref032">32</xref>]. These papers demonstrate problems of bias and inaccuracy in value sets shared on public repositories, and many present specific methods to improve value set development. Williams et al. [<xref ref-type="bibr" rid="pone.0312289.ref004">4</xref>]—in a paper we used as a seed article for our literature review—perform a comparative review of the value set literature, offering nomenclature, a consolidated articulation of published knowledge on value sets, and a valuable catalog of recommendations for advancing technology for managing value sets.</p>
<p>The current paper offers a view of value set development and reuse based on a field study of researchers and informaticists. We conducted an online survey, semi-structured interviews with a subset of survey participants, and observation where possible of participants working on value sets, finding a diversity in real-world value set development practices and perspectives previously unexplored in the literature.</p>
<p>While there seems to be universal agreement on the importance of reusing value sets (or phenotype definitions containing value sets), we have recognized through interviews and our own experience that repositories of these objects suffer from clutter and redundancy, greatly complicating efforts at reuse.</p>
<p>Value set repositories tend to contain many value sets with the same name or ostensibly representing the same clinical condition, making it difficult for potential re-users to choose amongst them. When multiple value sets are found, it can be difficult to tell if they are redundant, that is, if any differences among them are due to error or if there are principled reasons to define multiple value sets for certain phenomena.</p>
<p>It has been implicitly assumed that value set repositories would improve and grow in utility as they gained wider and longer use. We ourselves have claimed that repositories would benefit by cooperating to consolidate or centralize in order to generate positive network effects by attracting wider audiences [<xref ref-type="bibr" rid="pone.0312289.ref009">9</xref>]. As we demonstrate, the opposite appears to be the case. With ongoing use, repositories accumulate redundant and low-quality value sets, making it increasingly difficult for a potential re-user to identify high-quality value sets appropriate to their needs. Positive network effects will only accrue if all contributions to a repository are dedicated either to improving existing value sets or making new ones when absolutely necessary.</p>
<p>Qualitative analysis of our study data, the relevant literature, and our own professional experience led us to three dichotomous concepts that frame an understanding of diverse practices and perspectives surrounding value set development. These three dichotomies distinguish:</p>
<list list-type="order">
<list-item><p>Permissible values versus analytic value sets. Permissible value sets are used in applications where data capture occurs (primary use). Analytic value sets are used in analysis or research applications (secondary use) in order to select records matching clinical conditions or events of interest.</p></list-item>
<list-item><p>Prescriptive versus descriptive perspectives on controlled medical vocabulary use. These tend to be held as implicit beliefs about coded concepts. A prescriptive orientation is appropriate to permissible values contexts, while a descriptive orientation may be appropriate in secondary use, analytic contexts.</p></list-item>
<list-item><p>Semantic and empirical types of value set development and evaluation practices and the data they rely on. Semantic practices and data relate to vocabularies and meaning and are always necessary. A descriptive approach to identifying codes for an analytic value set, however, would require empirical analysis of patient-level data. Empirical analysis and validation are always desirable for analytic value sets, but they are frequently not feasible.</p></list-item>
</list>
<p>We will show how this three-fold framework opens up the redundancy problem, explaining why multiple value sets may or may not be needed (see Section 3.6). Our field needs innovative software to help users navigate thickets of ostensibly redundant value sets not just to choose between them, but to make use of their differences in crafting value sets appropriate to researchers’ needs.</p>
</sec>
<sec id="sec006" sec-type="materials|methods">
<title>2. Methods</title>
<p>As noted, the intent of this research effort is to more deeply understand the diversity of real-world value set development practices, especially mapping the influence of specific contextual factors to those practices. The intended outcomes are both theoretical—developing a more precise, informative set of distinctions between approaches—and practical—providing guidance to informaticists to be deliberate in their decisions, thus enabling more accessible opportunities for both value set and process reuse in the RWD research community.</p>
<p>Our study design has been guided by the scholarly tradition of computer-supported cooperative work (CSCW). It is first predicated on the lived experience of the authors as reflective practitioners [<xref ref-type="bibr" rid="pone.0312289.ref033">33</xref>] with decades of experience creating and managing code-sets in research contexts. Their initial insight was bolstered or challenged through triangulation among three specific data collection activities: surveys, interviews, and participant observation.</p>
<p>Firstly, a custom, 21-question, web-based survey investigated participants’ experiences using value sets in the analysis of RWD. Recruitment focused on professionals with such experience, identifying them through the first author’s professional networks. Given the variety and inconsistency in nomenclature for RWD analysis elements and processes, questions were carefully balanced to capture differences in interpretation and use.</p>
<p>Secondly, a sub-set of survey participants were invited for a follow-on semi-structured interview. The purpose was to explore their value set authoring and reuse practices. The contextual nature of the interviews allowed them to demonstrate their tools and processes for developing value sets in person or via screen share.</p>
<p>The survey and interviews were approved by the University of Maryland IRB (#1405794–8). Recruiting began August 1, 2019 and ended on September 14, 2021. Taking the online Qualtrics survey required human subjects to read and sign our consent form. Interviewees signed a separate Qualtrics consent form. The survey and deidentified data are available at [<xref ref-type="bibr" rid="pone.0312289.ref034">34</xref>]. The Excel file contains 187 columns based on the 21 questions (many multi-part) in the survey. For deidentification, five columns were removed as well as identifying information in the open-ended answers. The services, research, and sector columns are composites of information from the surveys, interviews, and other knowledge that could be inferred about participants. A text box in the worksheet provides a legend for the codes in those columns.</p>
<p>Thirdly, the first three authors acted as participant observers, embedded in key communities and numerous projects in this space, including OHDSI, PCORNet, Health Data Compass, the Army Pharmacovigilance Center, and the American Medical Informatics Association. While writing this paper, SG and HL worked on the National COVID Cohort Collaborative (N3C), observing and contributing to large-scale value set development and management efforts in a novel context. Their active participation in this wide range of projects has made them careful observers of value set development and curation practices.</p>
<p>The qualitative data collected from the surveys, interviews, and participant observations were content coded in NVivo through a process of analytic induction. Codes and emerging themes were iteratively developed with co-authors.</p>
<p>In this paper we present the unfolding interpretation of the results of this study as a dialogue among the literature, the reflective practice, and the field research data. The resultant theorizing yields a conceptual framework that is both <italic>descriptive</italic> (making sense and ordering the world as it is) and <italic>prescriptive</italic> (giving structure to practice to inform the world as it ought to be).</p>
</sec>
<sec id="sec007" sec-type="conclusions">
<title>3. Results and discussion</title>
<sec id="sec008">
<title>3.1 Diversity of value set development contexts</title>
<p>Seventy-seven survey invitations were sent out. Of the 49 responses, 36 were complete enough for analysis, yielding a response rate of 64% and a completion rate of 47%. Tables <xref ref-type="table" rid="pone.0312289.t001">1</xref> and <xref ref-type="table" rid="pone.0312289.t002">2</xref> show the diversity of our sample population in terms of relevant demographic and work environment characteristics. Participants hold an array of degrees and work in a variety of disciplines. Most reported being involved in a small number of studies requiring value set development each year, working in teams of between 2 and 10 people, often from multiple organizations. Participants and their fellow team members brought a range of skills and expertise to these projects (Tables <xref ref-type="table" rid="pone.0312289.t001">1</xref>–<xref ref-type="table" rid="pone.0312289.t003">3</xref>) and they worked on projects involving a range of vocabularies, domains, and data models (<xref ref-type="table" rid="pone.0312289.t004">Table 4</xref>).</p>
<table-wrap id="pone.0312289.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t001</object-id>
<label>Table 1</label> <caption><title>Participant demographics and work contexts.</title></caption>
<alternatives>
<graphic id="pone.0312289.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Participant Degrees</th>
<th align="right">#</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">PhD</td>
<td align="right">17</td>
</tr>
<tr>
<td align="left">MS/MA</td>
<td align="right">6</td>
</tr>
<tr>
<td align="left">MD</td>
<td align="right">5</td>
</tr>
<tr>
<td align="left">MPH</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">BSN</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">RN</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">JD</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Sector/industry</bold></td>
</tr>
<tr>
<td align="left">Academic</td>
<td align="right">12</td>
</tr>
<tr>
<td align="left">Public</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">Academic professional</td>
<td align="right">8</td>
</tr>
<tr>
<td align="left">Pharma</td>
<td align="right">3</td>
</tr>
<tr>
<td align="left">Consulting</td>
<td align="right">4</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Discipline</bold></td>
</tr>
<tr>
<td align="left">Informatics</td>
<td align="right">22</td>
</tr>
<tr>
<td align="left">Clinical quality measurement</td>
<td align="right">8</td>
</tr>
<tr>
<td align="left">Health economics</td>
<td align="right">3</td>
</tr>
<tr>
<td align="left">Software, epidemiology, ontology</td>
<td align="right">1 each</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Studies conducted per year</bold></td>
</tr>
<tr>
<td align="left">0 to 1</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">1 to 5</td>
<td align="right">22</td>
</tr>
<tr>
<td align="left">More than 5</td>
<td align="right">13</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Team size (people)</bold></td>
</tr>
<tr>
<td align="left">1</td>
<td align="right">0</td>
</tr>
<tr>
<td align="left">2 to 5</td>
<td align="right">26</td>
</tr>
<tr>
<td align="left">5 to 10</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">More than 10</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Team size (organizations)</bold></td>
</tr>
<tr>
<td align="left">1</td>
<td align="right">10</td>
</tr>
<tr>
<td align="left">2 to 5</td>
<td align="right">22</td>
</tr>
<tr>
<td align="left">More than 5</td>
<td align="right">5</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<table-wrap id="pone.0312289.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t002</object-id>
<label>Table 2</label> <caption><title>Study or value set development roles played by participant and other team members.</title></caption>
<alternatives>
<graphic id="pone.0312289.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Study roles</th>
<th align="right">Any team member</th>
<th align="right">Participant alone</th>
<th align="left">Participant with others</th>
<th align="left">Others</th>
<th align="left">No one</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Analyst</td>
<td align="right">35</td>
<td align="right">11</td>
<td align="right">26</td>
<td align="right">10</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">Programmer</td>
<td align="right">35</td>
<td align="right">10</td>
<td align="right">20</td>
<td align="right">15</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">Statistician</td>
<td align="right">33</td>
<td align="right">5</td>
<td align="right">14</td>
<td align="right">19</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">Clinical expert</td>
<td align="right">31</td>
<td align="right">3</td>
<td align="right">5</td>
<td align="right">26</td>
<td align="right">3</td>
</tr>
<tr>
<td align="left">Informaticist</td>
<td align="right">30</td>
<td align="right">11</td>
<td align="right">22</td>
<td align="right">7</td>
<td align="right">4</td>
</tr>
<tr>
<td align="left">Investigator</td>
<td align="right">30</td>
<td align="right">18</td>
<td align="right">22</td>
<td align="right">10</td>
<td align="right">1</td>
</tr>
<tr>
<td align="left">Epidemiologist</td>
<td align="right">25</td>
<td align="right">2</td>
<td align="right">7</td>
<td align="right">17</td>
<td align="right">6</td>
</tr>
<tr>
<td align="left">Terminologist</td>
<td align="right">20</td>
<td align="right">6</td>
<td align="right">13</td>
<td align="right">8</td>
<td align="right">13</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<table-wrap id="pone.0312289.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t003</object-id>
<label>Table 3</label> <caption><title>Software tools, platforms, and repositories used in value set development and sharing.</title></caption>
<alternatives>
<graphic id="pone.0312289.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Software/tools used</th>
<th align="left">#</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">R</td>
<td align="right">26</td>
</tr>
<tr>
<td align="left">SQL database</td>
<td align="right">24</td>
</tr>
<tr>
<td align="left">OHDSI/ATLAS</td>
<td align="right">17</td>
</tr>
<tr>
<td align="left">SAS</td>
<td align="right">13</td>
</tr>
<tr>
<td align="left">Python</td>
<td align="right">8</td>
</tr>
<tr>
<td align="left">Tableau</td>
<td align="right">7</td>
</tr>
<tr>
<td align="left">Epic</td>
<td align="right">6</td>
</tr>
<tr>
<td align="left">VSAC</td>
<td align="right">5</td>
</tr>
<tr>
<td align="left">i2b2</td>
<td align="right">4</td>
</tr>
<tr>
<td align="left">Other</td>
<td align="right">13</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<table-wrap id="pone.0312289.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t004</object-id>
<label>Table 4</label> <caption><title>Vocabularies, vocabulary domains, and data models targeted by participants’ value sets.</title></caption>
<alternatives>
<graphic id="pone.0312289.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t004" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">Vocabularies used</th>
<th align="center">#</th>
<th align="left">Data models used</th>
<th align="center">#</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">ICD10CM</td>
<td align="right">29</td>
<td align="left">OMOP</td>
<td align="right">18</td>
</tr>
<tr>
<td align="left">CPT</td>
<td align="right">27</td>
<td align="left">PCORNet</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">ICD9CM</td>
<td align="right">26</td>
<td align="left">Local system</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">LOINC</td>
<td align="right">26</td>
<td align="left">Claim forms</td>
<td align="right">5</td>
</tr>
<tr>
<td align="left">SNOMED-CT</td>
<td align="right">25</td>
<td align="left">i2b2</td>
<td align="right">4</td>
</tr>
<tr>
<td align="left">RxNorm</td>
<td align="right">24</td>
<td align="left">Other</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">HCPCS</td>
<td align="right">22</td>
<td align="left"><bold>Value set domains</bold></td>
<td align="right"><bold>#</bold></td>
</tr>
<tr>
<td align="left">NDC</td>
<td align="right">21</td>
<td align="left">Conditions</td>
<td align="right">34</td>
</tr>
<tr>
<td align="left">OHDSI/OMOP</td>
<td align="right">14</td>
<td align="left">Procedures</td>
<td align="right">30</td>
</tr>
<tr>
<td align="left">UMLS</td>
<td align="right">13</td>
<td align="left">Medications</td>
<td align="right">29</td>
</tr>
<tr>
<td align="left">MedDRA</td>
<td align="right">8</td>
<td align="left">Lab tests</td>
<td align="right">28</td>
</tr>
<tr>
<td align="left">PCORNet</td>
<td align="right">8</td>
<td align="left">Other</td>
<td align="right">9</td>
</tr>
<tr>
<td align="left">FDB</td>
<td align="right">5</td>
</tr>
<tr>
<td align="left">Other</td>
<td align="right">19</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Of the nine most common tools our respondents reported using for value set development listed in <xref ref-type="table" rid="pone.0312289.t003">Table 3</xref>, several are specifically designed for clinical or clinical research applications, providing support for authoring, sharing, and using value sets. The others are general programming and analysis tools with which value sets can be composed, evaluated, and used by linking to database resources containing vocabulary and patient information.</p>
</sec>
<sec id="sec009">
<title>3.2 Diversity of value set development practices</title>
<p>Value sets and processes for developing them vary in many critical ways. The effectiveness of a given value set development process and the accuracy of the value set it produces depend as much on the thoroughness with which methods are applied as on the selection of those methods.</p>
<p>A particularly important factor shaping value set development practice is whether the value set is being developed for a single project, for use across multiple known projects, or for sharing and reuse in unknown future projects. Literature cited in the introduction [<xref ref-type="bibr" rid="pone.0312289.ref007">7</xref>, <xref ref-type="bibr" rid="pone.0312289.ref008">8</xref>, <xref ref-type="bibr" rid="pone.0312289.ref014">14</xref>, <xref ref-type="bibr" rid="pone.0312289.ref016">16</xref>–<xref ref-type="bibr" rid="pone.0312289.ref018">18</xref>, <xref ref-type="bibr" rid="pone.0312289.ref020">20</xref>, <xref ref-type="bibr" rid="pone.0312289.ref023">23</xref>–<xref ref-type="bibr" rid="pone.0312289.ref028">28</xref>] asserts the importance of reuse in addressing problems with value set quality. We suspected at the outset of this study that reuse was uncommon, as there is considerable evidence [<xref ref-type="bibr" rid="pone.0312289.ref009">9</xref>] that reuse is fraught with difficulties and that repositories accumulate many value sets ostensibly representing the same clinical phenomenon. Our field data (see <xref ref-type="table" rid="pone.0312289.t005">Table 5</xref>) show 30 (83%) of our respondents reuse value sets made by others and 20 (55%) use repositories to find value sets for reuse. Many participants mentioned sharing value sets to public or private repositories—about a third to the Observational Health Data Sciences and Informatics (OHDSI) ATLAS web interface [<xref ref-type="bibr" rid="pone.0312289.ref035">35</xref>], a third to the Value Set Authority Center (VSAC) [<xref ref-type="bibr" rid="pone.0312289.ref031">31</xref>], and several to other repositories, publications, and research networks.</p>
<table-wrap id="pone.0312289.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t005</object-id>
<label>Table 5</label> <caption><title>Value set-related tasks performed by survey respondents or their team members.</title></caption>
<alternatives>
<graphic id="pone.0312289.t005g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t005" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Value set-related tasks</th>
<th align="center">#</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><bold>Create value sets</bold></td>
<td align="right"/>
</tr>
<tr>
<td align="left">Create value sets for local use</td>
<td align="right">28</td>
</tr>
<tr>
<td align="left">Create value sets for use by others</td>
<td align="right">27</td>
</tr>
<tr>
<td align="left">Vocabulary search</td>
<td align="right">15</td>
</tr>
<tr>
<td align="left">Vocabulary navigation</td>
<td align="right">16</td>
</tr>
<tr>
<td align="left">Consult clinical or terminology experts</td>
<td align="right">16</td>
</tr>
<tr>
<td align="left">Optimize value set to more parsimonious expression by replacing codes with intensional rules where possible.</td>
<td align="right">5</td>
</tr>
<tr>
<td align="left">Translate value sets across terminologies</td>
<td align="right">21</td>
</tr>
<tr>
<td align="left">Add value sets to repositories</td>
<td align="right">24</td>
</tr>
<tr>
<td align="left"><bold>Use existing value sets</bold></td>
<td align="right"/>
</tr>
<tr>
<td align="left">Use value sets created by others</td>
<td align="right">30</td>
</tr>
<tr>
<td align="left">Use value sets from repositories</td>
<td align="right">20</td>
</tr>
<tr>
<td align="left">Use value sets from publications</td>
<td align="right">8</td>
</tr>
<tr>
<td align="left">Manual or automated comparison with existing value sets</td>
<td align="right">29</td>
</tr>
<tr>
<td align="left"><bold>Evaluation</bold></td>
<td align="right"/>
</tr>
<tr>
<td align="left">Evaluate value sets (code-by-code or at the set level)</td>
<td align="right">26</td>
</tr>
<tr>
<td align="left">Approval for use by subject matter experts or other decision makers.</td>
<td align="right">15</td>
</tr>
<tr>
<td align="left">Review by terminology and clinical experts.</td>
<td align="right">18</td>
</tr>
<tr>
<td align="left"><bold>Empirical evaluation</bold></td>
<td align="right"/>
</tr>
<tr>
<td align="left">Examine frequently occurring codes in patients with phenomenon for possible inclusion.</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">Identify false negatives if a reference standard is available.</td>
<td align="right">7</td>
</tr>
<tr>
<td align="left">Identify false positives through chart review of matching patients.</td>
<td align="right">12</td>
</tr>
<tr>
<td align="left">If codes are semantically appropriate but absent in the intended data, they may be discarded as irrelevant or included for the benefit of future use. (Precalculated term usage counts sufficient.)</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">If prevalence of the target clinical condition is known, a significant discrepancy between prevalence and phenotype/value set result counts may be taken by developers to mean the value set requires further work before release. (Requires patient-level data.)</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">Inspect patient and record counts.</td>
<td align="right">15</td>
</tr>
<tr>
<td align="left">Review patients matched by code(s) to confirm phenomenon.</td>
<td align="right">11</td>
</tr>
<tr>
<td align="left">Sanity check and review the count of patients matched by the whole value set—preferably by executing its containing phenotype algorithm. (Requires access to patient-level detail to generate value set counts.)</td>
<td align="right">13</td>
</tr>
<tr>
<td align="left">Sensitivity analysis of changes in results caused by modification of the value set.</td>
<td align="right">4</td>
</tr>
<tr>
<td align="left">Test value set on patient data, presumably in context of phenotype algorithm.</td>
<td align="right">13</td>
</tr>
<tr>
<td align="left"><bold>Other</bold></td>
<td align="right"/>
</tr>
<tr>
<td align="left">If reference standard was used, report how it was made.</td>
<td align="right">2</td>
</tr>
<tr>
<td align="left">Report description and justification for validation methods used and any resulting statistics.</td>
<td align="right">3</td>
</tr>
<tr>
<td align="left">Informatics, standards, or infrastructure work related to value sets</td>
<td align="right">22</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec010">
<title>3.3 Permissible values versus analytic value sets</title>
<p>We distinguish two general types of value set based on the way they are used. <italic>Permissible value sets</italic> are used for capturing clinical data in patient records, specifying code systems and code system values that can be entered into a particular data element. The items in a permissible value set might be presented to the user as a dropdown list or typeahead field, serving both to prompt the user with the allowable selection of values and prohibit entry of values not included in the set. <italic>Analytic value sets</italic>, on the other hand, are used in the analysis or querying of existing patient records to select those that are indicative of a clinical observation or event of interest where that phenomenon might have been captured using any of a number of codes. (In other contexts, such as data harmonization and clinical quality measures, value sets are used in more ambiguous ways that have both permissible and analytic qualities.) There are other use cases for value sets (see <xref ref-type="table" rid="pone.0312289.t006">Table 6</xref>), but the differences between these two contexts (data capture and RWD analysis) will show why the distinction is needed.</p>
<table-wrap id="pone.0312289.t006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t006</object-id>
<label>Table 6</label> <caption><title>Contexts for value set development.</title></caption>
<alternatives>
<graphic id="pone.0312289.t006g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t006" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Context</th>
<th align="center">Reuse repositories</th>
<th align="center">Permissible / analytic</th>
<th align="center">Prescriptive / descriptive</th>
<th align="center">Semantic / empirical evaluation</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Value sets for data capture</td>
<td align="center">VSAC</td>
<td align="center">Permissible</td>
<td align="center">Prescriptive</td>
<td align="center">Semantic</td>
</tr>
<tr>
<td align="left">Other value sets for terminology services (FHIR, CTS2, etc.)</td>
<td align="center">VSAC</td>
<td align="center">Permissible</td>
<td align="center">Prescriptive</td>
<td align="center">Semantic</td>
</tr>
<tr>
<td align="left">Clinical quality measures</td>
<td align="center">VSAC</td>
<td align="center">Both</td>
<td align="center">Both</td>
<td align="center">Mostly semantic</td>
</tr>
<tr>
<td align="left">Single study, single database</td>
<td align="center"/>
<td align="center">Analytic</td>
<td align="center">Descriptive</td>
<td align="center">Both—but empirical is vital and possible</td>
</tr>
<tr>
<td align="left">Network study or multiple related studies</td>
<td align="center">ATLAS, N3C</td>
<td align="center">Analytic</td>
<td align="center">Descriptive</td>
<td align="center">Both</td>
</tr>
<tr>
<td align="left">For analytic reuse but not for a specific study, database, or question</td>
<td align="center">N3C</td>
<td align="center">Analytic</td>
<td align="center">Descriptive</td>
<td align="center">Mostly semantic</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>The distinction is not about the digital structure of value sets or their definitions, but about the ways they are used and the practices appropriate to the development and validation of each type. While the distinction is not generally made in the literature or in value set repositories, our findings cannot be understood without drawing it. The HL7 definitions and other discussions of value sets tend to imply permissible value sets as their archetypal use case [<xref ref-type="bibr" rid="pone.0312289.ref018">18</xref>, <xref ref-type="bibr" rid="pone.0312289.ref023">23</xref>, <xref ref-type="bibr" rid="pone.0312289.ref024">24</xref>, <xref ref-type="bibr" rid="pone.0312289.ref026">26</xref>, <xref ref-type="bibr" rid="pone.0312289.ref027">27</xref>, <xref ref-type="bibr" rid="pone.0312289.ref030">30</xref>, <xref ref-type="bibr" rid="pone.0312289.ref031">31</xref>, <xref ref-type="bibr" rid="pone.0312289.ref036">36</xref>–<xref ref-type="bibr" rid="pone.0312289.ref043">43</xref>]. While this paper covers both types, analytic value sets are our primary focus [<xref ref-type="bibr" rid="pone.0312289.ref002">2</xref>–<xref ref-type="bibr" rid="pone.0312289.ref004">4</xref>, <xref ref-type="bibr" rid="pone.0312289.ref009">9</xref>, <xref ref-type="bibr" rid="pone.0312289.ref013">13</xref>, <xref ref-type="bibr" rid="pone.0312289.ref017">17</xref>, <xref ref-type="bibr" rid="pone.0312289.ref032">32</xref>, <xref ref-type="bibr" rid="pone.0312289.ref044">44</xref>, <xref ref-type="bibr" rid="pone.0312289.ref045">45</xref>], and a central claim we make is that analytic value sets necessitate different methods and tools for authoring, validation, sharing, and reuse.</p>
<p><xref ref-type="fig" rid="pone.0312289.g001">Fig 1</xref> shows permissible value sets in context: a clinical data management system includes screens or forms, each of which will include data elements for capturing clinical phenomena like diagnoses, observations, and treatments. Data elements are defined in part by the values they are allowed to take. Specific screens and data elements in EHR, clinical trial, or registry applications may be focused on particular clinical phenomena such as diabetes complications or hypertension medications. A permissible value set then provides a list of subcategories or instances—e.g., cardiomyopathy or retinopathy, etc. for a diabetes complications data element—to populate dropdowns and constrain data element values.</p>
<fig id="pone.0312289.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Permissible value sets in context.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.g001" xlink:type="simple"/>
</fig>
<p>In RWD studies, analytic value sets are used in the definition and identification of clinical phenomena of interest, representing study variables such as exposure and comparator cohorts, treatment or exposure criteria, process and outcome, covariates, confounders, etc. [<xref ref-type="bibr" rid="pone.0312289.ref003">3</xref>]. The algorithmic components that identify specific clinical phenomena in the data may be called electronic phenotypes, phenotype algorithms, cohort definitions, or just variables; this paper mostly refers to them as “phenotypes.” Phenotype algorithms may use various types of data (narrative notes, images, EKG or other device output, etc.), but insofar as terminology codes are used in the algorithm, a phenotype will include one or more value sets as diagrammed in <xref ref-type="fig" rid="pone.0312289.g002">Fig 2</xref> and may use temporal and conditional logic in performing set operations on the groups of patient records matched by different value sets. However, phenotypes can also be as simple as a single value set, the algorithm consisting of nothing but the selection of patient records containing one of the codes in that set. (See, e.g., “Finding Existing Phenotype Definitions” in the phenotyping chapter of the online textbook, Rethinking Clinical Trials [<xref ref-type="bibr" rid="pone.0312289.ref005">5</xref>], which lists value set repositories alongside repositories of more complex algorithms as sources of reusable electronic phenotypes.)</p>
<fig id="pone.0312289.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Analytic value sets in context.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.g002" xlink:type="simple"/>
</fig>
<p>Permissible value sets are generally developed for specific clinical data systems, often for a single institution. Analytic value sets are usually designed to reflect nuances of a particular research question, such as a need for sensitivity or specificity or the need for study-specific exclusion criteria.</p>
</sec>
<sec id="sec011">
<title>3.4 Prescriptive and descriptive perspectives on value sets</title>
<p>Distilling and analyzing the catalogue of value set development practices in <xref ref-type="table" rid="pone.0312289.t005">Table 5</xref> —how value sets are made, used, evaluated, and reused—led us to re-evaluate the literature around value sets, observing that it falls into three research focus areas, which often do not seem to be in dialog with each other:</p>
<list list-type="order">
<list-item><p>On permissible value sets for permissible values or clinical quality measures (CQM) [<xref ref-type="bibr" rid="pone.0312289.ref018">18</xref>, <xref ref-type="bibr" rid="pone.0312289.ref023">23</xref>–<xref ref-type="bibr" rid="pone.0312289.ref027">27</xref>, <xref ref-type="bibr" rid="pone.0312289.ref030">30</xref>, <xref ref-type="bibr" rid="pone.0312289.ref031">31</xref>, <xref ref-type="bibr" rid="pone.0312289.ref036">36</xref>–<xref ref-type="bibr" rid="pone.0312289.ref043">43</xref>] (although value sets are not used as permissible value constraints in CQMs, the value set literature does not treat them any differently and does not address issues around aligning them with patient data—perhaps because CQM use by health care organizations is often required by payers, so alignment of value set and patient data is the responsibility of provider organizations, not value set developers);</p></list-item>
<list-item><p>On phenotypes (cohort selection) for RWD research [<xref ref-type="bibr" rid="pone.0312289.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0312289.ref003">3</xref>, <xref ref-type="bibr" rid="pone.0312289.ref013">13</xref>, <xref ref-type="bibr" rid="pone.0312289.ref044">44</xref>, <xref ref-type="bibr" rid="pone.0312289.ref045">45</xref>]; and</p></list-item>
<list-item><p>On analytic value sets (often called code sets) for use in phenotyping applications [<xref ref-type="bibr" rid="pone.0312289.ref004">4</xref>, <xref ref-type="bibr" rid="pone.0312289.ref009">9</xref>, <xref ref-type="bibr" rid="pone.0312289.ref017">17</xref>, <xref ref-type="bibr" rid="pone.0312289.ref019">19</xref>, <xref ref-type="bibr" rid="pone.0312289.ref032">32</xref>].</p></list-item>
</list>
<p>The computable phenotype literature sometimes conflates phenotypes and analytic value sets. Where it does discuss reuse, the focus is on phenotype rather than value set reuse. While some in this group do not believe that sharing value sets separately from phenotypes is worthwhile, we have seen that reuse does occur and there is demand for value sets that can be used across phenotypes. The permissible value set and analytic value set literatures both tend to focus on value set repositories and reuse.</p>
<p>The permissible value set literature is not concerned with adapting value sets to specific clinical databases and looks to expert review or published sources of authority for value set validation. The phenotype literature, on the other hand, evaluates value set correctness primarily through empirical analysis with clinical data. The analytic value set literature falls somewhere in between. (The current paper shares more in common with this third group than the others, but it draws on all three. Those who identify, explicitly or implicitly, with one of these groups will benefit from being also informed by the others).</p>
<p>Whether one considers expert authority or empirical analysis as the primary means of value set evaluation can reflect almost ideological beliefs about the nature of value sets and medical vocabulary use generally. Outside the domain of medicine and controlled medical vocabularies, in lexicographical or grammatical terms, the dichotomy between <italic>prescriptivist</italic> views and <italic>descriptivist</italic> views is well-known. For prescriptivists, dictionary entries and grammatical rules define how words and language <italic>should be</italic> used; proper language should conform with such rules and definitions. For descriptivists, dictionaries and grammars are attempts to capture a snapshot of how words and language <italic>are</italic> used in a given time and milieu. Non-conformant usage patterns indicate that the rules are lacking, not that the usage is wrong.</p>
<p>While terminological prescriptivism in natural language is generally considered unscientific and pedantic [<xref ref-type="bibr" rid="pone.0312289.ref046">46</xref>], the imposition of prescriptive terminology is, of course, the foundational purpose of standardized medical vocabularies and arguably the foundational practice of medical informatics generally [<xref ref-type="bibr" rid="pone.0312289.ref047">47</xref>–<xref ref-type="bibr" rid="pone.0312289.ref050">50</xref>].</p>
<p>Exemplifying a prescriptivist orientation, Winnenburg, et al. 2013 suggests that a value set should be anchored in a single concept, consisting of that concept and its descendants. That view is rejected by the descriptivist perspective held by many RWD researchers, expressed in rather extreme terms by one survey respondent:</p>
<disp-quote>
<p>Code sets are always context specific. There is no such thing as diabetes in an RWD data source, there might be 50 definitions of diabetes and you have to pick the one that matches your question, data, and methods…We may spend months developing a code set for a specific question, iterating on different algorithms until the investigator is satisfied that the definition matches the needs of the study. (P04)</p>
</disp-quote>
<p>The following section distinguishes semantic from empirical techniques in value set development and validation. We have observed in a minority of our participants and in some of the literature a bias towards either semantic or empirical techniques that seems to override consideration of context and to reflect an implicit commitment to prescriptivist or descriptivist perspectives on controlled vocabulary use. That is, a person holding one of these views can find it difficult to see merit in the alternative. Overall, prescriptivist views should be appropriate for permissible value sets and data capture contexts; and descriptivist for analytic value sets and RWD research.</p>
</sec>
<sec id="sec012">
<title>3.5 Semantic versus empirical methods and resources</title>
<p>Williams 2017 alludes to a central tension in the choice of methods for value set validation [<xref ref-type="bibr" rid="pone.0312289.ref004">4</xref>]. On the one hand, a rigorous validation would be to compare patient selection results against a reference (“gold”) standard created through medical record abstraction (MRA). On the other, creating such a standard can be prohibitively time-consuming and require data that may be challenging or impossible to obtain. In our data, evaluation fell into two major categories, which we label overall as semantic and empirical.</p>
<sec id="sec013">
<title>3.5.1 Semantic methods and evaluation by authority</title>
<p>We asked participants, “How do you verify that you have selected the best codes for representing a clinical concept in your analyses?” and received a range of answers. For many, confidence in their code selection came by reusing existing value sets from “previous[ly] published results” (P01), “examin[ing] the literature for validation studies” (P41), or “validated codesets when possible” (P04). (Value sets are available from value set repositories such as VSAC, ClinicalCodes [<xref ref-type="bibr" rid="pone.0312289.ref032">32</xref>], or the OHDSI/ATLAS or N3C concept set editors [<xref ref-type="bibr" rid="pone.0312289.ref035">35</xref>, <xref ref-type="bibr" rid="pone.0312289.ref051">51</xref>, <xref ref-type="bibr" rid="pone.0312289.ref052">52</xref>]; published papers that follow RECORD and other data-based observational study reporting guidelines [<xref ref-type="bibr" rid="pone.0312289.ref013">13</xref>, <xref ref-type="bibr" rid="pone.0312289.ref015">15</xref>]; previous projects available to the value set developer; and groupers such as Clinical Classifications Software Refined (CCSR) [<xref ref-type="bibr" rid="pone.0312289.ref053">53</xref>, <xref ref-type="bibr" rid="pone.0312289.ref054">54</xref>].)</p>
<p>Other participants described an evaluation or validation process based on review by terminologists and clinicians (P19), clinical experts (P26), or “our coding panel, a group of experts that give us advice and feedback” (P21).</p>
</sec>
<sec id="sec014">
<title>3.5.2 Patient data and empirical evaluation</title>
<p>Many participants consider these semantic evaluation methods—reuse of existing concept sets and expert review—sufficient; others, however, hold that a value set for analytic use cannot be trusted without evaluating it or the phenotype or algorithm containing it through some form of empirical review of clinical data:</p>
<disp-quote>
<p>Chart Review. Some internal checking of codes against expected lab results, vital measurements, patient histories, etc. Ex. Diabetes codes should associate with histories of certain blood glucose measurements or A1C. (P28)</p>
<p>First conduct discussion with clinical experts; Second, evaluate coverage of clinical concept in a data set; Third, perform random chart review to help detect if presence of code indicates disease. (P16)</p>
<p>Lexical search, semantic exploration (navigate OHDSI vocab), empirical assessment thru characterization, and clinical expert review. (P05)</p>
</disp-quote>
<p>Every one of our 36 survey respondents reported that their <italic>studies</italic> use patient data. Nevertheless, of the 32 who answered the open-ended validation question, only nine indicated using patient data during value set development and validation. Empirical evaluation can range from a check of patient or record counts for individual codes or the value set as a whole, to spot checking of matched patient records, to systematic validation against a reference standard of clinically reviewed records classified as positive or negative for the condition of interest. Participants who mentioned medical record abstraction and reference standards spoke about the impracticality of using them. A few mentioned spot checking. The most frequent use of empirical patient data was for informal sanity checking against aggregate counts.</p>
<p>Choice of evaluation methods can be guided by clinical nuances of the research question or how the value set will be used. According to one survey respondent:</p>
<disp-quote>
<p>Depends on the purpose and whether we are aiming for sensitivity or specificity. It may be chart review, or comparison with other value sets. (P34)</p>
</disp-quote>
<p>To unpack that statement a bit, a highly <italic>sensitive</italic> value set might be appropriate for instance for selecting patients to be screened for some condition where the goal is to capture as many patients as possible. When a sensitive value set is needed, comparison with existing value sets could help to make sure that appropriate codes are not missed. A highly <italic>specific</italic> value set may be suitable when recruiting patients for a clinical trial or when constructing the main cohort for an observational study, where false positives are costly. In this case, chart review of a sample of identified patients and value set/phenotype modifications will help.</p>
<p>Evaluating the accuracy of a sensitive value set requires a thorough semantic exploration to identify all codes that could indicate the condition of interest, while evaluating the accuracy of a specific value set should involve empirical examination of matching patient records to prevent false positives. (Software tools are available particularly to help value set developers discover codes related to the ones they start with: Term Sets [<xref ref-type="bibr" rid="pone.0312289.ref017">17</xref>] and PHenotype Observed Entity Baseline Endorsements (PHOEBE) [<xref ref-type="bibr" rid="pone.0312289.ref055">55</xref>]. PHOEBE functionality has recently been added to OHDSI’s ATLAS concept set editor).</p>
<p><xref ref-type="table" rid="pone.0312289.t006">Table 6</xref> lists general contexts in which value sets are used and relates them to the three literature categories listed above, to value set repositories that support them, and to the three conceptual dichotomies described in Sections 3.3, 3.4, 3.5: permissible/analytic, prescriptive/descriptive, and semantic/empirical.</p>
</sec>
</sec>
<sec id="sec015">
<title>3.6 A taxonomy of reasons for value sets to differ</title>
<p>While repositories make it possible to share and reuse value sets, clutter and redundancy can present serious challenges. For instance, a search for COPD (chronic obstructive pulmonary disease) on ATLAS (<ext-link ext-link-type="uri" xlink:href="https://atlas-demo.ohdsi.org/#/conceptsets" xlink:type="simple">https://atlas-demo.ohdsi.org/#/conceptsets</ext-link>) gives 56 results. While many of these are usefully distinguished by their titles (e.g., Stage III-IV COPD or Concomitant COPD), many are not. In this section we break down the reasons that value sets for (ostensibly) the same clinical concept may differ into three categories: semantic, empirical, and due to error. These are summarized in <xref ref-type="table" rid="pone.0312289.t007">Table 7</xref>.</p>
<table-wrap id="pone.0312289.t007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0312289.t007</object-id>
<label>Table 7</label> <caption><title>Reasons for value sets with the same name to differ in definition and composition.</title></caption>
<alternatives>
<graphic id="pone.0312289.t007g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0312289.t007" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Valid semantic</th>
<th align="left">Valid empirical</th>
<th align="left">Erroneous and arbitrary</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Clinical meaning or nuance<break/>Study requirements<break/>Terminologies and cross-terminology mappings<break/>Use of vocabularies lacking granularity for clinical concepts or requiring post-coordination</td>
<td align="left">Population characteristics<break/>Regional, institutional, or clinical specialization coding practices<break/>Institutional workflow</td>
<td align="left">Codes mistakenly left out<break/>Codes mistakenly included<break/>Codes included or not based on faulty or idiosyncratic reference standards</td>
</tr>
<tr>
<td align="center" colspan="2">Algorithmic context (use of other domains or correcting for false positives or negatives at the phenotype level)</td>
<td align="center"/>
</tr>
<tr>
<td align="center"/>
<td align="center" colspan="2">Arbitrary inclusion thresholds</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<sec id="sec016">
<title>3.6.1 Semantic reasons for value sets to differ</title>
<p>Two value sets may have the same name and refer to ostensibly the same condition or event but, on closer inspection, may differ in their meaning or how they are meant to be used.</p>
<p><italic>Clinical meaning or nuance</italic>. Researchers may differ in their understanding of a clinical concept, or, for instance, a diabetes value set for a cardiology study may require a different set of codes than an endocrinology study.</p>
<p><italic>Study requirements</italic>. Different value sets may share the same clinical meaning, but one study may need a more specific value set, another a more sensitive one.</p>
<p><italic>Vocabulary issues</italic>. Value sets for the same phenomenon will, of course, differ if they use different vocabularies (e.g., ICD10CM or SNOMED-CT for clinical findings; NDC, RxNorm, or ATC for drugs; CPT, HCPCS, or ICD10PCS for procedures). There may be reasons to translate codes across vocabularies. (E.g., CDMs like OMOP may require translation or harmonization of codes in patient records to agreed-upon vocabularies.) Different strategies may be applied when using vocabularies that either lack granularity to express the concept of interest or use post-coordination to express it.</p>
<p><italic>Algorithmic context</italic>. Different strategies for identifying a clinical phenomenon can lead to differences in value set composition. (This is not strictly a semantic issue but a fact of value set use).</p>
<list list-type="bullet">
<list-item><p>A value set designed to target a diagnostic condition might use evidence from other domains of clinical data, e.g., drugs, procedures, lab tests. (It is recognized in the literature that phenotypes benefit from the use of multi-modal, multi-domain data [<xref ref-type="bibr" rid="pone.0312289.ref056">56</xref>].)</p></list-item>
<list-item><p>A value set may be designed in the knowledge that it will produce false positives or negatives if these will be corrected by logic or other value sets at the phenotype level.</p></list-item>
</list>
</sec>
<sec id="sec017">
<title>3.6.2 Empirical reasons for value sets to differ</title>
<p>Differences in the datasets being analyzed may affect the codes used to represent some conditions.</p>
<p><italic>Population characteristics</italic>. E.g., codes may differ for studies of children less than 10 years of age versus geriatric populations; a study of neuropathy in orthopedic surgery patients would not need to include codes for diabetic neuropathy in the pediatric population.</p>
<p><italic>Regional</italic>, <italic>institutional</italic>, <italic>or clinical specialization coding practices</italic>. A single meaning may be expressed using different codes in different places. (This possibility was mentioned by our participants and seems to be a relatively common belief, but we have not encountered specific examples).</p>
<p><italic>Institutional workflow at data source</italic>. Certain conditions or observations may not be captured in some clinical settings, requiring recourse to indirect ways of identifying them in EHRs.</p>
<p><italic>Arbitrary inclusion thresholds</italic>. Some codes may give rise to false positives when included and false negatives when left out. If researchers are not able to resolve this kind of problem at the phenotype algorithm level, they will need to make judgment calls depending on whether they think the false negatives or false positives caused by a given code’s presence or absence will more adversely affect the study’s results. Differences in judgment do not mean one decision is right and another wrong, but, unless the judgment call is justified with specific reasoning, value sets for a given phenomenon can differ without giving potential re-users any basis for choosing between them.</p>
</sec>
<sec id="sec018">
<title>3.6.3 Errors</title>
<p>Crafting accurate value sets is hard and mistakes are not uncommon. Discrepancies between value sets provide an opportunity to discover mistakes that might otherwise be overlooked.</p>
<p><italic>Codes mistakenly left out or mistakenly included</italic>. When value sets are missing codes they should include, they can cause false negatives in patient or event selection; codes included in error can introduce false positives in selection. Without a reference standard to test results (of a value set or its containing cohort algorithm) against a sample of records already reliably classified as exhibiting or not exhibiting the clinical phenomenon of interest, false positives and negatives in selection results may entirely escape detection.</p>
<p><italic>Codes included or not based on faulty or idiosyncratic reference standards</italic>. Reference standards themselves can suffer from error. Decisions by a chart reviewer on which patients match a phenotype or cohort definition can be affected by differences in understanding that are not quite matters of clinical judgment or study needs but differences in chart reading practice, differences in the chart reviewers’ interpretation of study needs, or chart reviewer error. But if the error or discrepancy affects the standard, a value set or its containing phenotype may show perfect sensitivity (low false negative rate) and specificity (low false positive rate) while differing from a value set based on another gold standard.</p>
<p>Errors can lead to bias in results, whose magnitude and direction are not predictable, but legitimate differences in value sets can be recognized if their reasons are known. Value set analysis and authoring software can be better designed to help re-users understand these differences, giving them a basis for deciding between existing value sets or selecting the elements from each most appropriate for their own use case.</p>
</sec>
</sec>
<sec id="sec019">
<title>3.7 Limitations</title>
<p>We attempted to reach a diverse set of professionals who work with value sets, especially but not limited to those working in RWD research. Our questions did not go as deeply into reuse and validation as they might have because we needed to allow diversity in experience and background. It would be valuable for future work to both broaden and narrow the target population. In terms of broadening: 1) a follow-up study should try to recruit participants who validate value sets and electronic phenotypes against MRA-based gold standards, and 2) future scholarship may be able to identify groups of value set developers and users missed in this study. In terms of narrowing, a study that targeted only RWD researchers could focus more effectively on reuse, validation, and particular methods for improving value set quality.</p>
</sec>
<sec id="sec020">
<title>3.8 Conclusion: Leveraging and mitigating redundancy in value set repositories</title>
<p>Value set reuse is frequently championed as a response to persistent concerns about value set quality: not only should researchers make use of expertly designed value sets, value set repositories should facilitate incremental refinement; over time the quality of a shared value set should improve as more researchers put it to use, evaluate its accuracy, and contribute their changes back to the repository.</p>
<disp-quote>
<p>[Reusable value sets] would be helpful [so that] I don’t have to do this on my own every time…[B]ecause it has been created by a collaborative team that’s known for creating value sets, I would know that, "Oh, this has been extracted or they got it from a paper that has been vetted and validated and you know it’s a legit paper." I would use that. (P09, interview)</p>
</disp-quote>
<p>Rather than incremental improvement of existing value sets or indications of a value set’s having been vetted and validated, what we see in repositories is proliferation and clutter: new value sets that may or may not have been vetted in any way and junk concept sets, created for some reason but never finished. We have found general agreement in our data that the presence of many alternative value sets for a given condition often leads value set developers to ignore all of them and start from scratch, as there is generally no easy way to tell which will be more appropriate for the researcher’s needs. And if they share their value set back to the repository (as they must on analysis platforms like ATLAS or N3C), they further compound the problem, especially if they neglect to document the new value set’s intention and provenance.</p>
<p>There is a tension regarding how many value sets should exist for a given clinical condition. On the one hand, the principle of reproducibility of research and fungibility of research results—whether results from different studies may be pooled—argues for re-use of value sets. On the other hand, tight coherence with the research question—“fitness for use”—argues for customizing a unique value set to fit the research intent. Given this tension, it is no surprise that respondents expressed a variety of beliefs on each side of this dialectic.</p>
<p>If, as a field, we hope to increase reuse and refinement to decrease redundant value set creation, we must be able to understand when an additional value set for a target condition may be needed or not. The taxonomy in Section 3.6 may help in reconciling differences when multiple value sets are being reviewed or considered for reuse: if the analyst can identify a valid reason for a difference, this may give them insight to inform choices for their own use case or may help them determine where errors lie, increasing or decreasing their confidence in specific value sets or codes.</p>
<p>When a new researcher creates their own value set from scratch rather than leveraging the work of those who have trodden the same ground, however, this should not be seen as laziness or as a problem to be addressed by exhortations to reuse existing value sets. Rather, the fault should be ascribed to the resources available to them: they should be given software and metadata to make the review and comparison of existing value sets easier than creation from scratch. Practical application of the taxonomy and other ideas presented in this paper will require new software designed to implement these ideas and better guide value set developers through the process. Toward that end, we offer the following recommendations.</p>
<sec id="sec021">
<title>3.8.1 Advanced, automated comparison tools</title>
<p>In our professional experience we have seen instances where trust in what were considered authoritative value sets broke down when comparing them to other value sets. One participant, P16, performed an automated comparison of many alternative value sets for depression, using the differences and similarities to create a trustable value set without having to trust any of the input value sets individually.</p>
<p>Comparison functionality should be a central feature of value set repositories and authoring platforms, allowing users to take advantage of existing value sets rather than burdening them with having to manually sift through ostensibly redundant value sets. In the last couple of years, tools for comparing value sets have begun to appear in ATLAS, the N3C Concept Set Browser, and VS-Hub [<xref ref-type="bibr" rid="pone.0312289.ref057">57</xref>]. VS-Hub explicitly nudges the user to compare related value sets and highlights the selected value sets’ similarities and differences throughout authoring and review.</p>
</sec>
<sec id="sec022">
<title>3.8.2 Detailed metadata collection and use</title>
<p>Existing tools vary in their collection of metadata through the authoring process, but however much metadata they collect, it is at the value set level; it could be enormously helpful to collect metadata to capture value set developers’ reasoning for including or rejecting specific codes. (FHIR and N3C accommodate a relatively extensive set of metadata fields; VSAC somewhat less; and OHDSI/ATLAS hardly any at all. N3C, at SG’s suggestion, does request reasoning when adding codes to a value set, but this feature has not yet been developed to the point of being useful—nothing is currently done with users’ input.) A combination of automated process data capture and timely, minimally obtrusive user prompts could provide code-level metadata that could be displayed as future value set authors consider whether to include a code or not. An automated capture process could, for instance, record the source of included codes: if found in an existing value set, record a reference to that value set; if found through vocabulary text search and/or navigation of vocabulary hierarchy, record the steps leading to the included code. User prompts could try, for instance, to capture whether patient counts or any kind of chart review or gold standard had been used in decisions to include or reject specific codes.</p>
</sec>
<sec id="sec023">
<title>3.8.3 Expert or automated curation</title>
<p>Terminology experts on the N3C infrastructure staff have developed “N3C Recommended” value sets for commonly studied topics (conditions, medications, medication classes, measurements, procedures). VS-Hub was specifically designed to facilitate that endeavor. VS-Hub, so far, has only attempted to make it easier to cull the best out of available value sets for a given condition or event; it has not attempted to force users to review relevant value sets and either improve one of those or make sure that a new one is genuinely needed. If we, as a field, hope to see value set repositories increase rather than decrease in quality and usefulness over time and widening use, strong curation will be necessary to exclude redundant, unfinished, or otherwise low-quality value sets. Such curation could be done by humans, software, or both.</p>
</sec>
<sec id="sec024">
<title>3.8.4 Improving value set authoring and repository platforms</title>
<p>It will be very difficult to curate existing value set repositories to eliminate all redundant or low-quality value sets and allow users to effectively understand the differences between meaningfully different value sets for similar topics, and it would be difficult even to maintain such standards if establishing a repository from scratch. Our taxonomy of reasons for similar value sets to differ will help in such efforts, but we believe the most direct approach to facilitating the development of high-quality, fit-for-use value sets is by helping users leverage the knowledge in existing value sets without encouraging them to pick a single existing value set for reuse. Rather, visualization for comparison of code inclusion across value sets with tools like VS-Hub will facilitate comprehension of numerous value sets at once. And, better than that, would be the capture and use of metadata documenting developers’ reasoning for including and excluding specific codes during their authoring process.</p>
<p>The requirements and recommendations in prior literature have not been sufficient to guide the design of software that could make effective leveraging of shared value sets a reality. However, the conceptual framework, real-world experience, and deep, detailed account of the challenges to reuse presented here make up that deficit and provide a high-level requirements roadmap for improved code-set creation tools.</p>
</sec>
</sec>
</sec>
</body>
<back>
<ack>
<p>Richard Williams, Jessica Ancker, Christopher Chute, Davera Gabriel, Harold Solbrig, Jeff Brown, Laura Wiley, Luke Rasmussen, Rachel Richesson, Allen Flynn, Meredith Zozus, David Gotz, Erica Voss, Christian Reich, Kristin Kosta, Shelley Rusincovitch, Niklas Elmqvist, Leilani Battle, Joel Chan, Amanda Lazar.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0312289.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Corrigan-Curay</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Sacks</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Woodcock</surname> <given-names>J.</given-names></name>, <article-title>Real-World Evidence and Real-World Data for Evaluating Drug Safety and Effectiveness</article-title>, <source>JAMA</source> <volume>320</volume> (<year>2018</year>) <fpage>867</fpage>–<lpage>868</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1001/jama.2018.10136" xlink:type="simple">10.1001/jama.2018.10136</ext-link></comment> <object-id pub-id-type="pmid">30105359</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pathak</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Kashyap</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Basford</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Masys</surname> <given-names>D.R.</given-names></name>, <name name-style="western"><surname>Chute</surname> <given-names>C.G.</given-names></name>, <article-title>Mapping clinical phenotype data elements to standardized metadata repositories and controlled terminologies: the eMERGE Network experience</article-title>, <source>Journal of the American Medical Informatics Association</source> <volume>18</volume> (<year>2011</year>) <fpage>376</fpage>–<lpage>386</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2010-000061" xlink:type="simple">10.1136/amiajnl-2010-000061</ext-link></comment> <object-id pub-id-type="pmid">21597104</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Albers</surname> <given-names>D.J.</given-names></name>, <article-title>Next-generation phenotyping of electronic health records</article-title>, <source>J. Am. Med. Inform. Assoc</source>. <volume>20</volume> (<year>2013</year>) <fpage>117</fpage>–<lpage>121</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2012-001145" xlink:type="simple">10.1136/amiajnl-2012-001145</ext-link></comment> <object-id pub-id-type="pmid">22955496</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Williams</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Kontopantelis</surname> <given-names>E.</given-names></name>, <name name-style="western"><surname>Buchan</surname> <given-names>I.</given-names></name>, <name name-style="western"><surname>Peek</surname> <given-names>N.</given-names></name>, <article-title>Clinical code set engineering for reusing EHR data for research: A review</article-title>, <source>Journal of Biomedical Informatics</source> <volume>70</volume> (<year>2017</year>) <fpage>1</fpage>–<lpage>13</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2017.04.010" xlink:type="simple">10.1016/j.jbi.2017.04.010</ext-link></comment> <object-id pub-id-type="pmid">28442434</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Richesson</surname> <given-names>R.L.</given-names></name>, <name name-style="western"><surname>Wiley</surname> <given-names>L.K.</given-names></name>, <name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Rasmussen</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Luke</surname> <given-names>V.</given-names></name>, <source>Electronic Health Records-Based Phenotyping, Rethinking Clinical Trials: A Living Textbook of Pragmatic Clinical Trials</source> (<year>2020</year>). <ext-link ext-link-type="uri" xlink:href="https://rethinkingclinicaltrials.org/chapters/conduct/electronic-health-records-based-phenotyping/electronic-health-records-based-phenotyping-introduction/" xlink:type="simple">https://rethinkingclinicaltrials.org/chapters/conduct/electronic-health-records-based-phenotyping/electronic-health-records-based-phenotyping-introduction/</ext-link> (accessed July 14, 2020).</mixed-citation></ref>
<ref id="pone.0312289.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Davé</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Petersen</surname> <given-names>I.</given-names></name>, <article-title>Creating medical and drug code lists to identify cases in primary care databases: CREATING MEDICAL AND DRUG CODE LISTS USING STATA</article-title>, <source>Pharmacoepidem. Drug Safe</source>. <volume>18</volume> (<year>2009</year>) <fpage>704</fpage>–<lpage>707</lpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/pds.1770" xlink:type="simple">https://doi.org/10.1002/pds.1770</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Richesson</surname> <given-names>R.L.</given-names></name>, <name name-style="western"><surname>Rusincovitch</surname> <given-names>S.A.</given-names></name>, <name name-style="western"><surname>Wixted</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Batch</surname> <given-names>B.C.</given-names></name>, <name name-style="western"><surname>Feinglos</surname> <given-names>M.N.</given-names></name>, <name name-style="western"><surname>Miranda</surname> <given-names>M.L.</given-names></name>, <etal>et al</etal>, <article-title>A comparison of phenotype definitions for diabetes mellitus</article-title>, <source>J. Am. Med. Inform. Assoc</source>. <volume>20</volume> (<year>2013</year>) <fpage>e319</fpage>–<lpage>26</lpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2013-001952" xlink:type="simple">https://doi.org/10.1136/amiajnl-2013-001952</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lanes</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Brown</surname> <given-names>J.S.</given-names></name>, <name name-style="western"><surname>Haynes</surname> <given-names>K.</given-names></name>, <name name-style="western"><surname>Pollack</surname> <given-names>M.F.</given-names></name>, <name name-style="western"><surname>Walker</surname> <given-names>A.M.</given-names></name>, <article-title>Identifying health outcomes in healthcare databases</article-title>, <source>Pharmacoepidemiol Drug Saf</source> <volume>24</volume> (<year>2015</year>) <fpage>1009</fpage>–<lpage>1016</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/pds.3856" xlink:type="simple">10.1002/pds.3856</ext-link></comment> <object-id pub-id-type="pmid">26282185</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Batch</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>McClure</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Jiang</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Kharrazi</surname> <given-names>H.</given-names></name>, <name name-style="western"><surname>Saripalle</surname> <given-names>R.</given-names></name>, <etal>et al</etal>, <article-title>Clinical Concept Value Sets and Interoperability in Health Data Analytics</article-title>, <source>AMIA Annu Symp Proc</source> <volume>2018</volume> (<year>2018</year>) <fpage>480</fpage>–<lpage>489</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371254/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371254/</ext-link> (accessed March 11, 2019). <object-id pub-id-type="pmid">30815088</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Lehmann</surname> <given-names>H.</given-names></name>, <name name-style="western"><surname>Schilling</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Lutters</surname> <given-names>W.</given-names></name>, <source>Practices, norms, and aspirations regarding the construction, validation, and reuse of code sets in the analysis of real-world data</source>, (<year>2021</year>) <volume>35</volume>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/2021.10.14.21264917" xlink:type="simple">https://doi.org/10.1101/2021.10.14.21264917</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Albers</surname> <given-names>D.J.</given-names></name>, <article-title>High-fidelity phenotyping: richness and freedom from bias</article-title>, <source>Journal of the American Medical Informatics Association</source> <volume>25</volume> (<year>2018</year>) <fpage>289</fpage>–<lpage>294</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocx110" xlink:type="simple">10.1093/jamia/ocx110</ext-link></comment> <object-id pub-id-type="pmid">29040596</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ostropolets</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <article-title>A scoping review of clinical decision support tools that generate new knowledge to support decision making in real time</article-title>, <source>Journal of the American Medical Informatics Association</source> <volume>27</volume> (<year>2020</year>) <fpage>1968</fpage>–<lpage>1976</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocaa200" xlink:type="simple">10.1093/jamia/ocaa200</ext-link></comment> <object-id pub-id-type="pmid">33120430</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Callahan</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Shah</surname> <given-names>N.H.</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>J.H.</given-names></name>, <article-title>Research and Reporting Considerations for Observational Studies Using Electronic Health Record Data</article-title>, <source>Annals of Internal Medicine</source> <volume>172</volume> (<year>2020</year>) <fpage>S79</fpage>–<lpage>S84</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7326/M19-0873" xlink:type="simple">10.7326/M19-0873</ext-link></comment> <object-id pub-id-type="pmid">32479175</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref014"><label>14</label><mixed-citation publication-type="other" xlink:type="simple">Vocabulary Work Group, HL7 Specification: Characteristics of a Formal Value Set Definition, Release 1, HL7 ANSI, 2019. <ext-link ext-link-type="uri" xlink:href="http://www.hl7.org/implement/standards/product_brief.cfm?product_id=437" xlink:type="simple">http://www.hl7.org/implement/standards/product_brief.cfm?product_id=437</ext-link> (accessed March 8, 2018).</mixed-citation></ref>
<ref id="pone.0312289.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Benchimol</surname> <given-names>E.I.</given-names></name>, <name name-style="western"><surname>Smeeth</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Guttmann</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Harron</surname> <given-names>K.</given-names></name>, <name name-style="western"><surname>Moher</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Petersen</surname> <given-names>I.</given-names></name>, <etal>et al</etal>, <article-title>RECORD Working Committee, The REporting of studies Conducted using Observational Routinely-collected health Data (RECORD) Statement</article-title>, <source>PLOS Medicine</source> <volume>12</volume> (<year>2015</year>) <fpage>e1001885</fpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pmed.1001885" xlink:type="simple">https://doi.org/10.1371/journal.pmed.1001885</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alper</surname> <given-names>B.S.</given-names></name>, <name name-style="western"><surname>Flynn</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Bray</surname> <given-names>B.E.</given-names></name>, <name name-style="western"><surname>Conte</surname> <given-names>M.L.</given-names></name>, <name name-style="western"><surname>Eldredge</surname> <given-names>C.</given-names></name>, <name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <etal>et al</etal>, <article-title>Categorizing metadata to help mobilize computable biomedical knowledge</article-title>, <source>Learning Health Systems</source> <volume>6</volume> (<year>2022</year>) <fpage>e10271</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/lrh2.10271" xlink:type="simple">10.1002/lrh2.10271</ext-link></comment> <object-id pub-id-type="pmid">35036552</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Williams</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Brown</surname> <given-names>B.</given-names></name>, <name name-style="western"><surname>Kontopantelis</surname> <given-names>E.</given-names></name>, <name name-style="western"><surname>van Staa</surname> <given-names>T.</given-names></name>, <name name-style="western"><surname>Peek</surname> <given-names>N.</given-names></name>, <article-title>Term sets: A transparent and reproducible representation of clinical code sets</article-title>, <source>PLoS ONE</source> <volume>14</volume> (<year>2019</year>) <fpage>e0212291</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0212291" xlink:type="simple">10.1371/journal.pone.0212291</ext-link></comment> <object-id pub-id-type="pmid">30763407</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Peterson</surname> <given-names>K.J.</given-names></name>, <name name-style="western"><surname>Jiang</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Brue</surname> <given-names>S.M.</given-names></name>, <name name-style="western"><surname>Shen</surname> <given-names>F.</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>H.</given-names></name>, <article-title>Mining Hierarchies and Similarity Clusters from Value Set Repositories</article-title>, <source>AMIA Annu Symp Proc</source> <volume>2017</volume> (<year>2017</year>) <fpage>1372</fpage>–<lpage>1381</lpage>. <object-id pub-id-type="pmid">29854206</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Watson</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Nicholson</surname> <given-names>B.D.</given-names></name>, <name name-style="western"><surname>Hamilton</surname> <given-names>W.</given-names></name>, <name name-style="western"><surname>Price</surname> <given-names>S.</given-names></name>, <article-title>Identifying clinical features in primary care electronic health record studies: methods for codelist development</article-title>, <source>BMJ Open</source> <volume>7</volume> (<year>2017</year>) <fpage>e019637</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/bmjopen-2017-019637" xlink:type="simple">10.1136/bmjopen-2017-019637</ext-link></comment> <object-id pub-id-type="pmid">29170293</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref020"><label>20</label><mixed-citation publication-type="other" xlink:type="simple">R. Williams, GetSet, (2019). <ext-link ext-link-type="uri" xlink:href="https://getset.ga/" xlink:type="simple">https://getset.ga/</ext-link> (accessed September 13, 2021).</mixed-citation></ref>
<ref id="pone.0312289.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>Y.</given-names></name>, <name name-style="western"><surname>Cai</surname> <given-names>T.</given-names></name>, <name name-style="western"><surname>Ahuja</surname> <given-names>Y.</given-names></name>, <name name-style="western"><surname>He</surname> <given-names>Z.</given-names></name>, <name name-style="western"><surname>Ho</surname> <given-names>Y.-L.</given-names></name>, <etal>et al</etal>, <article-title>Automated grouping of medical codes via multiview banded spectral clustering</article-title>, <source>Journal of Biomedical Informatics</source> <volume>100</volume> (<year>2019</year>) <fpage>103322</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2019.103322" xlink:type="simple">10.1016/j.jbi.2019.103322</ext-link></comment> <object-id pub-id-type="pmid">31672532</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ostropolets</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Knoll</surname> <given-names>C.</given-names></name>, <name name-style="western"><surname>Ryan</surname> <given-names>P.</given-names></name>, <source>PHOEBE 2.0: selecting the right concept sets for the right patients using lexical, semantic, and data-driven recommendations</source>, (n.d.).</mixed-citation></ref>
<ref id="pone.0312289.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Winnenburg</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <article-title>Issues in creating and maintaining value sets for clinical quality measures</article-title>, <source>AMIA Annu. Symp. Proc</source>. <volume>2012</volume> (<year>2012</year>) <fpage>988</fpage>–<lpage>996</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pubmed/23304374" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pubmed/23304374</ext-link>. <object-id pub-id-type="pmid">23304374</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Winnenburg</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <article-title>Metrics for assessing the quality of value sets in clinical quality measures</article-title>, <source>AMIA Annu. Symp. Proc</source>. <volume>2013</volume> (<year>2013</year>) <fpage>1497</fpage>–<lpage>1505</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pubmed/24551422" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pubmed/24551422</ext-link>. <object-id pub-id-type="pmid">24551422</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Winnenburg</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Rodriguez</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Callaghan</surname> <given-names>F.M.</given-names></name>, <name name-style="western"><surname>Sorbello</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Szarfman</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <source>Aligning Pharmacologic Classes Between MeSH and ATC</source>, in: <year>2013</year>. <ext-link ext-link-type="uri" xlink:href="https://mor.nlm.nih.gov/pubs/pdf/2013-vdos-rw.pdf" xlink:type="simple">https://mor.nlm.nih.gov/pubs/pdf/2013-vdos-rw.pdf</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bahr</surname> <given-names>N.J.</given-names></name>, <name name-style="western"><surname>Nelson</surname> <given-names>S.D.</given-names></name>, <name name-style="western"><surname>Winnenburg</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <article-title>Eliciting the Intension of Drug Value Sets–Principles and Quality Assurance Applications</article-title>, <source>Stud Health Technol Inform</source> <volume>245</volume> (<year>2017</year>) <fpage>843</fpage>–<lpage>847</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5881398/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5881398/</ext-link> (accessed May 14, 2020). <object-id pub-id-type="pmid">29295218</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cholan</surname> <given-names>R.A.</given-names></name>, <name name-style="western"><surname>Weiskopf</surname> <given-names>N.G.</given-names></name>, <name name-style="western"><surname>Rhoton</surname> <given-names>D.L.</given-names></name>, <name name-style="western"><surname>Colin</surname> <given-names>N.V.</given-names></name>, <name name-style="western"><surname>Ross</surname> <given-names>R.L.</given-names></name>, <name name-style="western"><surname>Marzullo</surname> <given-names>M.N.</given-names></name>, <etal>et al</etal>, <article-title>Specifications of Clinical Quality Measures and Value Set Vocabularies Shift Over Time: A Study of Change through Implementation Differences</article-title>, <source>AMIA Annu Symp Proc</source> <volume>2017</volume> (<year>2018</year>) <fpage>575</fpage>–<lpage>584</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5977609/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5977609/</ext-link> (accessed May 13, 2020). <object-id pub-id-type="pmid">29854122</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Margawati</surname> <given-names>D.</given-names></name>, <source>UNDERSTANDING THE VARIABILITY IN VALUE SETS: THE ROLE OF STEWARD</source>, <collab>Johns Hopkins University</collab>, <year>2019</year>.</mixed-citation></ref>
<ref id="pone.0312289.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Swerdel</surname> <given-names>J.N.</given-names></name>, <name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Ryan</surname> <given-names>P.B.</given-names></name>, <article-title>PheValuator: Development and evaluation of a phenotype algorithm evaluator</article-title>, <source>Journal of Biomedical Informatics</source> <volume>97</volume> (<year>2019</year>) <fpage>103258</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2019.103258" xlink:type="simple">10.1016/j.jbi.2019.103258</ext-link></comment> <object-id pub-id-type="pmid">31369862</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bodenreider</surname> <given-names>O.</given-names></name>, <name name-style="western"><surname>Nguyen</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Chiang</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Chuang</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Madden</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Winnenburg</surname> <given-names>R.</given-names></name>, <etal>et al</etal>, <article-title>The NLM Value Set Authority Center</article-title>, <source>Stud Health Technol Inform</source> <volume>192</volume> (<year>2013</year>) <fpage>1224</fpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4300102/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4300102/</ext-link> (accessed March 9, 2019). <object-id pub-id-type="pmid">23920998</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Khatipov</surname> <given-names>E.</given-names></name>, <name name-style="western"><surname>Madden</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Chiang</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Chuang</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Nguyen</surname> <given-names>D.M.</given-names></name>, <name name-style="western"><surname>D’Souza</surname> <given-names>I.</given-names></name>, <etal>et al</etal>, <article-title>Creating, Maintaining and Publishing Value Sets in the VSAC</article-title>, in: <source>AMIA</source>, <year>2014</year>.</mixed-citation></ref>
<ref id="pone.0312289.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Springate</surname> <given-names>D.A.</given-names></name>, <name name-style="western"><surname>Kontopantelis</surname> <given-names>E.</given-names></name>, <name name-style="western"><surname>Ashcroft</surname> <given-names>D.M.</given-names></name>, <name name-style="western"><surname>Olier</surname> <given-names>I.</given-names></name>, <name name-style="western"><surname>Parisi</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Chamapiwa</surname> <given-names>E.</given-names></name>, <etal>et al</etal>, <article-title>ClinicalCodes: An Online Clinical Codes Repository to Improve the Validity and Reproducibility of Research Using Electronic Medical Records</article-title>, <source>PLoS ONE</source> <volume>9</volume> (<year>2014</year>) <fpage>e99825</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0099825" xlink:type="simple">10.1371/journal.pone.0099825</ext-link></comment> <object-id pub-id-type="pmid">24941260</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref033"><label>33</label><mixed-citation publication-type="other" xlink:type="simple">D.A. Schön, The reflective practitioner: how professionals think in action, Basic Books, New York, 1983. <ext-link ext-link-type="uri" xlink:href="http://www.gbv.de/dms/bowker/toc/9780465068746.pdf" xlink:type="simple">http://www.gbv.de/dms/bowker/toc/9780465068746.pdf</ext-link> (accessed August 30, 2023).</mixed-citation></ref>
<ref id="pone.0312289.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <article-title>Value sets and the problem of redundancy in value set repositories</article-title>. <source>Survey data</source>, (<year>2024</year>). <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/ABTJU" xlink:type="simple">https://doi.org/10.17605/OSF.IO/ABTJU</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref035"><label>35</label><mixed-citation publication-type="other" xlink:type="simple">OHDSI/ATLAS, (2020). <ext-link ext-link-type="uri" xlink:href="https://github.com/OHDSI/Atlas/wiki" xlink:type="simple">https://github.com/OHDSI/Atlas/wiki</ext-link> (accessed May 5, 2020).</mixed-citation></ref>
<ref id="pone.0312289.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pathak</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Jiang</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Dwarkanath</surname> <given-names>S.O.</given-names></name>, <name name-style="western"><surname>Buntrock</surname> <given-names>J.D.</given-names></name>, <name name-style="western"><surname>Chute</surname> <given-names>C.G.</given-names></name>, <article-title>LexValueSets: an approach for context-driven value sets extraction</article-title>, <source>AMIA Annu Symp Proc</source> (<year>2008</year>) <fpage>556</fpage>–<lpage>560</lpage>. <object-id pub-id-type="pmid">18998955</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref037"><label>37</label><mixed-citation publication-type="other" xlink:type="simple">H. Solbrig, ISO 11179 CTS2 and Value Set Binding, (n.d.). <ext-link ext-link-type="uri" xlink:href="http://dbooth.org/2015/solbrig/FHIR_RDF_Solbrig.pdf" xlink:type="simple">http://dbooth.org/2015/solbrig/FHIR_RDF_Solbrig.pdf</ext-link> (accessed March 28, 2020).</mixed-citation></ref>
<ref id="pone.0312289.ref038"><label>38</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jiang</surname> <given-names>G.</given-names></name>, <name name-style="western"><surname>Solbrig</surname> <given-names>H.R.</given-names></name>, <name name-style="western"><surname>Chute</surname> <given-names>C.G.</given-names></name>, <article-title>Quality evaluation of value sets from cancer study common data elements using the UMLS semantic groups</article-title>, <source>J. Am. Med. Inform. Assoc</source>. <volume>19</volume> (<year>2012</year>) <fpage>e129</fpage>–<lpage>36</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2011-000739" xlink:type="simple">10.1136/amiajnl-2011-000739</ext-link></comment> <object-id pub-id-type="pmid">22511016</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wu</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Finnell</surname> <given-names>J.T.</given-names></name>, <name name-style="western"><surname>Vreeman</surname> <given-names>D.J.</given-names></name>, <article-title>Evaluating Congruence Between Laboratory LOINC Value Sets for Quality Measures, Public Health Reporting, and Mapping Common Tests</article-title>, <source>AMIA Annu Symp Proc</source> <volume>2013</volume> (<year>2013</year>) <fpage>1525</fpage>–<lpage>1532</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3900163/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3900163/</ext-link> (accessed May 14, 2020). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2012.06.002" xlink:type="simple">10.1016/j.jbi.2012.06.002</ext-link></comment> <object-id pub-id-type="pmid">24551424</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref040"><label>40</label><mixed-citation publication-type="other" xlink:type="simple">HL7 Standards Product Brief ‐ HL7 Specification: Characteristics of a Formal Value Set Definition, Release 1, (2017). <ext-link ext-link-type="uri" xlink:href="http://www.hl7.org/implement/standards/product_brief.cfm?product_id=437" xlink:type="simple">http://www.hl7.org/implement/standards/product_brief.cfm?product_id=437</ext-link> (accessed March 8, 2018).</mixed-citation></ref>
<ref id="pone.0312289.ref041"><label>41</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Caragea</surname> <given-names>C.</given-names></name>, <name name-style="western"><surname>Honavar</surname> <given-names>V.</given-names></name>, <name name-style="western"><surname>Boncz</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Boncz</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Larson</surname> <given-names>P.-Å.</given-names></name>, <name name-style="western"><surname>Dietrich</surname> <given-names>S.W.</given-names></name>, <etal>et al</etal>, <chapter-title>Metadata Registry, ISO/IEC 11179</chapter-title>, in: <name name-style="western"><surname>Liu</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Özsu</surname> <given-names>M.T.</given-names></name> (Eds.), <source>Encyclopedia of Database Systems</source>, <publisher-name>Springer US</publisher-name>, <publisher-loc>Boston, MA</publisher-loc>, <year>2009</year>: pp. <fpage>1724</fpage>–<lpage>1727</lpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/978-0-387-39940-9_907" xlink:type="simple">https://doi.org/10.1007/978-0-387-39940-9_907</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref042"><label>42</label><mixed-citation publication-type="other" xlink:type="simple">National Cancer Institute, caDSR and ISO 11179, Cancer Data Standards Registry and Repository (2016). <ext-link ext-link-type="uri" xlink:href="https://wiki.nci.nih.gov/display/caDSR/caDSR+and+ISO+11179" xlink:type="simple">https://wiki.nci.nih.gov/display/caDSR/caDSR+and+ISO+11179</ext-link> (accessed May 26, 2020).</mixed-citation></ref>
<ref id="pone.0312289.ref043"><label>43</label><mixed-citation publication-type="other" xlink:type="simple">FHIR Vocabulary Work Group, ValueSet ‐ FHIR v4.0.1, HL7 FHIR Release 4 (2019). <ext-link ext-link-type="uri" xlink:href="https://www.hl7.org/fhir/valueset.html" xlink:type="simple">https://www.hl7.org/fhir/valueset.html</ext-link> (accessed December 8, 2020).</mixed-citation></ref>
<ref id="pone.0312289.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Newton</surname> <given-names>K.M.</given-names></name>, <name name-style="western"><surname>Peissig</surname> <given-names>P.L.</given-names></name>, <name name-style="western"><surname>Kho</surname> <given-names>A.N.</given-names></name>, <name name-style="western"><surname>Bielinski</surname> <given-names>S.J.</given-names></name>, <name name-style="western"><surname>Berg</surname> <given-names>R.L.</given-names></name>, <name name-style="western"><surname>Choudhary</surname> <given-names>V.</given-names></name>, <etal>et al</etal>, <article-title>Validation of electronic medical record-based phenotyping algorithms: results and lessons learned from the eMERGE network</article-title>, <source>J Am Med Inform Assoc</source> <volume>20</volume> (<year>2013</year>) <fpage>e147</fpage>–<lpage>e154</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2012-000896" xlink:type="simple">10.1136/amiajnl-2012-000896</ext-link></comment> <object-id pub-id-type="pmid">23531748</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jing</surname> <given-names>X.</given-names></name>, <name name-style="western"><surname>Emerson</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Masters</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Brooks</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Buskirk</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Abukamail</surname> <given-names>N.</given-names></name>, <etal>et al</etal>, <article-title>A visual interactive analytic tool for filtering and summarizing large health data sets coded with hierarchical terminologies (VIADS)</article-title>, <source>BMC Medical Informatics and Decision Making</source> <volume>19</volume> (<year>2019</year>) <fpage>31</fpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12911-019-0750-y" xlink:type="simple">https://doi.org/10.1186/s12911-019-0750-y</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref046"><label>46</label><mixed-citation publication-type="other" xlink:type="simple">A Word on “Descriptive” and “Prescriptive” Defining, <ext-link ext-link-type="uri" xlink:href="https://www.merriam-webster.com/" xlink:type="simple">https://www.merriam-webster.com/</ext-link> (n.d.). <ext-link ext-link-type="uri" xlink:href="https://www.merriam-webster.com/words-at-play/descriptive-vs-prescriptive-defining-lexicography" xlink:type="simple">https://www.merriam-webster.com/words-at-play/descriptive-vs-prescriptive-defining-lexicography</ext-link> (accessed August 21, 2021).</mixed-citation></ref>
<ref id="pone.0312289.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chute</surname> <given-names>C.G.</given-names></name>, <article-title>The Copernican era of healthcare terminology: a re-centering of health information systems</article-title>, <source>Proc AMIA Symp</source> (<year>1998</year>) <fpage>68</fpage>–<lpage>73</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pubmed/9929184" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pubmed/9929184</ext-link>. <object-id pub-id-type="pmid">9929184</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref048"><label>48</label><mixed-citation publication-type="other" xlink:type="simple">H.M. Rosenberg, D.L. Hoyert (Eds.), History of the statistical classification of diseases and causes of death, National Center for Health Statistics, Hyattsville, MD, 2011. <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/nchs/data/misc/classification_diseases2011.pdf" xlink:type="simple">https://www.cdc.gov/nchs/data/misc/classification_diseases2011.pdf</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Berg</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Bowker</surname> <given-names>G.</given-names></name>, <article-title>The multiple bodies of the medical record</article-title>, <source>Sociol. Q</source>. (<year>1997</year>). <ext-link ext-link-type="uri" xlink:href="http://onlinelibrary.wiley.com/doi/10.1111/j.1533-8525.1997.tb00490.x/full" xlink:type="simple">http://onlinelibrary.wiley.com/doi/10.1111/j.1533-8525.1997.tb00490.x/full</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref050"><label>50</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bowker</surname> <given-names>G.C.</given-names></name>, <name name-style="western"><surname>Star</surname> <given-names>S.L.</given-names></name>, <source>Sorting Things Out</source>, (<year>2000</year>). <ext-link ext-link-type="uri" xlink:href="https://mitpress.mit.edu/books/sorting-things-out" xlink:type="simple">https://mitpress.mit.edu/books/sorting-things-out</ext-link> (accessed September 26, 2017).</mixed-citation></ref>
<ref id="pone.0312289.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Haendel</surname> <given-names>M.A.</given-names></name>, <name name-style="western"><surname>Chute</surname> <given-names>C.G.</given-names></name>, <name name-style="western"><surname>Bennett</surname> <given-names>T.D.</given-names></name>, <name name-style="western"><surname>Eichmann</surname> <given-names>D.A.</given-names></name>, <name name-style="western"><surname>Guinney</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Kibbe</surname> <given-names>W.A.</given-names></name>, <etal>et al</etal>. Gersing, <collab>the N3C Consortium</collab>, <article-title>The National COVID Cohort Collaborative (N3C): Rationale, design, infrastructure, and deployment</article-title>, <source>Journal of the American Medical Informatics Association</source> <volume>28</volume> (<year>2021</year>) <fpage>427</fpage>–<lpage>443</lpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocaa196" xlink:type="simple">https://doi.org/10.1093/jamia/ocaa196</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pfaff</surname> <given-names>E.R.</given-names></name>, <name name-style="western"><surname>Girvin</surname> <given-names>A.T.</given-names></name>, <name name-style="western"><surname>Gabriel</surname> <given-names>D.L.</given-names></name>, <name name-style="western"><surname>Kostka</surname> <given-names>K.</given-names></name>, <name name-style="western"><surname>Morris</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Palchuk</surname> <given-names>M.B.</given-names></name>, <etal>et al</etal>, <article-title>Synergies between centralized and federated approaches to data quality: a report from the national COVID cohort collaborative</article-title>, <source>Journal of the American Medical Informatics Association</source> (<year>2021</year>) <elocation-id>ocab217</elocation-id>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocab217" xlink:type="simple">https://doi.org/10.1093/jamia/ocab217</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref053"><label>53</label><mixed-citation publication-type="other" xlink:type="simple">Healthcare Cost and Utilization Project (HCUP). Agency for Healthcare Research and Quality, HCUP Clinical Classifications Software Refined (CCSR) for ICD-10-CM Diagnoses, (n.d.). <ext-link ext-link-type="uri" xlink:href="https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp" xlink:type="simple">https://www.hcup-us.ahrq.gov/toolssoftware/ccsr/ccs_refined.jsp</ext-link> (accessed April 13, 2022).</mixed-citation></ref>
<ref id="pone.0312289.ref054"><label>54</label><mixed-citation publication-type="other" xlink:type="simple">A. Elixhauser, C. Steiner, Most common diagnoses and procedures in US community hospitals, 1996, Healthcare Cost and Utilization Project, HCUP Research Note. Rockville, MD. Agency for Health Care Policy and Research. AHCPR Pub (1999) 99–0046. <ext-link ext-link-type="uri" xlink:href="https://www.hcup-us.ahrq.gov/reports/natstats/commdx/commdx.htm" xlink:type="simple">https://www.hcup-us.ahrq.gov/reports/natstats/commdx/commdx.htm</ext-link> (accessed April 13, 2022).</mixed-citation></ref>
<ref id="pone.0312289.ref055"><label>55</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ostropolets</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Ryan</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Hripcsak</surname> <given-names>G.</given-names></name>, <article-title>Phenotyping in distributed data networks: selecting the right codes for the right patients</article-title>, <source>AMIA Annu Symp Proc</source> <volume>2022</volume> (<year>2022</year>) <fpage>826</fpage>–<lpage>835</lpage>. <object-id pub-id-type="pmid">37128407</object-id></mixed-citation></ref>
<ref id="pone.0312289.ref056"><label>56</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Peissig</surname> <given-names>P.L.</given-names></name>, <name name-style="western"><surname>Rasmussen</surname> <given-names>L.V.</given-names></name>, <name name-style="western"><surname>Berg</surname> <given-names>R.L.</given-names></name>, <name name-style="western"><surname>Linneman</surname> <given-names>J.G.</given-names></name>, <name name-style="western"><surname>McCarty</surname> <given-names>C.A.</given-names></name>, <name name-style="western"><surname>Waudby</surname> <given-names>C.</given-names></name>, <etal>et al</etal>. Starren, <article-title>Importance of multi-modal approaches to effectively identify cataract cases from electronic health records</article-title>, <source>J Am Med Inform Assoc</source> <volume>19</volume> (<year>2012</year>) <fpage>225</fpage>–<lpage>234</lpage>. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/amiajnl-2011-000456" xlink:type="simple">https://doi.org/10.1136/amiajnl-2011-000456</ext-link>.</mixed-citation></ref>
<ref id="pone.0312289.ref057"><label>57</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gold</surname> <given-names>S.</given-names></name>, <name name-style="western"><surname>Flack</surname> <given-names>J.</given-names></name>, <source>VS-Hub</source>, (<year>2022</year>). <ext-link ext-link-type="uri" xlink:href="https://bit.ly/termhub" xlink:type="simple">https://bit.ly/termhub</ext-link>.</mixed-citation></ref>
</ref-list>
</back>
</article>