<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLOS Digit Health</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosdh</journal-id>
<journal-title-group>
<journal-title>PLOS Digital Health</journal-title>
</journal-title-group>
<issn pub-type="epub">2767-3170</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pdig.0000063</article-id>
<article-id pub-id-type="publisher-id">PDIG-D-21-00079</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Public and occupational health</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Infectious diseases</subject><subj-group><subject>Viral diseases</subject><subj-group><subject>COVID 19</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Epidemiology</subject><subj-group><subject>Pandemics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Lexicography</subject><subj-group><subject>Lexicons</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Law and legal sciences</subject><subj-group><subject>Criminal justice system</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Evaluating criminal justice reform during COVID-19: The need for a novel sentiment analysis package</article-title>
<alt-title alt-title-type="running-head">Evaluating criminal justice reform during COVID-19</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" equal-contrib="yes" xlink:type="simple">
<name name-style="western">
<surname>Ramjee</surname>
<given-names>Divya</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-9029-4644</contrib-id>
<name name-style="western">
<surname>Smith</surname>
<given-names>Louisa H.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-0924-2287</contrib-id>
<name name-style="western">
<surname>Doanvo</surname>
<given-names>Anhvinh</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Charpignon</surname>
<given-names>Marie-Laure</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-4281-8236</contrib-id>
<name name-style="western">
<surname>McNulty-Nebel</surname>
<given-names>Alyssa</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-6590-7821</contrib-id>
<name name-style="western">
<surname>Lett</surname>
<given-names>Elle</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff006"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff007"><sup>7</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-8962-9427</contrib-id>
<name name-style="western">
<surname>Desai</surname>
<given-names>Angel N.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff008"><sup>8</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Majumder</surname>
<given-names>Maimuna S.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff006"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff009"><sup>9</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Justice, Law and Criminology, School of Public Affairs, American University, Washington, District of Columbia, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Roux Institute, Northeastern University, Portland, Maine, United States of America</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>COVID-19 Dispersed Volunteer Research Network, Boston, Massachusetts, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Institute for Data, Systems, and Society, Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>Department of Epidemiology and Biostatistics, School of Public Health, Texas A&amp;M University, College Station, Texas, United States of America</addr-line></aff>
<aff id="aff006"><label>6</label> <addr-line>Computational Health Informatics Program, Boston Children’s Hospital and Harvard Medical School, Boston, Massachusetts, United States of America</addr-line></aff>
<aff id="aff007"><label>7</label> <addr-line>Perelman School of Medicine, University of Pennsylvania, 3400 Civic Center Boulevard, Philadelphia, Pennsylvania, United States of America</addr-line></aff>
<aff id="aff008"><label>8</label> <addr-line>Division of Infectious Disease, University of California Davis Health, Sacramento, California, United States of America</addr-line></aff>
<aff id="aff009"><label>9</label> <addr-line>Department of Pediatrics, Harvard Medical School, Boston, Massachusetts, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Banerjee</surname>
<given-names>Imon</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Emory University, UNITED STATES</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>Maimuna Majumder is an Associate Editor for PLOS Digital Health.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">dr1208a@american.edu</email> (DR); <email xlink:type="simple">angel.n.desai@gmail.com</email>, <email xlink:type="simple">andesai@ad3.ucdavis.edu</email> (AND)</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>7</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<month>7</month>
<year>2022</year>
</pub-date>
<volume>1</volume>
<issue>7</issue>
<elocation-id>e0000063</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>9</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>5</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-year>2022</copyright-year>
<copyright-holder>Ramjee et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pdig.0000063"/>
<abstract>
<p>The health and safety of incarcerated persons and correctional personnel have been prominent in the U.S. news media discourse during the COVID-19 pandemic. Examining changing attitudes toward the health of the incarcerated population is imperative to better assess the extent to which the general public favors criminal justice reform. However, existing natural language processing lexicons that underlie current sentiment analysis (SA) algorithms may not perform adequately on news articles related to criminal justice due to contextual complexities. News discourse during the pandemic has highlighted the need for a novel SA lexicon and algorithm (i.e., an SA package) tailored for examining public health policy in the context of the criminal justice system. We analyzed the performance of existing SA packages on a corpus of news articles at the intersection of COVID-19 and criminal justice collected from state-level outlets between January and May 2020. Our results demonstrated that sentence sentiment scores provided by three popular SA packages can differ considerably from manually-curated ratings. This dissimilarity was especially pronounced when the text was more polarized, whether negatively or positively. A randomly selected set of 1,000 manually scored sentences, and the corresponding binary document term matrices, were used to train two new sentiment prediction algorithms (i.e., linear regression and random forest regression) to verify the performance of the manually-curated ratings. By better accounting for the unique context in which incarceration-related terminologies are used in news media, both of our proposed models outperformed all existing SA packages considered for comparison. Our findings suggest that there is a need to develop a novel lexicon, and potentially an accompanying algorithm, for analysis of text related to public health within the criminal justice system, as well as criminal justice more broadly.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Incarceration is a social cause of disease, with currently and formerly incarcerated individuals being more likely to face vulnerabilities to disease outbreaks. The COVID-19 pandemic amplified the health-related shortcomings in the U.S. prison system, prompting the U.S. Department of Justice to consider reforming early release and home confinement measures. Public attention and news media coverage have concurrently increased, with particular attention to criminal justice reform initiatives and systemic ethnoracial inequities. Here, we attempted to characterize public sentiment towards criminal justice reform, in light of the pandemic and public health concerns, using existing tools for assessing positive, negative, and neutral sentiment. Our findings suggest these existing tools are inadequate for accurately gauging sentiment in texts at the intersection of public health and criminal justice, and potentially also for texts related to criminal justice more broadly. Along with other approaches, properly validated tools for understanding sentiment can assist in gauging the scope of public health and reform measures for incarcerated persons and public support for or against criminal justice reform.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100009633</institution-id>
<institution>Eunice Kennedy Shriver National Institute of Child Health and Human Development</institution>
</institution-wrap>
</funding-source>
<award-id>T32HD040128</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Majumder</surname>
<given-names>Maimuna S.</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>MSM was supported in part by grant T32HD040128 from the Eunice Kennedy Shriver National Institute of Child Health and Human Development (NIH). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="1"/>
<page-count count="10"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>The data and code are available at <ext-link ext-link-type="uri" xlink:href="https://osf.io/5pmxv/" xlink:type="simple">https://osf.io/5pmxv/</ext-link>.</meta-value>
</custom-meta>
<custom-meta id="outbreaks">
<meta-name>Outbreaks</meta-name>
<meta-value>COVID-19</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>The disproportionate incarceration of ethnoracial minorities and marginalized populations represents one mechanism through which structural racism drives health inequity in the United States (U.S.) [<xref ref-type="bibr" rid="pdig.0000063.ref001">1</xref>]. The coronavirus disease 2019 (COVID-19) pandemic provided an acute shock to the criminal justice system and adversely impacted the health of incarcerated people, as well as correctional workers and staff [<xref ref-type="bibr" rid="pdig.0000063.ref002">2</xref>]. Vulnerabilities driven by exposure to the carceral system have been exacerbated by COVID-19, further impeding the health of communities of color.</p>
<p>Incarceration and detention facilities are disproportionately affected by infectious disease outbreaks [<xref ref-type="bibr" rid="pdig.0000063.ref001">1</xref>], and the onset of COVID-19 prompted the U.S. Department of Justice to consider prisoner release and home confinement as mitigation options to control transmission in March 2020 [<xref ref-type="bibr" rid="pdig.0000063.ref003">3</xref>]. While this policy only applied to facilities under the control of the U.S. Bureau of Prisons, states made varying decisions regarding prisoner release, perhaps in part due to public opinion and activist movements [<xref ref-type="bibr" rid="pdig.0000063.ref004">4</xref>]. Thus, it is important to understand the role of correctional facilities in the pandemic, including support for reform measures, to better address the health implications of mass incarceration.</p>
<p>In recent years, the rhetoric related to criminal justice policy has increasingly emphasized reform, with the framing of “unfair” punishment and circumstances being most effective in garnering support [<xref ref-type="bibr" rid="pdig.0000063.ref005">5</xref>]. News media outlets in particular have served not only to highlight existing public opinion, but also to help shape public perceptions based on their coverage [<xref ref-type="bibr" rid="pdig.0000063.ref006">6</xref>]. The health and safety of incarcerated persons and correctional personnel were prominent in the U.S. media during the first year of the COVID-19 pandemic [<xref ref-type="bibr" rid="pdig.0000063.ref001">1</xref>], highlighting the need to examine the discourse around public health policy in such contexts.</p>
<p>To understand and characterize public sentiment towards support for and against release of incarcerated individuals, we used existing natural language processing (NLP) lexicons and related algorithms to assess sentiment (people’s opinions, attitudes, evaluations, and emotions [<xref ref-type="bibr" rid="pdig.0000063.ref007">7</xref>]) in news media coverage towards prisoner release and criminal justice reform over the course of the pandemic. NLP tools and techniques provide rapid means for analyzing large amounts of text and are increasingly used in social science and policy contexts [<xref ref-type="bibr" rid="pdig.0000063.ref008">8</xref>]. Sentiment analysis (SA)—or opinion mining—is an NLP subfield that pairs sentiment lexicons, i.e. dictionaries of words and phrases with rated sentiment polarity (negative or positive), with specific algorithms that account for important syntactical and contextual features [<xref ref-type="bibr" rid="pdig.0000063.ref008">8</xref>]. Common practical applications of SA span a wide range of fields including economics, marketing, politics, and public health [<xref ref-type="bibr" rid="pdig.0000063.ref008">8</xref>].</p>
<p>To our knowledge, an SA lexicon specific to the field of criminal justice does not yet exist. This field is unique in that much of the related vocabulary is inherently negative, though the intentions and motivations of the discourse may be positive [<xref ref-type="bibr" rid="pdig.0000063.ref004">4</xref>,<xref ref-type="bibr" rid="pdig.0000063.ref006">6</xref>]—thus potentially skewing sentiment analysis of commentary in support of criminal justice reform (positive sentiment polarity) and against criminal justice reform (negative sentiment polarity). We therefore hypothesized that due to dual use [<xref ref-type="bibr" rid="pdig.0000063.ref009">9</xref>] (i.e., using a system for a purpose separate from the one for which it was designed), existing SA packages (i.e., lexicon-algorithm pairs) would be insufficient for accurately gauging sentiment in news media coverage related to public health crises within the criminal justice system, particularly during the COVID-19 pandemic.</p>
<p>To test our hypothesis, we manually rated sentiment scores on a text corpus of news media articles related to COVID-19 and incarceration. Our manually-curated scores were then compared to ratings from existing SA packages for each sentence of the selected sample. Building on a training set consisting of our manual ratings as the reference outcome, we created two algorithms (based on a linear regression model and a random forest regression model) to improve on currently available SA tools that are not tailored to text at the intersection of public health and the criminal justice system.</p>
</sec>
<sec id="sec002" sec-type="results">
<title>Results</title>
<p>Our experiment and analyses considered the following existing SA packages that are most frequently used in the NLP literature: SocialSent [<xref ref-type="bibr" rid="pdig.0000063.ref010">10</xref>], VADER [<xref ref-type="bibr" rid="pdig.0000063.ref007">7</xref>], and Stanford CoreNLP [<xref ref-type="bibr" rid="pdig.0000063.ref011">11</xref>].</p>
<sec id="sec003">
<title>Sentiment scoring and lexical analysis</title>
<p>Overall, sentences that were manually scored to have neutral sentiment were consistently rated as neutral by the above listed SA packages. <xref ref-type="fig" rid="pdig.0000063.g001">Fig 1</xref> shows three such sentences (sentences 2–4), with scale-standardized scores (see Methods) that deviated least from our ratings, all neutral in sentiment (i.e., either fully neutral in sentiment or with equal amounts of positive and negative sentiment). However, sentences with more extreme positive or negative sentiment polarity, as ascertained by our manual ratings, were more often scored differently across SA packages (sentences 1, 5–7 in <xref ref-type="fig" rid="pdig.0000063.g001">Fig 1</xref>). This division appeared driven by words related to criminal justice and public safety (e.g., “innocent”, “violent”, “defense”, “threat”, “vulnerable”, “safety”, “care”, “negative”).</p>
<fig id="pdig.0000063.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pdig.0000063.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Standardized sentence sentiment scores that deviated least from manually-curated scores, exemplified by three distinct SA packages.</title>
<p>SocialSent was considered because it is specifically attuned to social science contexts. VADER was also included for comparison because it is one of the most widely used SA packages. Finally, CoreNLP was used due to its accuracy in sentiment scoring, as reported by a recent systematic review of SA in public health. The sentences are arranged from top to bottom in order of most positive sentiment score to most negative, as determined by manual curators. The left, middle, and right panels correspond with the SocialSent, VADER, and Stanford CoreNLP SA packages, respectively. To the left of the sentences are the sentiment values assigned by each SA package on its own scale (SocialSent from 0 to 1, VADER from -1 to 1, CoreNLP from 1 to 3); our ratings follow each sentence in parentheses (range of -4 to 4). Colors indicate relative sentiment associated with the selected portion of the sentence, with red and blue indicating negative and positive sentiment, respectively, as determined by running each algorithm on separate phrases within the sentences.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.g001" xlink:type="simple"/>
</fig>
<p>To investigate, we used the 68 words (i.e., 3.6% of the overall corpus vocabulary) that appeared most often across all sentences (i.e., in at least 10) and compared the average sentence sentiment score for each word. This was conducted using our manually-curated sentence sentiment scores against the popular SA packages of SocialSent, VADER, and Stanford CoreNLP. Results present similar patterns across SA packages (<xref ref-type="fig" rid="pdig.0000063.g002">Fig 2</xref>). Our manually-curated SA scores generally associated criminal justice and public safety terminologies (e.g., “safety”, “attorney”, “community”) with positively-scored sentences while the three existing SA packages yielded more neutral or negative sentiment ratings than our scores (e.g., an average score of 0.55 (95% CI -0.06, 1.17) for “community” compared to 0.06 (-0.36, 0.48); -0.02 (-0.62, 0.58); and -0.10 (-0.62, 0.41) from SocialSent, VADER, and Stanford CoreNLP, respectively) (full results in <xref ref-type="supplementary-material" rid="pdig.0000063.s001">S1 Table</xref>). However, certain criminal justice-related terminology, including “detention”, “facility”, “sentence”, and “justice”, appeared in sentences we rated slightly more negatively in sentiment, on average, compared to the existing SA packages (e.g., an average score of -0.18 (-0.49, 0.12) for “facility” compared to 0.10 (-0.19, 0.40); -0.08 (-0.40, 0.24); and -0.04 (-0.32, 0.24) from SocialSent, VADER, and Stanford CoreNLP, respectively), though this was less consistent across packages. Criminal justice-related words associated with neutrally-scored (or equally positive and negative) sentences as determined by both existing SA packages and our manual curation included “jail”, “inmate”, “prisoner”, “medical”, and “test”. 
For terminology specific to public health and the pandemic (e.g., “disease”, “positive”, “virus”, “outbreak”, “spread”, “pandemic”), our manually-curated scores were primarily associated with negatively-scored sentences, with the exception of the word “health”, while the three existing SA packages rated these as more positive in sentiment compared to our scores (e.g., an average score of -0.31 (-0.46, -0.15) for “positive” compared to -0.09 (-0.44, 0.27); 1.24 (1.03, 1.45); and 1.03 (0.65, 1.41) from SocialSent, VADER, and Stanford CoreNLP, respectively).</p>
<fig id="pdig.0000063.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pdig.0000063.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Average standardized sentiment scores for sentences containing each of the 68 most frequently appearing words.</title>
<p>The words are arranged from top to bottom in order of most positive (blue) average sentiment to most negative (red), according to our human-curated scores (Panel A). Colors in Panel B indicate the relative score from each of three SA packages compared to our ratings, with red (blue) indicating that the algorithm rated sentences with a given word more negatively (positively) than our team did. The color gradient indicates the intensity of the average standardized sentiment score for that term across sentences (i.e., light colors refer to terms rated more neutrally, while darker colors refer to terms rated more negatively or more positively). The length of each bar depicts the average difference between a given package’s scores and ours. Words whose direction of sentiment differs from ours in each package are marked with an asterisk, and words relating to the criminal justice system are in bold.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.g002" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec004">
<title>Proof of concept machine learning algorithms</title>
<p>To validate the proof of concept derived from our manually-curated sentiment analysis, we developed two machine learning (ML) algorithms–using a linear regression model and a random forest regression model–based on our sentiment ratings. After standardization of sentiment scores for the three existing SA packages and our two ML models, we trained and tested all algorithms on our text corpus. As is evidenced by the lowest mean absolute difference between our manually-curated scores and predicted sentiment scores (<xref ref-type="table" rid="pdig.0000063.t001">Table 1</xref>), both of our models strongly outperformed all three tested SA packages–signifying an important initial step in the development of a new SA package.</p>
<table-wrap id="pdig.0000063.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pdig.0000063.t001</object-id>
<label>Table 1</label> <caption><title>Comparison of Model Fit Between Existing SA Packages and Our Models.</title></caption>
<alternatives>
<graphic id="pdig.0000063.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">SA Model</th>
<th align="center">Mean Absolute Difference in Standardized Score Prediction <italic>(standard error)</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">SocialSent</td>
<td align="center">1.04 <italic>(0.02)</italic></td>
</tr>
<tr>
<td align="center">Stanford CoreNLP</td>
<td align="center">1.03 <italic>(0.02)</italic></td>
</tr>
<tr>
<td align="center">VADER</td>
<td align="center">0.95 <italic>(0.03)</italic></td>
</tr>
<tr>
<td align="center" style="background-color:#EFEFEF">Trained Linear Regression (binary DTM)</td>
<td align="center" style="background-color:#EFEFEF">0.82 <italic>(0.03)</italic></td>
</tr>
<tr>
<td align="center" style="background-color:#EFEFEF">Trained Random Forest Regression (binary DTM)</td>
<td align="center" style="background-color:#EFEFEF">0.76 <italic>(0.04)</italic></td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001"><p>DTM = Document Term Matrix</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="sec005" sec-type="conclusions">
<title>Discussion</title>
<p>The COVID-19 pandemic instigated the U.S. Department of Justice, and specifically the U.S. Bureau of Prisons, to publicly acknowledge health-related shortcomings in the U.S. prison system and to address reforming early release and home confinement measures. Public attention and news media coverage have concurrently increased, with particular attention to criminal justice reform initiatives and systemic ethnoracial inequities [<xref ref-type="bibr" rid="pdig.0000063.ref012">12</xref>]. This preliminary study illustrates that existing SA packages are inadequate for accurate assessment of sentiment in texts regarding such current events.</p>
<p>Our results suggest existing SA packages may be unable to accurately gauge sentiment in the text of news articles at the intersection of public health and criminal justice, especially in the context of the COVID-19 pandemic. VADER, one of the most widely used SA packages, scored many of the most frequently used words (Figs <xref ref-type="fig" rid="pdig.0000063.g001">1</xref> and <xref ref-type="fig" rid="pdig.0000063.g002">2</xref>) as negative, despite our identification of the words as being neutral or positive within their respective sentence contexts. SocialSent performed better, rating these words more positively than VADER. The fact that the SocialSent SA lexicon is specifically tuned to social science contexts might explain this difference. Overall, existing SA packages performed similarly to each other, with an average error of roughly 1.0 for standardized score predictions (<xref ref-type="table" rid="pdig.0000063.t001">Table 1</xref>). However, our models’ performance demonstrates the limited utility of these packages–not only for analyzing texts that include both public health and criminal justice content, but also for texts related to criminal justice more broadly.</p>
<p>As suggested by our ML algorithms’ outperformance of existing SA packages, words used in texts related to public health within the criminal justice system are contextually unique. Our results demonstrate the importance of human curation as an initial step towards building a training data set that serves the development of a new SA lexicon and algorithm (i.e., package) specific to this interdisciplinary subject, as well as the importance of a new sentiment rating protocol (i.e., lexicon-algorithm pair) with texts specific to criminal justice. In future work, we aim to build on the preliminary work presented here and develop a novel SA package tailored to texts related to public health crises within the criminal justice system, and potentially for the field of criminal justice overall.</p>
<p>Mass incarceration in the U.S. has been identified as an ongoing public health emergency requiring reform [<xref ref-type="bibr" rid="pdig.0000063.ref013">13</xref>]. Sentiment analysis packages tailored to criminal justice and its public health context could be used to assess sentiment, emotions, and opinions related to the urgency of this reform. Prior studies have established the disparate impact of mass incarceration on communities of color, as well as the socioeconomic and health effects that bolster long-standing ethnoracial inequities [<xref ref-type="bibr" rid="pdig.0000063.ref014">14</xref>–<xref ref-type="bibr" rid="pdig.0000063.ref021">21</xref>]. Additionally, previous research has highlighted the interconnected consequences of institutional racism whereby inequities in the health and criminal justice systems can reinforce inequities in other sectors [<xref ref-type="bibr" rid="pdig.0000063.ref022">22</xref>,<xref ref-type="bibr" rid="pdig.0000063.ref023">23</xref>]. Incarceration is considered a “structural driver” of health inequalities, making it a social cause of disease, such that individuals currently and formerly incarcerated are more likely to face vulnerabilities to disease outbreaks and pandemics, including COVID-19 [<xref ref-type="bibr" rid="pdig.0000063.ref024">24</xref>,<xref ref-type="bibr" rid="pdig.0000063.ref025">25</xref>]. The existence of such interactions underscores the relevance of research on the perception and support of enhanced public health policies for incarcerated individuals.</p>
<p>Support for such criminal justice reform measures is a crucial step to dismantling structural racism and addressing growing health inequities. Along with other mixed methods approaches, properly validated NLP work on various text corpora, including news media, can assist in gauging the scope of public health and reform measures for incarcerated persons, public support for or against criminal justice reform related to public health, and additional factors (e.g., budgetary prioritizations and community safety) mediating reform policy decisions in response to the COVID-19 pandemic.</p>
<sec id="sec006">
<title>Limitations &amp; future work</title>
<p>There are limitations to this pilot proof-of-concept study. Firstly, because our corpus is limited in size, future work could use an expanded corpus to build a more robust model for sentiment analysis of such texts. For example, additional research could include developing a large-scale news media data aggregation process, as well as establishing a principled framework for sentence-level and article-level sentiment scoring. Secondly, as explained in our Methodology section, only 1,000 sentences were manually scored over two rounds of scoring, and doubling or tripling this number of sentences should be considered in the future to improve sample size. In addition, agreement about the “true” sentiment behind each sentence was limited, with an intraclass correlation between the scores of 0.57. However, this is comparable to or better than similar measures in human-annotated texts [<xref ref-type="bibr" rid="pdig.0000063.ref026">26</xref>–<xref ref-type="bibr" rid="pdig.0000063.ref028">28</xref>] and to the same measure calculated on a subset of the texts used to develop the VADER algorithm (see <xref ref-type="supplementary-material" rid="pdig.0000063.s002">S1 Text</xref>), reflecting inherent ambiguity and diversity of interpretation of language.</p>
<p>Thirdly, our methodology focused on unigram sentiment analysis; however, future investigations using bigram, trigram, or higher-level analyses may be warranted. Furthermore, varying word embedding strategies, building upon pre-existing low-dimensional vectors trained on legal and/or social science text corpora, could be tested and compared against these baselines. Lastly, as mentioned in our Methodology section, we limited our scope to only news articles published before George Floyd’s death to prevent coverage of the event from affecting our results, and future work could investigate this event more closely, particularly comparing news articles covering George Floyd’s death to news articles both before and after his death.</p>
</sec>
<sec id="sec007" sec-type="materials|methods">
<title>Methodology</title>
<p>A recent systematic review [<xref ref-type="bibr" rid="pdig.0000063.ref008">8</xref>] of SA in public health identified support vector machines and naïve Bayes classifiers as the most accurate algorithms (~70–80% accuracy) in the field, leading us to consider SA packages Stanford CoreNLP [<xref ref-type="bibr" rid="pdig.0000063.ref011">11</xref>] and VADER [<xref ref-type="bibr" rid="pdig.0000063.ref007">7</xref>] for our study. We also included SocialSent [<xref ref-type="bibr" rid="pdig.0000063.ref010">10</xref>], which uses a novel algorithm to derive content-specific sentiment lexicons for texts related to social science. See <xref ref-type="supplementary-material" rid="pdig.0000063.s002">S1 Text</xref>, <xref ref-type="supplementary-material" rid="pdig.0000063.s003">S2 Text</xref>, and <xref ref-type="supplementary-material" rid="pdig.0000063.s004">S3 Text</xref> for our code, data set, and additional SA packages examined.</p>
</sec>
<sec id="sec008">
<title>Sentiment scoring</title>
<p>MediaCloud [<xref ref-type="bibr" rid="pdig.0000063.ref029">29</xref>], a searchable platform for articles from news outlets around the world, was used to collect articles related to COVID-19 and criminal justice from January 1, 2020 through May 25, 2020 at the state-level in the U.S. (see <xref ref-type="supplementary-material" rid="pdig.0000063.s003">S2 Text</xref> for search query criteria). We subsequently scraped the full text of each available article. To avoid introducing event-specific coverage in our corpus of texts, we selected May 25th–the date of George Floyd’s death–as our end date. This particular event spurred an increase in news media coverage pertaining to criminal justice reform across the United States, especially as it related to excessive use of force by law enforcement [<xref ref-type="bibr" rid="pdig.0000063.ref030">30</xref>]. Additionally, some stories about his death also discussed the topic of COVID-19 transmission during protests. Thus, we limited our scope to only news articles published before George Floyd’s death, since the coverage of this event could affect our results.</p>
<p>We then used simple random sampling to select 1,000 sentences from our text corpus of 126,552 unique sentences for manually-curated sentiment rating in two phases. Additionally, we validated that the word frequency in this subset and the overall data set were comparable. The first 500 sentences were scored (negative, neutral, or positive) independently by five members of the research team (DR, AD, AM, MC, TC); this first round served as a learning phase for the development of a standardized sentence scoring approach. Reviewers were provided with brief instructions on use of the sentiment rating scale (an integer scale from -4 (most negative) to 4 (most positive)), based on the scoring scheme used by the raters in creating VADER [<xref ref-type="bibr" rid="pdig.0000063.ref007">7</xref>], and directed to examples from that project. All curators subsequently convened to reconcile rating discrepancies and ensure that all individuals agreed on how to score each sentence for our experiment (see <xref ref-type="supplementary-material" rid="pdig.0000063.s002">S1 Text</xref> for coded data and annotation guide).</p>
<p>The second 500 sentences were then used for our experimental results and scored by the same five members of the research team. This set of sentiment ratings was further averaged across curators to compute the final score for each sentence. The intraclass correlation (ICC) for the second set of scores was 0.57. The ICC achieved by the team is comparable to the ICC for the NYT editorial snippet scores, which was 0.53 (see <xref ref-type="supplementary-material" rid="pdig.0000063.s002">S1 Text</xref>). Such performance was deemed sufficient to proceed with our modeling efforts.</p>
<p>The second set of 500 sentences was additionally scored by SocialSent, VADER, and Stanford CoreNLP. All sentiment scores were then standardized (i.e., mean = 0 and standard deviation = 1 within scores from a given algorithm), and scores were compared between SA packages for each sentence. After lemmatization and removal of stop words, we then summarized sentiment related to the 68 words (3.3%) that appeared in at least 10 sentences by calculating the mean score across those sentences. We additionally assessed a selection of sentences to determine which sentiment scores from existing SA packages either deviated from or were consistent with our ratings (<xref ref-type="fig" rid="pdig.0000063.g001">Fig 1</xref>). We further isolated the most frequently appearing words and compared our sentence sentiment scoring with those from SocialSent, VADER, and Stanford CoreNLP (<xref ref-type="fig" rid="pdig.0000063.g002">Fig 2</xref>).</p>
</sec>
<sec id="sec009">
<title>Machine learning algorithms</title>
<p>We developed a proof of concept using our manually-curated scores for the first 500 sampled sentences. We compared the performance of our algorithms against the performance of SocialSent, VADER, and Stanford CoreNLP, using scores on the second set of 500 sampled sentences. All scoring systems were standardized (i.e., to have mean = 0 and standard deviation = 1) for comparison. We used binary document term matrices (DTMs) from our text corpus (i.e., a value is 1 if a word appears in a sentence; otherwise, the value is 0). Ten-fold cross-validation was used to train and test a linear regression model and a random forest regression model on DTMs to predict sentiment scores (see <xref ref-type="supplementary-material" rid="pdig.0000063.s005">S4 Text</xref> and <xref ref-type="supplementary-material" rid="pdig.0000063.s006">S1 Fig</xref> for cross-validation explanation). We then compared the scores predicted from our models to the scores predicted from each of the SA packages (standardized to the same training data). We computed the mean absolute difference between these predicted scores in the test sets and the manually-curated scores, considered as the reference (i.e., ground truth) scores (<xref ref-type="table" rid="pdig.0000063.t001">Table 1</xref>).</p>
</sec>
</sec>
<sec id="sec010" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pdig.0000063.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s001" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Average score across sentences containing the 68 most-common words in our data.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000063.s002" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s002" xlink:type="simple">
<label>S1 Text</label>
<caption>
<title>Code and Data Availability.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000063.s003" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s003" xlink:type="simple">
<label>S2 Text</label>
<caption>
<title>Data Set Search Query Criteria.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000063.s004" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s004" xlink:type="simple">
<label>S3 Text</label>
<caption>
<title>Robustness Check.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000063.s005" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s005" xlink:type="simple">
<label>S4 Text</label>
<caption>
<title>Cross-Validation Technique.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0000063.s006" mimetype="image/png" position="float" xlink:href="info:doi/10.1371/journal.pdig.0000063.s006" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Cross-validation technique used for our machine learning models.</title>
<p>We randomly divided the data set into k partitions (e.g., “five-fold cross-validation” yields five partitions). The model was trained on k − 1 partitions, using the last partition as the validation data set from which we made predictions and collected accuracy metrics. This train-test process was repeated k times so that every partition of the data served as a test data set once.</p>
<p>(PNG)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>Thank you to Tori L. Cowger MPH (referenced as “TC” in our article) for assistance with sentiment scoring. Thank you also to Shagun Gupta MASc, for assistance with data analysis on a previous version of this manuscript. This work was supported in part by grant T32HD040128 from the Eunice Kennedy Shriver National Institute of Child Health and Human Development (NIH).</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pdig.0000063.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kinner</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Young</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Snow</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Southalan</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Lopez-Acuña</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Ferreira-Borges</surname> <given-names>C</given-names></name>, <etal>et al</etal>. <article-title>Prisons and custodial settings are part of a comprehensive response to COVID-19</article-title>. <source>Lancet Public Health</source>. <year>2020</year>;<volume>5</volume>(<issue>4</issue>):<fpage>e188</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S2468-2667%2820%2930058-X" xlink:type="simple">10.1016/S2468-2667(20)30058-X</ext-link></comment> <object-id pub-id-type="pmid">32197116</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Nowotny</surname> <given-names>KM</given-names></name>, <name name-style="western"><surname>Piquero</surname> <given-names>AR</given-names></name>. <article-title>The global impact of the pandemic on institutional and community corrections: Assessing short-term crisis management and long-term change strategies</article-title>. <source>Vict Offenders</source>. <year>2020</year>;<volume>15</volume>(<issue>7–8</issue>):<fpage>839</fpage>–<lpage>47</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref003"><label>3</label><mixed-citation publication-type="other" xlink:type="simple">U.S. Department of Justice, Bureau of Prisons. Federal Bureau of Prisons COVID-19 Action Plan [Internet]. [cited 2021 Aug 4]. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.bop.gov/resources/news/20200313_covid-19.jsp" xlink:type="simple">https://www.bop.gov/resources/news/20200313_covid-19.jsp</ext-link></mixed-citation></ref>
<ref id="pdig.0000063.ref004"><label>4</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Roberts</surname> <given-names>JV</given-names></name>, <name name-style="western"><surname>Hough</surname> <given-names>JM</given-names></name>. <source>Understanding public attitudes to criminal justice</source>. <publisher-loc>Maidenhead, Berkshire; New York</publisher-loc>: <publisher-name>Open University Press</publisher-name>; <year>2005</year>. <volume>183</volume> p. (Crime and justice).</mixed-citation></ref>
<ref id="pdig.0000063.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gottlieb</surname> <given-names>A.</given-names></name> <article-title>The effect of message frames on public attitudes toward criminal justice reform for nonviolent offenses</article-title>. <source>Crime Delinquency</source>. <year>2017</year>;<volume>63</volume>(<issue>5</issue>):<fpage>636</fpage>–<lpage>56</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1177/0011128716687758" xlink:type="simple">10.1177/0011128716687758</ext-link></comment> <object-id pub-id-type="pmid">28943646</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pickett</surname> <given-names>JT</given-names></name>. <article-title>Public opinion and criminal justice policy: Theory and research</article-title>. <source>Annu Rev Criminol</source>. <year>2019</year>;<volume>2</volume>:<fpage>405</fpage>–<lpage>28</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hutto</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Gilbert</surname> <given-names>E</given-names></name>. <article-title>Vader: A parsimonious rule-based model for sentiment analysis of social media text</article-title>. In: <source>Proceedings of the International AAAI Conference on Web and Social Media</source>. <year>2014</year>.</mixed-citation></ref>
<ref id="pdig.0000063.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zunic</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Corcoran</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Spasic</surname> <given-names>I</given-names></name>, <etal>et al</etal>. <article-title>Sentiment analysis in health and well-being: systematic review</article-title>. <source>JMIR Med Inform</source>. <year>2020</year>;<volume>8</volume>(<issue>1</issue>):<fpage>e16023</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2196/16023" xlink:type="simple">10.2196/16023</ext-link></comment> <object-id pub-id-type="pmid">32012057</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hovy</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Spruit</surname> <given-names>SL</given-names></name>. <article-title>The social impact of natural language processing</article-title>. <source>In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (volume 2: Short Papers)</source>. <year>2016</year>. p. <fpage>591</fpage>–<lpage>8</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hamilton</surname> <given-names>WL</given-names></name>, <name name-style="western"><surname>Clark</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Leskovec</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Jurafsky</surname> <given-names>D</given-names></name>. <article-title>Inducing domain-specific sentiment lexicons from unlabeled corpora</article-title>. <source>In: Proceedings of the Conference on Empirical Methods in Natural Language Processing</source>. <year>2016</year>. p. <fpage>595</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.18653/v1/D16-1057" xlink:type="simple">10.18653/v1/D16-1057</ext-link></comment> <object-id pub-id-type="pmid">28660257</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Manning</surname> <given-names>CD</given-names></name>, <name name-style="western"><surname>Surdeanu</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bauer</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Finkel</surname> <given-names>JR</given-names></name>, <name name-style="western"><surname>Bethard</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>McClosky</surname> <given-names>D</given-names></name>. <article-title>The Stanford CoreNLP natural language processing toolkit</article-title>. <source>In: Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations</source>. <year>2014</year>. p. <fpage>55</fpage>–<lpage>60</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Flanders</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Galoob</surname> <given-names>S</given-names></name>. <article-title>Progressive prosecution in a pandemic</article-title>. <source>J Crim Law Criminol</source>. <year>2020</year>;<volume>110</volume>:<fpage>685</fpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Brinkley-Rubinstein</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Cloud</surname> <given-names>DH</given-names></name>. <article-title>Mass incarceration as a social-structural driver of health inequities: A supplement to AJPH</article-title>. <source>Am J Public Health</source>. <year>2020</year>;<volume>110</volume>(<issue>S1</issue>):<fpage>S14</fpage>–<lpage>S15</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2105/AJPH.2019.305486" xlink:type="simple">10.2105/AJPH.2019.305486</ext-link></comment> <object-id pub-id-type="pmid">31967896</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref014"><label>14</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Alexander</surname> <given-names>M.</given-names></name> <source>The new Jim Crow: Mass incarceration in the age of colorblindness. Tenth anniversary edition</source>. <publisher-loc>New York London</publisher-loc>: <publisher-name>The New Press</publisher-name>; <year>2020</year>. <volume>377</volume> p.</mixed-citation></ref>
<ref id="pdig.0000063.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Harris</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Evans</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Beckett</surname> <given-names>K</given-names></name>. <article-title>Drawing blood from stones: Legal debt and social inequality in the contemporary United States</article-title>. <source>Am J Sociol</source>. <year>2010</year>;<volume>115</volume>(<issue>6</issue>):<fpage>1753</fpage>–<lpage>99</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cloud</surname> <given-names>DH</given-names></name>, <name name-style="western"><surname>Bassett</surname> <given-names>MT</given-names></name>, <name name-style="western"><surname>Graves</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Fullilove</surname> <given-names>RE</given-names></name>, <name name-style="western"><surname>Brinkley-Rubinstein</surname> <given-names>L</given-names></name>. <article-title>Documenting and addressing the health impacts of carceral systems</article-title>. <source>Am J Public Health</source>. <year>2020</year>;<volume>110</volume>(<issue>S1</issue>):<fpage>S5</fpage>–<lpage>S5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2105/AJPH.2019.305475" xlink:type="simple">10.2105/AJPH.2019.305475</ext-link></comment> <object-id pub-id-type="pmid">31967878</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>McCormick</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Hicken</surname> <given-names>MT</given-names></name>, <name name-style="western"><surname>Wildeman</surname> <given-names>C</given-names></name>. <article-title>Racial inequalities in connectedness to imprisoned individuals in the United States</article-title>. <source>Du Bois Rev Soc Sci Res Race</source>. <year>2015</year>;<volume>12</volume>(<issue>2</issue>):<fpage>269</fpage>–<lpage>82</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Massoglia</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Pridemore</surname> <given-names>WA</given-names></name>. <article-title>Incarceration and health</article-title>. <source>Annu Rev Sociol</source>. <year>2015</year>;<volume>41</volume>:<fpage>291</fpage>–<lpage>310</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1146/annurev-soc-073014-112326" xlink:type="simple">10.1146/annurev-soc-073014-112326</ext-link></comment> <object-id pub-id-type="pmid">30197467</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pettit</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Western</surname> <given-names>B</given-names></name>. <article-title>Mass imprisonment and the life course: Race and class inequality in US incarceration</article-title>. <source>Am Sociol Rev</source>. <year>2004</year>;<volume>69</volume>(<issue>2</issue>):<fpage>151</fpage>–<lpage>69</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref020"><label>20</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Tonry</surname> <given-names>MH</given-names></name>. <source>Punishing race: A continuing American dilemma</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>; <year>2011</year>. <fpage>204</fpage> p.</mixed-citation></ref>
<ref id="pdig.0000063.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Weaver</surname> <given-names>VM</given-names></name>. <article-title>Frontlash: Race and the development of punitive crime policy</article-title>. <source>Stud Am Polit Dev</source>. <year>2007</year>;<volume>21</volume>(<issue>2</issue>):<fpage>230</fpage>–<lpage>65</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bailey</surname> <given-names>ZD</given-names></name>, <name name-style="western"><surname>Krieger</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Agénor</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Graves</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Linos</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Bassett</surname> <given-names>MT</given-names></name>. <article-title>Structural racism and health inequities in the USA: Evidence and interventions</article-title>. <source>The Lancet</source>. <year>2017</year>;<volume>389</volume>(<issue>10077</issue>):<fpage>1453</fpage>–<lpage>63</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S0140-6736%2817%2930569-X" xlink:type="simple">10.1016/S0140-6736(17)30569-X</ext-link></comment> <object-id pub-id-type="pmid">28402827</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reskin</surname> <given-names>B</given-names></name>. <article-title>The race discrimination system</article-title>. <source>Annu Rev Sociol</source>. <year>2012</year>;<volume>38</volume>:<fpage>17</fpage>–<lpage>35</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Link</surname> <given-names>BG</given-names></name>, <name name-style="western"><surname>Phelan</surname> <given-names>J</given-names></name>. <article-title>Social conditions as fundamental causes of disease</article-title>. <source>J Health Soc Behav</source>. <year>1995</year>;<fpage>80</fpage>–<lpage>94</lpage>. <object-id pub-id-type="pmid">7560851</object-id></mixed-citation></ref>
<ref id="pdig.0000063.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Novisky</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Nowotny</surname> <given-names>KM</given-names></name>, <name name-style="western"><surname>Jackson</surname> <given-names>DB</given-names></name>, <name name-style="western"><surname>Testa</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Vaughn</surname> <given-names>MG</given-names></name>. <article-title>Incarceration as a fundamental social cause of health inequalities: Jails, prisons and vulnerability to COVID-19</article-title>. <source>Br J Criminol</source>. <year>2021</year>;<fpage>azab023</fpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bobicev</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Sokolova</surname> <given-names>M</given-names></name>. <article-title>Inter-annotator agreement in sentiment analysis: machine learning perspective</article-title>. <source>In: RANLP 2017: Recent Advances in Natural Language Processing Meet Deep Learning</source>. <year>2017</year>;<fpage>97</fpage>–<lpage>102</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ishita</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Oard</surname> <given-names>DW</given-names></name>, <name name-style="western"><surname>Fleischmann</surname> <given-names>KR</given-names></name>, <name name-style="western"><surname>Cheng</surname> <given-names>AS</given-names></name>, <name name-style="western"><surname>Templeton</surname> <given-names>TC</given-names></name>. <article-title>Investigating multi-label classification for human values</article-title>. <source>Proc Am Soc Inf Sci Technol</source>. <year>2010</year>;<volume>47</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>4.3</lpage>.</mixed-citation></ref>
<ref id="pdig.0000063.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Melzi</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Abdaoui</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Azé</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Bringay</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Poncelet</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Galtier</surname> <given-names>F</given-names></name>. <article-title>Patient’s rationale: Patient knowledge retrieval from health forums</article-title>. <source>ETELEMED EHealth Telemed Soc Med</source>. Published online 2014:lirmm-01130720.</mixed-citation></ref>
<ref id="pdig.0000063.ref029"><label>29</label><mixed-citation publication-type="other" xlink:type="simple">Media Cloud [Internet]. Media Cloud. [cited 2021 Aug 4]. Available from: <ext-link ext-link-type="uri" xlink:href="https://mediacloud.org" xlink:type="simple">https://mediacloud.org</ext-link></mixed-citation></ref>
<ref id="pdig.0000063.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mullinix</surname> <given-names>KJ</given-names></name>, <name name-style="western"><surname>Bolsen</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Norris</surname> <given-names>RJ</given-names></name>. <article-title>The feedback effects of controversial police use of force</article-title>. <source>Polit Behav</source>. <year>2021</year>;<volume>43</volume>(<issue>2</issue>):<fpage>881</fpage>–<lpage>98</lpage>.</mixed-citation></ref>
</ref-list>
</back>
</article>