<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Infodemiology</journal-id>
      <journal-title>JMIR Infodemiology</journal-title>
      <issn pub-type="epub">2564-1891</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i2e38756</article-id>
      <article-id pub-id-type="pmid">37113446</article-id>
      <article-id pub-id-type="doi">10.2196/38756</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>COVID-19 Misinformation Detection: Machine-Learned Solutions to the Infodemic</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Ahmed</surname>
            <given-names>Wasim</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Banda</surname>
            <given-names>Juan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Döring</surname>
            <given-names>Nicola</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Kolluri</surname>
            <given-names>Nikhil</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5622-1932</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Yunong</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9169-8552</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Murthy</surname>
            <given-names>Dhiraj</given-names>
          </name>
          <degrees>BA, MSc, DPhil</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Computational Media Lab</institution>
            <institution>School of Journalism and Media, Moody College of Communication</institution>
            <institution>The University of Texas at Austin</institution>
            <addr-line>300 W Dean Keeton (A0900)</addr-line>
            <addr-line>Austin, TX, 78712</addr-line>
            <country>United States</country>
            <phone>1 512 471 5775</phone>
            <email>Dhiraj.Murthy@austin.utexas.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9734-1124</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Computational Media Lab</institution>
        <institution>Department of Electrical and Computer Engineering</institution>
        <institution>The University of Texas at Austin</institution>
        <addr-line>Austin, TX</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Engineering</institution>
        <institution>College of Science and Engineering</institution>
        <institution>University of Edinburgh</institution>
        <addr-line>Edinburgh</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Computational Media Lab</institution>
        <institution>School of Journalism and Media, Moody College of Communication</institution>
        <institution>The University of Texas at Austin</institution>
        <addr-line>Austin, TX</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dhiraj Murthy <email>Dhiraj.Murthy@austin.utexas.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jul-Dec</season>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>25</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>2</volume>
      <issue>2</issue>
      <elocation-id>e38756</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>8</day>
          <month>8</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Nikhil Kolluri, Yunong Liu, Dhiraj Murthy. Originally published in JMIR Infodemiology (https://infodemiology.jmir.org), 25.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Infodemiology, is properly cited. The complete bibliographic information, a link to the original publication on https://infodemiology.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://infodemiology.jmir.org/2022/2/e38756" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The volume of COVID-19–related misinformation has long exceeded the resources available to fact checkers to effectively mitigate its ill effects. Automated and web-based approaches can provide effective deterrents to online misinformation. Machine learning–based methods have achieved robust performance on text classification tasks, including potentially low-quality-news credibility assessment. Despite the progress of initial, rapid interventions, the enormity of COVID-19–related misinformation continues to overwhelm fact checkers. Therefore, improvement in automated and machine-learned methods for an infodemic response is urgently needed.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to achieve improvement in automated and machine-learned methods for an infodemic response.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We evaluated three strategies for training a machine-learning model to determine the highest model performance: (1) COVID-19–related fact-checked data only, (2) general fact-checked data only, and (3) combined COVID-19 and general fact-checked data. We created two COVID-19–related misinformation data sets from fact-checked “false” content combined with programmatically retrieved “true” content. The first set contained ~7000 entries from July to August 2020, and the second contained ~31,000 entries from January 2020 to June 2022. We crowdsourced 31,441 votes to human label the first data set.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The models achieved an accuracy of 96.55% and 94.56% on the first and second external validation data set, respectively. Our best-performing model was developed using COVID-19–specific content. We were able to successfully develop combined models that outperformed human votes of misinformation. Specifically, when we blended our model predictions with human votes, the highest accuracy we achieved on the first external validation data set was 99.1%. When we considered outputs where the machine-learning model agreed with human votes, we achieved accuracies up to 98.59% on the first validation data set. This outperformed human votes alone with an accuracy of only 73%.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>External validation accuracies of 96.55% and 94.56% are evidence that machine learning can produce superior results for the difficult task of classifying the veracity of COVID-19 content. Pretrained language models performed best when fine-tuned on a topic-specific data set, while other models achieved their best accuracy when fine-tuned on a combination of topic-specific and general-topic data sets. Crucially, our study found that blended models, trained/fine-tuned on general-topic content with crowdsourced data, improved our models’ accuracies up to 99.7%. The successful use of crowdsourced data can increase the accuracy of models in situations when expert-labeled data are scarce. The 98.59% accuracy on a “high-confidence” subsection comprised of machine-learned and human labels suggests that crowdsourced votes can optimize machine-learned labels to improve accuracy above human-only levels. These results support the utility of supervised machine learning to deter and combat future health-related disinformation.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>misinformation</kwd>
        <kwd>machine learning</kwd>
        <kwd>fact-checking</kwd>
        <kwd>infodemiology</kwd>
        <kwd>infodemic management</kwd>
        <kwd>model performance</kwd>
        <kwd>model accuracy</kwd>
        <kwd>content analysis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Low information quality has led to adverse health outcomes for individuals during the COVID-19 pandemic [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Claims were being made on social media of dangerous home remedies and perceived preventative measures (eg, gargling with bleach-infused water) [<xref ref-type="bibr" rid="ref4">4</xref>]. Low-quality and biased sources of information can be more alluring to some, as they easily capture attention and offer simpler solutions with unambiguous evidence. Due to their persuasive, “simple” messaging [<xref ref-type="bibr" rid="ref2">2</xref>], these sources can appear more convincing to some because they confirm existing biases or better align with ideological narratives. Information veracity around COVID-19 is fundamentally important to the health outcomes of individuals worldwide [<xref ref-type="bibr" rid="ref5">5</xref>]. For example, the information that has been circulated in social media spaces that masks do not prevent COVID-19 transmission or that wearing a mask is unhealthy [<xref ref-type="bibr" rid="ref6">6</xref>] has been a major issue in terms of increased cases in the United States, but also in India, Brazil, and Turkey. Social media represent a key avenue where COVID-19–related disinformation and misinformation have been disseminated [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>To tackle this misinformation, manual intervention alone is insufficient. Indeed, in the first quarter of 2020 alone, English-language fact checks of COVID-19–related content jumped 900% [<xref ref-type="bibr" rid="ref8">8</xref>]. Despite checks increasing, there are a limited number of fact checkers. Moreover, they cannot check the high volume of content that needs evaluation [<xref ref-type="bibr" rid="ref8">8</xref>]. Thus, creating any interventions for providing automated solutions to evaluate the credibility of COVID-19–related content being circulated remains critical.</p>
        <p>In this study, we importantly compared COVID-19–related, general, and combined data sets for veracity classification applications, and developed a successful bidirectional long short-term memory (Bi-LSTM) machine-learning model (achieving internal and external validation accuracies of 93% and 75%, respectively). When crowdsourced human labels agreed with machine-learned outputs, the accuracy of 90% exceeded that of either approach alone. Our study provides critical, empirical evidence that small amounts of human labeling and machine learning can be an effective infodemic response to health disinformation.</p>
      </sec>
      <sec>
        <title>Misinformation and Disinformation</title>
        <p>Misinformation is defined as “incorrect or misleading information” [<xref ref-type="bibr" rid="ref9">9</xref>]. For example, a family member likely does not have intent to mislead you when they provide misinformation about politics or health, as they believe what they are sharing is actually true. Although misinformation is not inherently intentional, it can also cause real harm, as seen with COVID-19 misinformation being attributed to fatalities [<xref ref-type="bibr" rid="ref10">10</xref>]. Disinformation refers to intentionally and surreptitiously disseminated false information aiming to obscure the truth [<xref ref-type="bibr" rid="ref11">11</xref>]. Although both words refer to incorrect or inaccurate information, only disinformation is intentionally incorrect. A well-known example of a disinformation campaign is the 2016 Russian hacking of the Hillary Clinton campaign, and distribution of politically damaging propaganda on Facebook, Twitter, YouTube, and Instagram [<xref ref-type="bibr" rid="ref12">12</xref>]. Russia’s social media disinformation campaign was found to have likely influenced the 2016 US election [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      </sec>
      <sec>
        <title>COVID-19 and Social Media</title>
        <p>Early COVID-19–related research was critical in documenting keywords, topics that were emerging, as well as temporal patterns [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Some work specifically highlighted instances of rumors [<xref ref-type="bibr" rid="ref17">17</xref>], racism against individuals of Asian descent, and released data sets [<xref ref-type="bibr" rid="ref18">18</xref>]. Other studies documented COVID-19–related misinformation and disinformation [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. This work found that misinformation was widely diffused, which included that neem leaves can cure coronavirus [<xref ref-type="bibr" rid="ref20">20</xref>], certain ethnic and racial groups were immune (particularly if they had darker skin), individuals in warmer countries would not be affected, and the disease was no more harmful than the common flu [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>Other studies used machine-learned methods to try to classify misinformation and disinformation that was being circulated online [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. By training machine-learned classifiers on labeled misinformation and disinformation data sets, these approaches were able to achieve accuracy ranging from 16.7% to 96% as measured by F1 scores. Early work was mostly focused on deploying rapid results rather than optimizing classifiers for the best accuracy to COVID-19–specific misinformation and disinformation. The presumption was that there would be a reasonable similarity of misinformation detection approaches more broadly with the misinformation being spread during COVID-19. As studies emerged, it became clear that COVID-19–specific data sets and platforms were needed.</p>
      </sec>
      <sec>
        <title>COVID-19–Related Misinformation Data Sets, Machine Learning, and Automated Detection</title>
        <p>Due to the vast amount of COVID-19–related information circulating in public domains, automatic machine-learned identification and classification remains a critical method for detecting harmful content at scale. Six machine-learning algorithms with ensemble learning were used to study COVID-19–related Twitter data [<xref ref-type="bibr" rid="ref25">25</xref>]. Combinations of several machine-learning approaches and natural language processing (NLP) are being used to develop large-scale misinformation detection. For example, ReCOVery, a repository for COVID-19 news credibility checking, evaluates various machine-learned methods [<xref ref-type="bibr" rid="ref26">26</xref>]. One of the key issues hindering machine-learned methods remains the lack of large, verified, and labeled misinformation data sets [<xref ref-type="bibr" rid="ref27">27</xref>]. A reason for this lack is that robust labeled data sets require involvement of humans with specific domain knowledge. Moreover, misinformation is a diverse dynamic phenomenon that changes rapidly [<xref ref-type="bibr" rid="ref28">28</xref>]. Additionally, there remains a dearth of automated solutions that are scalable to incorporate content from multiple platforms. Although global studies indicate a high prevalence of misinformation (which disproportionately impacts low-income countries) [<xref ref-type="bibr" rid="ref29">29</xref>], currently available data sets may not be large enough to be scalable [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
        <p>To help address this gap, FakeCovid is a database of 5182 fact-checked news articles that uses 40 languages from 105 countries and classifies data using machine learning [<xref ref-type="bibr" rid="ref31">31</xref>]. COVIDLIES is another database comprising 6761 expert-annotated COVID-19–related tweets [<xref ref-type="bibr" rid="ref22">22</xref>]. Effective NLP methodology has also been used for detecting COVID-19 misinformation through YouTube videos by studying user comments [<xref ref-type="bibr" rid="ref23">23</xref>]. More than 100 million Twitter messages have been collected and classified to build the “Infodemic Risk Index” to estimate the magnitude of exposure to misinformation across various regions and countries [<xref ref-type="bibr" rid="ref2">2</xref>]. A manually labeled data set related to COVID-19 misinformation was released [<xref ref-type="bibr" rid="ref32">32</xref>]. COVID-19–specific data sets have also been developed with non-English–language content, including Arabic [<xref ref-type="bibr" rid="ref33">33</xref>], Portuguese [<xref ref-type="bibr" rid="ref34">34</xref>], Italian [<xref ref-type="bibr" rid="ref35">35</xref>], Chinese [<xref ref-type="bibr" rid="ref36">36</xref>], and multiple Indic languages [<xref ref-type="bibr" rid="ref37">37</xref>]. Machine-learned approaches have also been developed to complement manually labeled data sets related to COVID-19 [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
      </sec>
      <sec>
        <title>Machine-Learning Methods for Text Classification</title>
        <p>NLP applications for text classification include news categorization, sentiment analysis, emotion detection, and authorship attribution [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Most classical machine-learning models in text classification tasks extract features (eg, bag of words) from the documents and then feed them to a classifier to make a prediction [<xref ref-type="bibr" rid="ref38">38</xref>]. Note that, following prior works [<xref ref-type="bibr" rid="ref40">40</xref>], we use the word “classical” to describe traditional supervised and unsupervised machine-learning methods.</p>
        <p>The classical machine-learning models have some limitations, including tedious feature engineering in the process to extract hand-crafted features and the fact that they are difficult to generalize to new tasks due to their strong reliance on domain knowledge when designing features [<xref ref-type="bibr" rid="ref38">38</xref>]. Deep-learning models make use of embedding models to map text into a feature vector with lower dimensions, thus limiting the need to rely on hand-crafted features (which often require domain knowledge) [<xref ref-type="bibr" rid="ref38">38</xref>]. ELMo [<xref ref-type="bibr" rid="ref41">41</xref>], a 3-layer Bi-LSTM model with 93 million parameters developed in 2017, achieved better performance than the previous most popular word2vec models [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>] developed by Google in 2013. In 2018, OpenAI developed Generative Pre-trained Transformer (GPT) [<xref ref-type="bibr" rid="ref42">42</xref>], and Google developed Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref43">43</xref>], which inspired the creation of several different pretrained language models (PLMs) of large size based on transformers [<xref ref-type="bibr" rid="ref38">38</xref>]. For example, XLNet, a generalized autoregressive pretraining method, allows for the learning of bidirectional contexts, and its autoregressive formulation overcomes some limitations of BERT [<xref ref-type="bibr" rid="ref44">44</xref>]. Moreover, Facebook developed RoBERTa [<xref ref-type="bibr" rid="ref45">45</xref>], which is trained on a larger data set than BERT. Large models based on transformers, including BERT, RoBERTa, and XLNet, achieved a high level of success in many NLP tasks [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>].</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>The objective of this study was to ameliorate the impact of online misinformation through automated, machine-learned, and scalable methods. Our study sought to answer the following three core research questions (RQs):</p>
        <p><italic>RQ1</italic>: Can approaches leveraging automated and scalable strategies such as machine learning, information retrieval, and crowdsourcing help combat misinformation when information growth exceeds fact-checker capabilities?</p>
        <p><italic>RQ2</italic>: Does training a machine-learning model on only COVID-19–related misinformation data, only on general misinformation data, or on both result in the highest performance on COVID-19–related data?</p>
        <p><italic>RQ3</italic>: Does combining crowdsourced labels with machine-learning model outputs improve accuracy over either approach individually?</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Machine-Learned Classification</title>
        <p>We first developed a classifier using the CoAID data set [<xref ref-type="bibr" rid="ref46">46</xref>]; specifically, the 05-01-2020 and 07-01-2020 folders of the CoAID data set were used. Since there are more pieces of news deemed to be accurate (“true”) than those deemed to be inaccurate (“false”), we included all inaccurate news, but limited the quantity of true news to be equal to the amount of false news to have a balanced data set. For the Bi-LSTM model, we split our input data into a training set (75%) and test set (25%). Pandas [<xref ref-type="bibr" rid="ref47">47</xref>] and scikit-learn [<xref ref-type="bibr" rid="ref48">48</xref>] were used in our classifier development and implementation.</p>
        <p>We evaluated different architectures, dropouts, activation functions, optimizers, regularizers, and batch sizes. We ultimately chose an embedding layer, Bi-LSTM layer, Dropout layer with a rate of 0.7, and Dense layer with a 1-dimensional output and sigmoid activation function. We used an Adam optimizer with a learning rate of 0.0001, binary cross-entropy loss, and a batch size of 1. The Bi-LSTM model has a kernel regularizer with <italic>l</italic><sub>1</sub> and <italic>l</italic><sub>2</sub> regularization factors of 1e-5 and 1e-4, respectively. In addition, we employed several state-of-the-art models for text classification, including PLMs such as BERT, RoBERTa, and XLNet. We selected RoBERTa, as it is an optimized BERT approach, and XLNet, as it is an autoregressive BERT-like model. We employed four transformers: BERT-base [<xref ref-type="bibr" rid="ref43">43</xref>], XLNet [<xref ref-type="bibr" rid="ref44">44</xref>], and two models fine-tuned on RoBERTa-base [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>] for this specific classification task on the 7 data sets described in <xref ref-type="table" rid="table1">Table 1</xref> for 3 epochs with default training arguments in HuggingFace Trainer [<xref ref-type="bibr" rid="ref51">51</xref>]. Moreover, we trained a convolutional neural network (CNN) model for text classification [<xref ref-type="bibr" rid="ref52">52</xref>], as this method has been extensively used in text classification [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
        <p>All source code files for our models are publicly available as open source [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Data set sources and specifications.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="230"/>
            <col width="140"/>
            <col width="140"/>
            <col width="90"/>
            <col width="80"/>
            <col width="0"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Data set</td>
                <td>Source</td>
                <td>Time range</td>
                <td colspan="4">Size (number of articles)</td>
                <td>Type</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Noncredible news</td>
                <td>True news</td>
                <td>Total</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>CoAID<sup>a</sup></td>
                <td>Tweets</td>
                <td>Until May 1, 2020</td>
                <td>572</td>
                <td>1324</td>
                <td>1896</td>
                <td colspan="2">COVID-19–specific</td>
              </tr>
              <tr valign="top">
                <td>FNN<sup>b</sup></td>
                <td>PolitiFact</td>
                <td>N/A<sup>c</sup></td>
                <td>472</td>
                <td>797</td>
                <td>1269</td>
                <td colspan="2">General news</td>
              </tr>
              <tr valign="top">
                <td>FNN</td>
                <td>Gossip Cop</td>
                <td>N/A</td>
                <td>16,818</td>
                <td>5335</td>
                <td>22,153</td>
                <td colspan="2">General news</td>
              </tr>
              <tr valign="top">
                <td>Validation data set 1<sup>d</sup></td>
                <td>Poynter.org (noncredible news); Washington Post, Associated Press, Politico (true news)</td>
                <td>July 20, 2020, to August 8, 2020</td>
                <td>3874</td>
                <td>3177</td>
                <td>7051</td>
                <td colspan="2">COVID-19–specific</td>
              </tr>
              <tr valign="top">
                <td>Validation data set 2<sup>d</sup></td>
                <td>Poynter.org (noncredible news); BBC, AXIOS, CBS News, The Globe and Mail (true news)</td>
                <td>January 20, 2020, to June 15, 2022</td>
                <td>14,398</td>
                <td>16,232</td>
                <td>30,630</td>
                <td colspan="2">COVID-19–specific</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Only the 05-01-2020 folder of the CoAID data set was used.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>FNN: FakeNewsNet.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Scraped with the query term “COVID-19.”</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Evaluation</title>
        <p>To develop our external validation data sets, we used data from Poynter [<xref ref-type="bibr" rid="ref54">54</xref>], which had several thousand instances of COVID-19–related content with a “false” label. For “true” news, we inherited article accuracy from the credibility of the media source on which the documents were published, following an approach similar to the ReCOVery [<xref ref-type="bibr" rid="ref26">26</xref>] and CoAID [<xref ref-type="bibr" rid="ref46">46</xref>] COVID-19–related data sets. We created two external validation data sets with different “true” news sources to test the generalization ability of the models. The first external validation data set consists of ~4000 pieces of false-news content scraped from Poynter and ~3000 pieces of true-news content collected from several news outlets that we deemed to be reliable by inheriting source credibility. We used NewsAPI’s application programming interface [<xref ref-type="bibr" rid="ref55">55</xref>] to retrieve content from the following news outlets: Reuters, BBC, The Wall Street Journal, The Washington Post, Associated Press, and Politico. We searched for articles from July 20, 2020, to August 8, 2020, with the query term “COVID-19.” With these parameters, we queried just over 3000 news articles and stored their labels, titles, sources, descriptions, URLs, and publication dates. The second external validation data set consists of ~14,000 pieces of noncredible news scraped from Poynter in the time range from March 20, 2020, to February 23, 2022, and ~16,000 pieces of true news scraped from BBC, AXIOS, CBS News, and The Globe and Mail with the query term “COVID-19” in the time range from January 20, 2020, to June 15, 2022. 
In total, after removing elements due to nonapplicable Poynter labels, the first data set had 7051 labeled pieces of COVID-19–related content within the time range from July 20, 2020, to August 8, 2020, and the second data set had 30,630 pieces of COVID-19–related content within the time range from January 20, 2020, to June 15, 2022.</p>
        <p>We developed methods to evaluate whether training a machine-learning model on only COVID-19–related misinformation data, only on general misinformation data, or on both would result in the highest performance on new, unseen COVID-19 data sets. When evaluating general data sets, FakeNewsNet (FNN) [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>] provided a data format matching our needs and with a sufficient volume for the scale of our training. For COVID-19–related data, we found that CoAID, a COVID-19 health care misinformation data set, with 1896 news articles, 183,564 related user engagements, 516 social platform posts about COVID-19, and ground truth labels [<xref ref-type="bibr" rid="ref46">46</xref>], allowed us to achieve high internal validation accuracy in preliminary trials. To be as consistent across the two data sets as possible, we drew from standard benchmarking practices performed on data sets using default machine-learning model implementations. We trained on 7 different combinations of data sources to mimic different situations in the real world: (1) only CoAID, used to mimic the situation when sufficient topic-specific data are available; (2) partial (using only the 05-01-2020 folder of the CoAID data set) CoAID and FNN; (3) partial CoAID and PolitiFact; (4) partial CoAID and the GossipCop content from FNN, used to mimic the situation when we have a limited quantity of topic-specific data; (5) FNN; (6) PolitiFact; and (7) GossipCop, used to mimic the situation when no topic-specific data are available. 
For three classical models (support vector machine [SVM], logistic regression [LR], and Bernoulli naïve Bayes [BNB]) and six deep-learning models (Bi-LSTM, BERT-based model, two RoBERTa-based models [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>], XLNet [<xref ref-type="bibr" rid="ref44">44</xref>], and Text-CNN [<xref ref-type="bibr" rid="ref52">52</xref>]) on all seven data source combinations, we computed precision, recall, and F1-score for both internal validation and the two external validation data sets described above. These were taken as a weighted average of both labels and rounded to the nearest hundredth, as detailed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>-<xref ref-type="supplementary-material" rid="app3">3</xref>, and are available as a CSV file on our data repository [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
      </sec>
      <sec>
        <title>Ethics Considerations</title>
        <p>The University of Texas at Austin Institutional Review Board (IRB) approved this study for human subjects research on April 20, 2021 (STUDY00000962). Informed consent from all study participants was obtained.</p>
      </sec>
      <sec>
        <title>Crowdsourced Classification</title>
        <p>We recruited annotators from the crowdsourcing platform Prolific to vote on pieces of news content from the data set we created. On Prolific, we set the study distribution to “standard sample,” which launched the study to the whole participant pool [<xref ref-type="bibr" rid="ref58">58</xref>]. In line with the IRB protocol, we limited voting to US residents only. We established approximately 10 rounds of Prolific tasks with each participant being paid varying amounts of ~$8 an hour, which resulted in 31,441 votes from 756 voters.</p>
        <p>After completing the crowdsourced voting, we then processed the data both manually and with Python scripts for usability. We removed duplicate votes for the same label (two “true” votes) and votes from Prolific IDs that we could not find in the set of IDs reported to us by Prolific. The processed data set had more than 6800 pieces of content with at least 3 votes for either the “true” or “false” label. We took the initial ground truth labels from Poynter and credible news sources and mapped them to 0 or 1. “True” was coded as 1 and “false” was coded as 0. Additionally, “correct” labels were coded as 1 (2 labels), and all other labels were converted to 0 (690 labels). Mapping our labels to 0 or 1 allowed us to collect certain metrics for our data set. Some examples from the crowdsourced data set are provided in <xref ref-type="table" rid="table2">Table 2</xref> (also see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Voter soft labels of 0.0 or 1.0 indicate that the vote results are concordant (ie, all votes were for the same label), whereas a voter soft label range of 0.4-0.6 implies that (nearly) half of the voters have different opinions.</p>
        <p>We also computed the percentage of agreeing decisions, which we defined as the probability that the label decided on by the crowdsourced votes was the same as the ground truth label. The percentage of agreeing decisions (human voter accuracy) was ~0.73, or 73%. We also calculated interannotator agreements to determine the agreement among voters. As the number of voters varied (from 3 to 7) for each piece of news content, Cohen and Fleiss κ statistics were not suitable for our data set. We therefore computed the percent agreement between users to determine interrater reliability (68.5%) for our data. As percent agreement does not take chance agreement into consideration, we calculated Krippendorff <italic>α</italic> (0.428). As percent agreement is considered to be acceptable when above 75% [<xref ref-type="bibr" rid="ref59">59</xref>] and <italic>α</italic> is “acceptable at 0.667≤<italic>α</italic>≤0.823 and unacceptable at <italic>α</italic>&lt;0.667” [<xref ref-type="bibr" rid="ref60">60</xref>], there was low agreement among all voters in the crowdsourced data. Ultimately, crowdsourced voters had low accuracy (~73%) when identifying COVID-19–related noncredible content, and there was a high level of disagreement among them. Given that this data set was not used as the ground truth, but rather to evaluate whether labeled data from nonexperts could improve model performance, low agreement is not an issue for our use case. Moreover, this low agreement indicates that nonprofessionals respond to misinformation differently rather than consistently.</p>
        <p>Given this high level of variability, we next evaluated whether our crowdsourced data could actually improve machine-learning model predictions. With this in mind, we developed and answered the following questions: (1) Which model best predicted crowdsourced labels? (2) Can model performance be improved after being blended with crowdsourced labels? (3) Which model performs best when blended with crowdsourced labels? (4) If we only take the subset of the data set where machine-learning models and human votes have agreeing labels, will the performance of prediction be improved and, if so, which model has the highest performance?</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Examples from the crowdsourced data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="480"/>
            <col width="70"/>
            <col width="80"/>
            <col width="60"/>
            <col width="60"/>
            <col width="220"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">News title</td>
                <td>Ground truth</td>
                <td>Voter soft label<sup>a</sup></td>
                <td>Voter label</td>
                <td>Total votes</td>
                <td>Results</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>Concordant human votes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>COVID-19 pandemic derails Germany’s push for migrant integration-Reuters</td>
                <td>1</td>
                <td>1.0</td>
                <td>1</td>
                <td>3</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Photo shows the last meeting of a Turkish doctor who died due to COVID-19 with his child in Munich</td>
                <td>0</td>
                <td>1.0</td>
                <td>1</td>
                <td>4</td>
                <td>Misclassified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>3M brings on another lobbying firm</td>
                <td>1</td>
                <td>1.0</td>
                <td>1</td>
                <td>5</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Video shows that the Italian government/Brisbane police used zombie robots/drones to chase their citizen and make them stay home</td>
                <td>0</td>
                <td>0.0</td>
                <td>0</td>
                <td>4</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>British vaccine provokes immune response in first human studies</td>
                <td>1</td>
                <td>0.0</td>
                <td>0</td>
                <td>3</td>
                <td>Misclassified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>This video shows a woman eating a bat soup in Wuhan</td>
                <td>0</td>
                <td>0.0</td>
                <td>0</td>
                <td>5</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Discordant human votes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>An emergency department closed in a Spanish hospital</td>
                <td>0</td>
                <td>0.5</td>
                <td>1</td>
                <td>6</td>
                <td>Misclassified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Majority of Caledonian hotel jobs under review in Edinburgh</td>
                <td>1</td>
                <td>0.5</td>
                <td>1</td>
                <td>4</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>England v Ireland: Captain Eoin Morgan relishes 'new journey' in ODI series</td>
                <td>1</td>
                <td>0.6</td>
                <td>1</td>
                <td>5</td>
                <td>Correctly classified by humans</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Panic scene in Germany with people rushing into a supermarket</td>
                <td>0</td>
                <td>0.4</td>
                <td>0</td>
                <td>5</td>
                <td>Correctly classified by humans</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Voter soft label is calculated by the number of true labels/total votes.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Machine-Learned Classification</title>
        <p>RQ1 asks whether automated systems can help combat COVID-19–related misinformation. We found that machine learning predicts veracity better than random. We developed a Bi-LSTM model trained on the CoAID data set. Specifically, we used 1257 entries from CoAID for training and tested our model on 419 entries from CoAID. We achieved a weighted average F1-score of 0.93 (with equal precision, recall, and accuracy) across both labels. Using the same model, the external validation result on our data set was an F1-score of 0.75, with equal precision, recall, and accuracy. In addition, we fine-tuned BERT-base, RoBERTa-fake-news, Fake-News-BERT-Detect, XLNet, and trained Text-CNN on 7 data set combinations and tested them on the two external validation data sets. The results are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>-<xref ref-type="supplementary-material" rid="app2">2</xref>. We achieved accuracies of up to 91%, 93%, 97%, 94%, and 87% on the first external validation data set from BERT-base, RoBERTa-fake-news, Fake-News-BERT-Detect, XLNet, and trained Text-CNN, respectively. Accuracies of up to 93%, 84%, 93%, 91%, and 85% were achieved on the second external validation data set from the same models. Given these results, RQ1 can be answered in the affirmative.</p>
      </sec>
      <sec>
        <title>Data Evaluation</title>
        <p>RQ2 asks whether training a machine-learning model on only COVID-19–related misinformation data, on only general misinformation data, or on both results in the highest performance on COVID-19–related data. We found that machine-learned models benefit from COVID-19–related data. Specifically, after training on 7 different data sets (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>-<xref ref-type="supplementary-material" rid="app3">3</xref>), RQ2 can be answered as follows: for classical models, the combination of topic-specific and general-topic data results in the best performance; however, pretrained models benefit from purely topic-specific data the most. In this study, we investigated the efficacy of three scenarios: (1) training on COVID-19–related misinformation, (2) training on non-COVID-19–related misinformation, and (3) training on both COVID-19–related misinformation and non-COVID-19–related misinformation. Our results indicate that including COVID-19–related misinformation (in our case CoAID data) helped—or, at least, maintained—model performance.</p>
        <p>Examples of classical classification models include LR, SVM, BNB, hidden Markov model, and random forests [<xref ref-type="bibr" rid="ref39">39</xref>]. In our experiment, classical models used included LR, SVM, and BNB. All three classical models shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> achieved the best accuracy when trained on the combination of CoAID and PolitiFact, whereas for deep-learning pretrained models, which have already “studied” the behavior of the English language, the best model performance was obtained when fine-tuned on CoAID only (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>-<xref ref-type="supplementary-material" rid="app3">3</xref>). In instances where we are lacking additional COVID-19–related misinformation content, our findings suggest that incorporation of prior misinformation data sets in conjunction with COVID-19–specific misinformation data sets could potentially be useful to detect new COVID-19–related misinformation when using classical models. However, using PLMs (eg, BERT), which normally have much better performance on language tasks than classical models, fine-tuning on a topic-specific data set tended to give a better result. By combining COVID-19–related (ie, CoAID) and broad, multitopic misinformation data sets (ie, FNN, GossipCop, and PolitiFact), we evaluated the performance of our machine-learning models. Combining labeled data sets from different sources coupled with various machine-learning models is a novel contribution of our study in terms of producing a scalable and generalizable framework. As detailed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>-<xref ref-type="supplementary-material" rid="app3">3</xref>, we found that the accuracy of models where we used only GossipCop data sets was very low. 
The lowest BNB accuracy we obtained (0.37) was also obtained for GossipCop, indicating the important role that labeled data sets play in the validity of misinformation detection. As GossipCop is considered a credible source of celebrity news, the labeled data sets of GossipCop are specific and have limited value to COVID-19 misinformation detection on their own. Conversely, combining CoAID and GossipCop as the input data to train our models significantly improved the accuracy (0.64) for the BNB model (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). As the best result, an accuracy of 96.55% was achieved when we fine-tuned Fake-News-BERT-Detect using only the CoAID data set (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). With these findings, RQ2 can be answered positively.</p>
      </sec>
      <sec>
        <title>Crowdsourced Classification</title>
        <p>RQ3 asks whether combining crowdsourced labels with machine-learning model outputs improves accuracy over either approach individually. We found that combining human votes with machine-learned outputs allowed us to create higher performance models. Specifically, deep-learning models are able to predict human votes at an accuracy up to 70%. Combining human votes with machine-learned outputs allowed us to create a model with 99.1% accuracy. We achieved accuracy up to 98.59% when only considering the subset where model and human votes agreed.</p>
        <p>We first evaluated how well our models could predict our crowdsourced values or the labels we generated from our Prolific labeling (see <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>-<xref ref-type="supplementary-material" rid="app9">9</xref>). A label of 0 indicates that most voters voted false, while a label of 1 indicates that greater than or equal to half of the voters voted true. Using the models trained on the 7 data set combinations and testing on our data set of 7051 pieces of content, the success at predicting the crowdsourced values from Prolific had accuracies up to 0.70 (see <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). All values were rounded to the nearest hundredth.</p>
        <p>Second, we blended the soft predictions (ie, probabilities) from the models and soft vote (combining the probabilities of each prediction in contrast to hard voting, which chooses the prediction that receives the most votes) results from crowdsourcing data in different proportions to assess both the maximum improvements and highest accuracies that can be achieved after blending. The soft vote results were computed by taking the number of votes for label 1 (credible) and dividing by the number of total votes. The results shown in <xref ref-type="table" rid="table3">Table 3</xref> (predictions from blended models) were calculated by the following formula:</p>
        <disp-quote>
          <p>a×(soft predictions from model)+(1–a)×(soft vote results from crowdsourcing data)</p>
        </disp-quote>
        <p><xref ref-type="table" rid="table3">Table 3</xref> illustrates that models had higher accuracy on average after blending, and the highest accuracy we achieved was 99.1% on the first external validation data set (when blending 10% of user vote results with 90% of the machine-learning model prediction). Therefore, we found that models trained on general news were improved. Those models achieved much higher accuracies (up to 99.7%) after blending with user vote results. This represents a considerable improvement over the human vote accuracy of ~73%. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, when a=0.9, the performance of Text-CNN trained on GossipCop could be improved from 42.6% to 99.1% after blending with crowdsourced data.</p>
        <p>Third, as discussed in the Machine-Learned Classification section above, the machine-learning models had accuracies ranging from 41% to 98% and the human votes had approximately 73% accuracy. Out of the 7051 pieces of content, 39.24%-69.58% (for the best-performing model) showed agreement in both the human votes and the machine-learning model. We were therefore able to make reduced sets of 2766 to 4906 pieces of content. For each piece of content, we assigned its label to whichever value both the machine-learning model and human votes agreed on. Using this approach, our best accuracy was 98.59% (see <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>), which was from the Fake-News-BERT-Detect model fine-tuned on the CoAID data set. This is in comparison with an accuracy of 73% for human votes and 96.55% for the entire validation data set. All models achieved the best performance when the models were previously fine-tuned on COVID-19–specific data sets (ie, CoAID).</p>
        <p>The performance of models trained/fine-tuned on a general-topic data set could be improved with crowdsourced data (eg, in low-data situations such as pandemics). Specifically, the base model achieved an accuracy of 71.01% on the whole validation data set. For example, for the subset, we achieved an accuracy of 89.96% at best (by BERT-base fine-tuned on PolitiFact). In addition, models trained on the combination of general-topic and COVID-19–specific data sets were also improved by this approach. Specifically, accuracies of up to 89.93% on the whole data sets (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) were improved to up to 96.26% (for the subset). Practically speaking, both credibility tests could be applied to a piece of content to yield a label of “true” or “false” with up to 98.59% accuracy. Combining human votes with machine-learned outputs therefore outperformed models with human votes alone. Our response to RQ3 is that both blending crowdsourced labels with model predictions and reducing the data set to a “high-confidence” data subset increased model performance.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Analysis of accuracy for blended models, evaluated on the first external validation data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="260"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Metric</td>
                <td colspan="2">a=0.9</td>
                <td colspan="2">a=0.7</td>
                <td colspan="2">a=0.5</td>
                <td colspan="2">a=0.3</td>
                <td>a=0.1</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average improvement</td>
                <td colspan="2">0.069</td>
                <td colspan="2">0.082</td>
                <td colspan="2">0.084</td>
                <td colspan="2">0.063</td>
                <td colspan="2">0.029</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Maximum improvement</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum improvement</td>
                <td colspan="2">0.565</td>
                <td colspan="2">0.562</td>
                <td colspan="2">0.463</td>
                <td colspan="2">0.385</td>
                <td colspan="2">0.415</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model name</td>
                <td colspan="2">Text-CNN trained on GossipCop</td>
                <td colspan="2">Text-CNN trained on GossipCop</td>
                <td colspan="2">Text-CNN trained on GossipCop</td>
                <td colspan="2">Text-CNN trained on GossipCop</td>
                <td colspan="2">Fake-News-BERT-Detect fine-tuned on GossipCop</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model accuracy (before blending)</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.302</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model accuracy (after blending)</td>
                <td colspan="2">0.991</td>
                <td colspan="2">0.981</td>
                <td colspan="2">0.889</td>
                <td colspan="2">0.804</td>
                <td colspan="2">0.717</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Best performance</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model name</td>
                <td colspan="2">Text-CNN trained on CoAID</td>
                <td colspan="2">Text-CNN trained on CoAID</td>
                <td colspan="2">Text-CNN trained on CoAID and PolitiFact</td>
                <td colspan="2">Text-CNN trained on GossipCop</td>
                <td colspan="2">Text-CNN trained on PolitiFact</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model accuracy (before blending)</td>
                <td colspan="2">0.874</td>
                <td colspan="2">0.874</td>
                <td colspan="2">0.798</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.499</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Model accuracy (after blending)</td>
                <td colspan="2">0.991</td>
                <td colspan="2">0.984</td>
                <td colspan="2">0.891</td>
                <td colspan="2">0.804</td>
                <td colspan="2">0.728</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Our results indicate that RQ1 (which asks whether automated systems and scalable strategies can help combat misinformation) can be answered in the affirmative. The models we trained showed an accuracy of 98% on our first external validation data set (of ~7000 posts and true news from July 20, 2020, to August 8, 2020) and an accuracy of 93% on our second validation data set (of ~15,000 posts and true news from January 20, 2020, to June 15, 2022). Labeling by fact-checkers can be time-consuming, labor-intensive, and expensive, whereas machine-learning models can be used at will and at scale once trained. These results support our finding that machine learning significantly improves fact checking given the reality that human fact-checkers are overburdened and cannot feasibly keep up with the increasing volume of online misinformation.</p>
        <p>Regarding RQ2 (which asks what kind of data set is most helpful to machine learning), we found that training/fine-tuning on pandemic-specific content tends to result in higher accuracy. Specifically, our best-performing models were fine-tuned on COVID-19 topic content only. We evaluated three classical models and five deep-learning models trained on seven different data sets, including one topic-specific data set (CoAID only), three general-topic data sets (FNN, GossipCop, and PolitiFact), and three combinations of topic-specific and general-topic data sets (CoAID and FNN, GossipCop and CoAID, PolitiFact and CoAID). Classical models achieved the best accuracy when trained on a combination of general-topic and COVID-19–specific data (the combination of CoAID and PolitiFact), while deep-learning PLMs (eg, BERT), which have already been trained on English-language text and therefore could be considered as having “studied” the behavior of the English language, obtained the best model performance when fine-tuned on a COVID-19–specific data set (ie, CoAID).</p>
        <p>Regarding RQ3, which asks whether combining crowdsourced labels with models can improve model performance, we found that blending crowdsourced labels with model predictions increased model performance. The blended model (crowdsourced votes mixed with a machine-learning model) was able to achieve an accuracy of 99.1%. Given that the accuracy of crowdsourced votes was 73% and the highest accuracy of our machine-learning models was 96.55%, our results therefore show that crowdsourcing can be used in conjunction with machine learning to boost accuracy. In addition, models trained on general news could be improved to achieve much higher accuracies after blending with user vote results. Specifically, we found improvements of up to 57.1% after blending (see <xref ref-type="table" rid="table3">Table 3</xref>). That being said, the performance of models trained/fine-tuned on a general-topic data set could only be improved when considering the subset. With neither crowdsourcing nor machine learning requiring time from expert fact-checkers, both are viable options for addressing COVID-19 and other health-related misinformation at scale.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>Future work can further optimize our machine-learning model and extend and develop our labeled data set. Moreover, we hope that our findings encourage others to develop COVID-19–specific disinformation and misinformation data sets. As the quantity of COVID-19–related labeled data increases, the combination of COVID-19–related labeled data and general misinformation data should be further evaluated and benchmarked by others to enhance machine-learning model accuracy. Our results would therefore benefit from replication in future work with a data set consisting of both COVID-19–related and broad, multitopic content. Since we only crowdsourced votes for the first external validation data set (which spans one month), future work could crowdsource vote results on the second validation data set to strengthen the validity of our conclusions. Furthermore, the size of the crowdsourcing data set is relatively small (31,441 votes, with an average of 4.46 votes per piece of content), which could be strengthened with the accumulation of more votes and would increase the generalizability of our results. Thus, future work would benefit from extending our framework to a larger crowdsourced data set. Since collecting crowdsourced data could be time-consuming, using machine-learning models to generate pseudohuman votes can potentially be another way to strengthen the crowdsourced data set. After collecting crowdsourced data for a small news data set, the pseudohuman votes model trained on that data set can be used to predict human labels on a larger data set. This method would be especially useful with unlabeled news data sets, on which we could simulate human votes in the absence of ground truth labels.</p>
        <p>Future work could also measure whether there are sufficient advantages of using machine-learning models rather than expert fact-checkers (given that the former method allows for cheaper and quicker large-scale data labeling). There is also the possibility that machine-learning models and professional fact-checkers combined could deliver better results. For example, fact-checkers could use models to flag news to speed up their work, and the results from fact-checkers could be used to refine models. Human-in-the-loop models could be developed by using this method. A live news browser displaying news alongside fact-checker results or model predictions (if no fact-checker is available) could help assess credibility even when there is more misinformation than experts can check manually. Lastly, future work could further examine the relationship between crowdsourced outputs and ground truth labels for COVID-19–related data, a line of inquiry we minimally investigated in this study. Specifically, future work could examine when humans are more likely to make misjudgments by exploring the scenarios in which crowdsourced and ground truth labels are most likely to disagree. Research could explore crowdsourced data in different problem domains to identify the misinformation in problem domains that interventions should pay most attention to, using metrics such as the disagreement between human votes and ground truth labels.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>A limitation of our work is that our study did not rigorously test the ceiling of possible model optimization on all combinations of FNN and CoAID models. Another minor limitation is that we assigned “false” to all labels (except two “correct” labels) in the Poynter data set when evaluating our model, even though a small portion of labels could be interpreted as true (&lt;0.5% with labels such as “half true” and “mostly true”). The crowdsourced data set quality was potentially limited due to the number of votes per item and the time span of the labeled data set. Lastly, we were only able to crowdsource votes for the first external validation data set due to time and funding constraints.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Manual fact checking is unable to cope with the large volumes of COVID-19–related misinformation that now exists [<xref ref-type="bibr" rid="ref8">8</xref>]. To help address the proliferation of COVID-19–related misinformation, we developed an automated, machine-learned, and scalable approach. Since the best-performing models we evaluated were fine-tuned on COVID-19–specific content only, topic-specific data sets are much more helpful than general-topic data sets or the combination of the two. The 96.55% and 94.6% accuracy on the first and second external validation data sets, respectively, suggest that machine learning can be used to achieve significantly better than random results for the difficult task of determining the veracity of COVID-19–related content. Our study also found that when considering only the reduced set of content on which both human votes and model outputs agreed, the models achieved up to 99.1% accuracy. Models trained/fine-tuned on general-topic content can be improved to an acceptable level after combining with human votes, and may be used to supplement limited amounts of topic-specific content in low-data situations (eg, pandemics) to increase accuracy.</p>
        <p>Our findings also suggest that machine-learning models can be augmented with the labels of lay, crowdsourced voters to boost accuracy without additional input from expert fact-checkers. Blending human votes with model prediction results achieved an accuracy up to 99.1% (by combining 10% of a human vote label with 90% of a label from the model). We have released our topic-related data set of 7000 ground truth and crowdsourced labels, machine-learning model, and code in open-source form to promote the development by others of automated, scalable solutions to the COVID-19 infodemic.</p>
        <p>COVID-19 infodemic responses need to acknowledge that misinformation can be amorphous and highly decentralized. The machine-learned and automated approaches developed in this study rely on text features, making them powerful in that they can be extended (eg, by researchers or technology companies) to study a variety of platforms and contexts (eg, news and social media) in which online misinformation exists. Automation and machine learning offer the ability to exchange a small decrease in accuracy for scalability, which is an important consideration when misinformation growth exceeds fact-checking capabilities, as continues to be the case during the COVID-19 pandemic.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Model performances on the first external validation data set.</p>
        <media xlink:href="infodemiology_v2i2e38756_app1.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Model performances on the second external validation data set.</p>
        <media xlink:href="infodemiology_v2i2e38756_app2.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Benchmarking results using classical models.</p>
        <media xlink:href="infodemiology_v2i2e38756_app3.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Results for the bidirectional long short-term memory (Bi-LSTM) model trained on CoAID and tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app4.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Results for BERT-base tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app5.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Results for RoBERTa-Fake-News tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app6.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p>Results for Fake-News-BERT-Detect tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app7.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Results for XLNet tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app8.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Results for Text-CNN tested on crowdsourced labels.</p>
        <media xlink:href="infodemiology_v2i2e38756_app9.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app10">
        <label>Multimedia Appendix 10</label>
        <p>Model performances on the reduced set of content when human and machine-learned votes agree.</p>
        <media xlink:href="infodemiology_v2i2e38756_app10.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BNB</term>
          <def>
            <p>Bernoulli naïve Bayes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">FNN</term>
          <def>
            <p>FakeNewsNet</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">GPT</term>
          <def>
            <p>Generative Pre-trained Transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">IRB</term>
          <def>
            <p>institutional review board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PLM</term>
          <def>
            <p>pretrained language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RQ</term>
          <def>
            <p>research question</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors wish to thank Kami Vinton for her insightful comments and suggestions, as well as for her assistance proofreading the manuscript. This work was supported by Good Systems, a research Grand Challenge at the University of Texas at Austin, and an Undergraduate Research Fellowship at The University of Texas at Austin.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>NK and DM jointly architected the study, wrote the first version of the manuscript, and collaborated to obtain crowdsourcing funding. DM obtained further funding for the deep-learning aspects of the project. NK wrote all of the code for the first version of the manuscript, performed the experiments for the first version of the manuscript, collected crowdsourced data, and provided data for <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> and part of <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. YL wrote substantial sections of the manuscript revision, performed the experiments during the revision process, provided the second validation data set, performed the experiment regarding deep-learning models, and provided data for the other tables. All authors collaborated on the revised manuscript. NK and YL contributed equally to the study and should be viewed as joint first authors.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A link2vec-based fake news detection model using web search results</article-title>
          <source>Expert Syst Appl</source>
          <year>2021</year>
          <month>12</month>
          <volume>184</volume>
          <fpage>115491</fpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2021.115491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gallotti</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Valle</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Castaldo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>De Domenico</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing the risks of 'infodemics' in response to COVID-19 epidemics</article-title>
          <source>Nat Hum Behav</source>
          <year>2020</year>
          <month>12</month>
          <volume>4</volume>
          <issue>12</issue>
          <fpage>1285</fpage>
          <lpage>1293</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-020-00994-6</pub-id>
          <pub-id pub-id-type="medline">33122812</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-020-00994-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cinelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quattrociocchi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Galeazzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Valensise</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Brugnoli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Zola</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zollo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Scala</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The COVID-19 social media infodemic</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>10</month>
          <day>06</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>16598</fpage>
          <pub-id pub-id-type="doi">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="medline">33024152</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7538912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Litman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenzweig</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger-Litman</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Did people really drink bleach to prevent COVID-19? A tale of problematic respondents and a guide for measuring rare events in survey data</article-title>
          <source>MedRxiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on January 2, 2021. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2020.12.11.20246694v3">https://www.medrxiv.org/content/10.1101/2020.12.11.20246694v3</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>An ad hoc WHO technical consultation managing the COVID-19 infodemic: call for action, 7-8 April 2020</article-title>
          <source>World Health Organization, Institutional Repository for Information Sharing</source>
          <year>2020</year>
          <access-date>2020-12-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/handle/10665/334287">https://apps.who.int/iris/handle/10665/334287</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khazan</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>How a bizarre claim about masks has lived on for months</article-title>
          <source>The Atlantic</source>
          <year>2020</year>
          <month>10</month>
          <day>09</day>
          <access-date>2022-04-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theatlantic.com/politics/archive/2020/10/can-masks-make-you-sicker/616641/">https://www.theatlantic.com/politics/archive/2020/10/can-masks-make-you-sicker/616641/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bridgman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Merkley</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Loewen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Owen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ruths</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Teichmann</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhilin</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The causes and consequences of COVID-19 misperceptions: understanding the role of news and social media</article-title>
          <source>HKS Misinfo Review</source>
          <year>2020</year>
          <month>6</month>
          <day>18</day>
          <volume>19</volume>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.37016/mr-2020-028"/>
          </comment>
          <pub-id pub-id-type="doi">10.37016/mr-2020-028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>RK</given-names>
            </name>
          </person-group>
          <article-title>Types, sources, and claims of COVID-19 misinformation</article-title>
          <source>Reuters Institute</source>
          <year>2020</year>
          <month>04</month>
          <day>07</day>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://reutersinstitute.politics.ox.ac.uk/types-sources-and-claims-covid-19-misinformation">https://reutersinstitute.politics.ox.ac.uk/types-sources-and-claims-covid-19-misinformation</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Misinformation</article-title>
          <source>Merriam-Webster Dictionary</source>
          <access-date>2022-07-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.merriam-webster.com/dictionary/misinformation">https://www.merriam-webster.com/dictionary/misinformation</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>'Hundreds dead' because of Covid-19 misinformation</article-title>
          <source>BBC</source>
          <year>2020</year>
          <month>08</month>
          <day>12</day>
          <access-date>2022-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bbc.co.uk/news/world-53755067">https://www.bbc.co.uk/news/world-53755067</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <article-title>Disinformation</article-title>
          <source>Merriam-Webster Dictionary</source>
          <access-date>2022-07-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.merriam-webster.com/dictionary/disinformation">https://www.merriam-webster.com/dictionary/disinformation</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacLellan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kerry</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Britain says Russian troll factory is spreading disinformation on social media</article-title>
          <source>Reuters</source>
          <access-date>2022-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.reuters.com/world/europe/britain-says-russian-troll-factory-is-spreading-disinformation-social-media-2022-04-30/">https://www.reuters.com/world/europe/britain-says-russian-troll-factory-is-spreading-disinformation-social-media-2022-04-30/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jamieson</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <source>Cyberwar: how Russian hackers and trolls helped elect a president: what we don't, can't, and do know</source>
          <year>2020</year>
          <publisher-loc>Oxford, UK</publisher-loc>
          <publisher-name>Oxford University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Tekumalla</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Artemova</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chowell</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A large-scale COVID-19 Twitter chatter dataset for open scientific Research—an international collaboration</article-title>
          <source>Epidemiologia</source>
          <year>2021</year>
          <month>08</month>
          <day>05</day>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>315</fpage>
          <lpage>324</lpage>
          <pub-id pub-id-type="doi">10.3390/epidemiologia2030024</pub-id>
          <pub-id pub-id-type="pii">2004.03688</pub-id>
          <pub-id pub-id-type="pmcid">PMC7280901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Tracking social media discourse about the COVID-19 pandemic: development of a public coronavirus Twitter data set</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>29</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19273</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19273/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19273</pub-id>
          <pub-id pub-id-type="medline">32427106</pub-id>
          <pub-id pub-id-type="pii">v6i2e19273</pub-id>
          <pub-id pub-id-type="pmcid">PMC7265654</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NAIST COVID: Multilingual COVID-19 Twitter and Weibo dataset</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on April 17, 2020. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.08145">https://arxiv.org/abs/2004.08145</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haouari</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hasanain</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Suwaileh</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ArCOV19-Rumors: Arabic COVID-19 Twitter dataset for misinformation detection</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on March 13, 2021. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2010.08768">https://arxiv.org/abs/2010.08768</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ziems</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Soni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ramakrishan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Racism is a virus: anti-Asian hate and counterspeech in social media during the COVID-19 crisis</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on November 10, 2021.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.12423?context=cs.CL">https://arxiv.org/abs/2005.12423?context=cs.CL</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kouzy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Abi Jaoude</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kraitem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>El Alam</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Karam</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Adib</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarka</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Traboulsi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Baddour</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Coronavirus goes viral: quantifying the COVID-19 misinformation epidemic on Twitter</article-title>
          <source>Cureus</source>
          <year>2020</year>
          <month>03</month>
          <day>13</day>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>e7255</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32292669"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.7255</pub-id>
          <pub-id pub-id-type="medline">32292669</pub-id>
          <pub-id pub-id-type="pmcid">PMC7152572</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>FALSE: A claim that neem leaves can cure the novel coronavirus and relieve its symptoms has been shared thousands of times in multiple Facebook posts</article-title>
          <source>Poynter</source>
          <year>2020</year>
          <month>03</month>
          <day>22</day>
          <access-date>2022-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/yc22cz8z">https://tinyurl.com/yc22cz8z</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bode</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Budak</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kawintiranon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Padden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vanarsdall</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vraga</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A first look at COVID-19 information and misinformation sharing on Twitter</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on March 31, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2003.13907">https://arxiv.org/abs/2003.13907</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hossain</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Logan</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Ugarte</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVIDLies: detecting COVID-19 misinformation on social media</article-title>
          <year>2020</year>
          <conf-name>1st Workshop on NLP for COVID-19 (Part 2) at EMNLP 2020</conf-name>
          <conf-date>November 20, 2020</conf-date>
          <conf-loc>virtual</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.nlpcovid19-2.11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Serrano</surname>
              <given-names>JCM</given-names>
            </name>
            <name name-style="western">
              <surname>Papakyriakopoulos</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Hegelich</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NLP-based feature extraction for the detection of COVID-19 misinformation videos on YouTube</article-title>
          <year>2020</year>
          <conf-name>1st Workshop on NLP for COVID-19 at ACL 2020</conf-name>
          <conf-date>July 2020</conf-date>
          <conf-loc>virtual</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dharawat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lourentzou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Morales</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>CX</given-names>
            </name>
          </person-group>
          <article-title>Drink bleach or do what now? Covid-HeRA: A dataset for risk-informed health decision making in the presence of COVID19 misinformation</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on October 17, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2010.08743">https://arxiv.org/abs/2010.08743</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Rakhami</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Amri</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Lies kill, facts save: detecting COVID-19 misinformation in Twitter</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>155961</fpage>
          <lpage>155970</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34192115"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3019600</pub-id>
          <pub-id pub-id-type="medline">34192115</pub-id>
          <pub-id pub-id-type="pmcid">PMC8043503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Mulay</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zafarani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ReCOVery: a multimodal repository for COVID-19 news credibility research</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on June 9, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2006.05557">https://arxiv.org/abs/2006.05557</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Corona Virus (COVID-19) "infodemic" and emerging issues through a data lens: the case of China</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <month>03</month>
          <day>30</day>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>2309</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17072309"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17072309</pub-id>
          <pub-id pub-id-type="medline">32235433</pub-id>
          <pub-id pub-id-type="pii">ijerph17072309</pub-id>
          <pub-id pub-id-type="pmcid">PMC7177854</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ghorbani</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>An overview of online fake news: characterization, detection, and discussion</article-title>
          <source>Inf Process Manag</source>
          <year>2020</year>
          <month>03</month>
          <volume>57</volume>
          <issue>2</issue>
          <fpage>102025</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2019.03.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lima</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kulshrestha</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Varol</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of misinformation and factchecks on the COVID-19 pandemic in 35 countries: observational infodemiology study</article-title>
          <source>JMIR Hum Factors</source>
          <year>2021</year>
          <month>02</month>
          <day>13</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>e23279</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://humanfactors.jmir.org/2021/1/e23279/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23279</pub-id>
          <pub-id pub-id-type="medline">33395395</pub-id>
          <pub-id pub-id-type="pii">v8i1e23279</pub-id>
          <pub-id pub-id-type="pmcid">PMC7909456</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Asr</surname>
              <given-names>FT</given-names>
            </name>
            <name name-style="western">
              <surname>Taboada</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Big Data and quality data for fake news and misinformation detection</article-title>
          <source>Big Data &amp; Society</source>
          <year>2019</year>
          <month>01</month>
          <volume>6</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.1177/2053951719843310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahi</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Nandini</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>FakeCovid--a multilingual cross-domain fact check news dataset for COVID-19</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on June 19, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2006.11343">https://arxiv.org/abs/2006.11343</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patwa</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pykl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guptha</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kumari</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Akhtar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ekbal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bernard</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Akhtar</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Fighting an infodemic: COVID-19 fake news dataset</article-title>
          <source>Combating Online Hostile Posts in Regional Languages during Emergency Situation. CONSTRAINT 2021. Communications in Computer and Information Science, vol 1402</source>
          <year>2021</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Addawood</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Coronavirus: Public Arabic Twitter Data Set</article-title>
          <source>OpenReview</source>
          <year>2020</year>
          <month>08</month>
          <day>12</day>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=ZxjFAfD0pSy">https://openreview.net/forum?id=ZxjFAfD0pSy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melo</surname>
              <given-names>Tde</given-names>
            </name>
            <name name-style="western">
              <surname>Figueiredo</surname>
              <given-names>CMS</given-names>
            </name>
          </person-group>
          <article-title>A first public dataset from Brazilian twitter and news on COVID-19 in Portuguese</article-title>
          <source>Data in Brief</source>
          <year>2020</year>
          <volume>32</volume>
          <fpage>106179</fpage>
          <pub-id pub-id-type="doi">10.1016/j.dib.2020.106179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rovetta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bhagavathula</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>COVID-19-related web search behaviors and infodemic attitudes in Italy: infodemiological study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>05</month>
          <day>05</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19374</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/2/e19374/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19374</pub-id>
          <pub-id pub-id-type="medline">32338613</pub-id>
          <pub-id pub-id-type="pii">v6i2e19374</pub-id>
          <pub-id pub-id-type="pmcid">PMC7202310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zafarani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>CHECKED: Chinese COVID-19 fake news dataset</article-title>
          <source>Soc Netw Anal Min</source>
          <year>2021</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34178179"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13278-021-00766-8</pub-id>
          <pub-id pub-id-type="medline">34178179</pub-id>
          <pub-id pub-id-type="pii">766</pub-id>
          <pub-id pub-id-type="pmcid">PMC8217979</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bhardwaj</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Samanta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Azad</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>No rumours please! A multi-indic-lingual approach for COVID fake-tweet detection</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on October 14, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2010.06906">https://arxiv.org/abs/2010.06906</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Minaee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kalchbrenner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nikzad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chenaghlu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep learning–based text classification</article-title>
          <source>ACM Comput Surv</source>
          <year>2022</year>
          <month>04</month>
          <day>30</day>
          <volume>54</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1145/3439726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Bangla text classification using transformers</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on November 9, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2011.04446">https://arxiv.org/abs/2011.04446</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biamonte</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wittek</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pancotti</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rebentrost</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wiebe</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Quantum machine learning</article-title>
          <source>Nature</source>
          <year>2017</year>
          <month>09</month>
          <day>13</day>
          <volume>549</volume>
          <issue>7671</issue>
          <fpage>195</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1038/nature23474</pub-id>
          <pub-id pub-id-type="medline">28905917</pub-id>
          <pub-id pub-id-type="pii">nature23474</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Iyyer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Deep contextualized word representations</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on February 15, 2018.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1802.05365">https://arxiv.org/abs/1802.05365</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narasimhan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimans</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Improving language understanding by generative pre-training</article-title>
          <source>Amazon Simple Storage Service (S3)</source>
          <year>2018</year>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf">https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on October 11, 2018.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805">https://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <article-title>XLNet: generalized autoregressive pretraining for language understanding</article-title>
          <year>2019</year>
          <conf-name>33rd Conference on Neural Information Processing Systems (NeurIPS 2019)</conf-name>
          <conf-date>December 8-14, 2019</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on July 26, 2019. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>CoAID: COVID-19 healthcare misinformation dataset</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on May 22, 2020.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2006.00885">https://arxiv.org/abs/2006.00885</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>The Pandas Development Team</collab>
          </person-group>
          <source>Zenodo</source>
          <year>2020</year>
          <month>12</month>
          <day>07</day>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://zenodo.org/record/4309786#.YvfOIC9E3mp">https://zenodo.org/record/4309786#.YvfOIC9E3mp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eickenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gervais</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kossaifi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for neuroimaging with scikit-learn</article-title>
          <source>Front Neuroinform</source>
          <year>2014</year>
          <volume>8</volume>
          <fpage>14</fpage>
          <pub-id pub-id-type="doi">10.3389/fninf.2014.00014</pub-id>
          <pub-id pub-id-type="medline">24600388</pub-id>
          <pub-id pub-id-type="pmcid">PMC3930868</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Transformer-fake-news-detection</article-title>
          <source>GitHub</source>
          <year>2022</year>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/Jiayif/Transformer-Fake-News-Detection">https://github.com/Jiayif/Transformer-Fake-News-Detection</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tatti</surname>
              <given-names>GV</given-names>
            </name>
          </person-group>
          <article-title>roberta-fake-news</article-title>
          <source>Hugging Face</source>
          <year>2021</year>
          <access-date>2022-08-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/ghanashyamvtatti/roberta-fake-news">https://huggingface.co/ghanashyamvtatti/roberta-fake-news</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lysandre</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Victor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Julien</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Clement</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Anthony</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pierric</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Remi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Joe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sam</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Patrick</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Clara</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yacine</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Julien</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Canwen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Teven</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvain</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Transformers: State-of-the-Art Natural Language Processing</article-title>
          <year>2020</year>
          <conf-name>2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</conf-name>
          <conf-date>October 2020</conf-date>
          <conf-loc>online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-demos.6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>NeuronBlocks: building your NLP DNN models like playing Lego</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on April 21, 2019.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.09535">https://arxiv.org/abs/1904.09535</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <article-title>COVID-19 misinformation detection: machine-learned solutions to the infodemic</article-title>
          <source>GitHub</source>
          <access-date>2022-08-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/yunongLiu1/COVID-19-Misinformation-Detection--Machine-Learned-Solutions-to-the-Infodemic">https://github.com/yunongLiu1/COVID-19-Misinformation-Detection--Machine-Learned-Solutions-to-the-Infodemic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <source>Poynter</source>
          <year>2020</year>
          <access-date>2022-04-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.poynter.org/">https://www.poynter.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <source>News API</source>
          <access-date>2020-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://newsapi.org/">https://newsapi.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mahudeswaran</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>FakeNewsNet: A Data Repository with News Content, Social Context, and Spatiotemporal Information for Studying Fake News on Social Media</article-title>
          <source>Big Data</source>
          <year>2020</year>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>171</fpage>
          <lpage>188</lpage>
          <pub-id pub-id-type="doi">10.1089/big.2020.0062</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sliva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Fake news detection on social media: a data mining perspective</article-title>
          <source>arXiv</source>
          <access-date>2022-08-12</access-date>
          <comment>Preprint published on August 7, 2017.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1708.01967">https://arxiv.org/abs/1708.01967</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <article-title>Setting up a study on Prolific</article-title>
          <source>Prolific</source>
          <access-date>2022-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://researcher-help.prolific.co/hc/en-gb/articles/4407449546002-Audience">https://researcher-help.prolific.co/hc/en-gb/articles/4407449546002-Audience</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <article-title>Inter-rater reliability IRR: definition, calculation</article-title>
          <source>Statistics How To</source>
          <access-date>2022-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statisticshowto.com/inter-rater-reliability/">https://www.statisticshowto.com/inter-rater-reliability/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shabankhani</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Charati</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Shabankhani</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cherati</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Survey of agreement between raters for nominal data using Krippendorff's alpha</article-title>
          <source>Arch Pharma Pract</source>
          <year>2020</year>
          <volume>10</volume>
          <issue>S1</issue>
          <fpage>160</fpage>
          <lpage>164</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
