<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Infodemiology</journal-id>
      <journal-title>JMIR Infodemiology</journal-title>
      <issn pub-type="epub">2564-1891</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i2e38839</article-id>
      <article-id pub-id-type="pmid">36193330</article-id>
      <article-id pub-id-type="doi">10.2196/38839</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Data Exploration and Classification of News Article Reliability: Deep Learning Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mackey</surname>
            <given-names>Tim</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ceron</surname>
            <given-names>Wilson</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Tsao</surname>
            <given-names>Shu-Feng</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhan</surname>
            <given-names>Kevin</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4802-2535</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Yutong</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Psychiatry</institution>
            <institution>University of Alberta</institution>
            <addr-line>4-142 KATZ Group Centre for Pharmacy and Health Research</addr-line>
            <addr-line>87 Avenue and 114 Street</addr-line>
            <addr-line>Edmonton, AB, T6G 2E1</addr-line>
            <country>Canada</country>
            <phone>1 403 926 6628</phone>
            <email>yutong5@ualberta.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9370-3455</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Osmani</surname>
            <given-names>Rafay</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4177-0453</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Xiaoyu</given-names>
          </name>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1203-7711</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Cao</surname>
            <given-names>Bo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9338-3271</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Psychiatry</institution>
        <institution>University of Alberta</institution>
        <addr-line>Edmonton, AB</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Cell Biology</institution>
        <institution>University of Alberta</institution>
        <addr-line>Edmonton, AB</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Computing Science</institution>
        <institution>University of Alberta</institution>
        <addr-line>Edmonton, AB</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yutong Li <email>yutong5@ualberta.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jul-Dec</season>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>9</month>
        <year>2022</year>
      </pub-date>
      <volume>2</volume>
      <issue>2</issue>
      <elocation-id>e38839</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>25</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>9</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Kevin Zhan, Yutong Li, Rafay Osmani, Xiaoyu Wang, Bo Cao. Originally published in JMIR Infodemiology (https://infodemiology.jmir.org), 22.09.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Infodemiology, is properly cited. The complete bibliographic information, a link to the original publication on https://infodemiology.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://infodemiology.jmir.org/2022/2/e38839" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>During the ongoing COVID-19 pandemic, we are being exposed to large amounts of information each day. This “infodemic” is defined by the World Health Organization as the mass spread of misleading or false information during a pandemic. This spread of misinformation during the infodemic ultimately leads to misunderstandings of public health orders or direct opposition against public policies. Although there have been efforts to combat misinformation spread, current manual fact-checking methods are insufficient to combat the infodemic.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We propose the use of natural language processing (NLP) and machine learning (ML) techniques to build a model that can be used to identify unreliable news articles online.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>First, we preprocessed the ReCOVery data set to obtain 2029 English news articles tagged with COVID-19 keywords from January to May 2020, which are labeled as reliable or unreliable. Data exploration was conducted to determine major differences between reliable and unreliable articles. We built an ensemble deep learning model using the body text, as well as features, such as sentiment, Empath-derived lexical categories, and readability, to classify the reliability.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that reliable news articles have a higher proportion of neutral sentiment, while unreliable articles have a higher proportion of negative sentiment. Additionally, our analysis demonstrated that reliable articles are easier to read than unreliable articles, in addition to having different lexical categories and keywords. Our new model was evaluated to achieve the following performance metrics: 0.906 area under the curve (AUC), 0.835 specificity, and 0.945 sensitivity. These values are above the baseline performance of the original ReCOVery model.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This paper identified novel differences between reliable and unreliable news articles; moreover, the model was trained using state-of-the-art deep learning techniques. We aim to be able to use our findings to help researchers and the public audience more easily identify false information and unreliable media in their everyday lives.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>deep learning</kwd>
        <kwd>news article reliability</kwd>
        <kwd>false information</kwd>
        <kwd>infodemic</kwd>
        <kwd>ensemble model</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The onset of the COVID-19 pandemic has given the world more to battle. The world has faced a barrage of false information during the “infodemic,” which is defined as the spread of a large amount of information that includes misleading or false information during a pandemic [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Due to quarantine and increased restrictions, information is trafficked to the public via social media and news sources; consequently, false information propagates at a larger scale and faster rate. Despite available public health guidelines, there is still a large presence of false and misleading information online, comprising around 20% of articles on major social media sites, such as Twitter [<xref ref-type="bibr" rid="ref3">3</xref>]. Although the proportion of shared false information is less than evidence-informed guidelines, false information spreads at a faster rate because it contains inflammatory information [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Furthermore, infodemic management is an important aspect in maintaining public trust in scientific guidance [<xref ref-type="bibr" rid="ref1">1</xref>]. Hence, we need to construct methods to deter the spread of false information online and identify potential sources of false news.</p>
      <p>The abundance of fake or false news online can be instances of misinformation or disinformation and often lacks the reliability and credibility in content [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Disinformation is defined as the intentional spread of false information, while misinformation is the negligent sharing of false information [<xref ref-type="bibr" rid="ref6">6</xref>]. Hereafter, we will not differentiate between disinformation and misinformation, as we will refer to them together as false information. False news can be categorized into 6 groups: propaganda, advertisement, manipulation, satire, parody, and fabrication [<xref ref-type="bibr" rid="ref6">6</xref>]. Although news organizations and social media companies have implemented measures to flag and delete false news, the rate of manual false news detection is not fast enough to compete with its rapid spread through social media [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Approximately 62% of US adults obtain news from social media sites; thus, faster fact checking is critical to ensure false information spread is reduced [<xref ref-type="bibr" rid="ref11">11</xref>]. As such, the spread of false news has resulted in public confusion, potentially associated with the antimask and vaccine rhetoric [<xref ref-type="bibr" rid="ref10">10</xref>]. Presently, one of the most common methods to detect false news online is through human-curated fact-checking websites, such as Snopes, to flag false information [<xref ref-type="bibr" rid="ref12">12</xref>]. Although this method may be accurate, it is inefficient due to the large amount of false news generated during the COVID-19 pandemic [<xref ref-type="bibr" rid="ref10">10</xref>]. Thus, automatic news article reliability detection is needed.</p>
      <p>Current false news detection using machine learning (ML) on social media has been researched extensively. Various textual features from news pages are used to predict reliability of the articles. The use of multiple features to predict the presence of false information is a common theme within current false information detection studies. The use of multiple features can improve the performance of an ML model. For example, Reis et al [<xref ref-type="bibr" rid="ref13">13</xref>] used textual features (eg, semantic and lexical features) and news source features (eg, credibility of the news organization) as inputs for the ML model. Using traditional classifiers, such as random forest and extreme gradient boosting (XGBoost), a performance of 0.85 and 0.86 area under the curve (AUC) was achieved, respectively [<xref ref-type="bibr" rid="ref13">13</xref>]. Elhadad et al [<xref ref-type="bibr" rid="ref14">14</xref>] used a voting ensemble method, in addition to feature engineering, for sentiment and part-of-speech tagging. Singhania et al [<xref ref-type="bibr" rid="ref15">15</xref>] created a 3-level HAN model using input from words, sentences, and the headline level of a news article. Similar studies have proposed that other lexical features, such as n-grams, term frequency–inverse document frequency (TF-IDF), and probabilistic context-free grammar (PCFG) have also been used as features for misinformation prediction using deep learning [<xref ref-type="bibr" rid="ref16">16</xref>]. Accordingly, feature engineering provides higher performance metrics as well as improved interpretability. These features allow the model to focus on the important elements, which allows for reliability prediction, especially in news articles, despite high heterogeneity and noise between samples. 
To build on what other false information research has found, as well as to identify important new factors that contribute to false information detection, we created a final ensemble model using the ReCOVery data set [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      <p>Ensemble methods were implemented to further improve the performance of misinformation detection within news articles. Ensemble model usage can benefit model performance by improving the ability to generalize to data on which the model has not been trained [<xref ref-type="bibr" rid="ref18">18</xref>]. Kumar et al [<xref ref-type="bibr" rid="ref19">19</xref>] demonstrated improvement in performance after the use of an ensemble model, where the use of an ensemble deep learning model with a convolutional neural network (CNN) and bidirectional long short-term memory (BiLSTM) was able to achieve higher performance than a CNN or long short-term memory (LSTM) model alone, with a performance of 88.78% accuracy versus 73.29% and 80.62% for the CNN and LSTM, respectively. Due to the size of news articles, a bidirectional gated recurrent unit (BiGRU) was selected as the first model in the ensemble [<xref ref-type="bibr" rid="ref20">20</xref>]. This model is a type of recurrent neural network (RNN) that functions well on sequential text data. A BiGRU solves the vanishing gradient problem, where the model trains on long news articles and “forgets” information from the start of the articles. This model is made of many neurons or cells, each with an update gate to control what new information is added at each word and a reset gate to control how much old information is retained. A BiGRU’s bidirectional nature allows it to process each sample from the beginning and end of the article. Compared to other state-of-the-art natural language processing (NLP) models, such as LSTM, a gated recurrent unit (GRU) has lower parameters, making it quicker to train and use [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. A quicker model is important as a large number of news articles are released each day; thus, a model for false information detection needs to be both accurate and fast in order to keep up with the number of new articles. 
XGBoost is another model included within our ensemble model. One strength of XGBoost is its exceptional ability at learning from tabular data [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. As a gradient boosted tree model, it is faster than a neural network and works better on the low-dimensionality output from the first model following feature extraction. Furthermore, XGBoost has been shown to outperform deep learning models for tabular data as the hyperparameter search is shorter [<xref ref-type="bibr" rid="ref24">24</xref>]. Additionally, XGBoost combined with deep learning models in an ensemble model yields better results than an ensemble model with multiple deep learning models or classical ML models [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
      <p>This study aims to provide a potential solution to the multifaceted false information problem through an ensemble deep learning model to classify the reliability of news articles using the ReCOVery data set. We hypothesize that sentiment, readability, lexical categories, and other text characteristics in news articles can be used together as inputs for news reliability classification improvement. We also explore differences in the sentiment or tone of reliable and unreliable information, which can be used to classify the reliability of the text. The outcome of our study may advance news reliability classification and help researchers and the public identify unreliable news articles in their everyday lives.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Workflow</title>
        <p>First, data preprocessing was completed using the ReCOVery data set, which included removing stop words, links and Uniform Resource Locators (URLs), and duplicate articles (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Conversion of abbreviations and numbers to words was also completed within the preprocessing step. Following the preprocessing of the data, we performed feature engineering to create readability and sentiment scores, as well as extract lexical categories from the text (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The preprocessed data were split into training, validation, and testing sets. Word tokenization and embedding were performed on the training and validation sets. Once tokenization and embedding were completed, 9 different ML models were trained and evaluated on the validation set to determine the best-performing model. We refer to naive Bayes (NB), K-nearest neighbors (KNNs), and logistic regression (LR) as traditional ML models as they are not deep learning models. The best-performing model was the ensemble model containing a bidirectional GRU and XGBoost ensemble “new model,” as highlighted in blue in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Details of workflow for data exploration and “new model” construction (highlighted in blue). CNN: convolutional neural network; BiGRU: bidirectional gated recurrent unit; BiLSTM: bidirectional long short-term memory; GRU: gated recurrent unit; KNN: K-nearest neighbor; LR: logistic regression; LSTM: long short-term memory; NB: naive Bayes; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38839_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Description</title>
        <p>The ReCOVery data set was our main source of data for news articles connected to Twitter posts [<xref ref-type="bibr" rid="ref17">17</xref>]. It focuses on the reliability of news articles from a wide array of news sources and contains 2029 articles from ~2000 different news outlets from different countries (filtered from January to May 2020) that are related to COVID-19 news [<xref ref-type="bibr" rid="ref17">17</xref>]. Each article was labeled as either 0 for unreliable or 1 for reliable according to the NewsGuard score [<xref ref-type="bibr" rid="ref17">17</xref>]. The NewsGuard score was developed by journalists to label the reliability of an online article. Using a scale of 0-100, NewsGuard gives points to articles that accomplish credible and transparent news practices. Online articles with a score above 60 are labeled with a “green” rating as reliable sources, and scores below 60 are labeled with a “red” rating as unreliable sources [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. In addition to the NewsGuard score, ReCOVery uses Media Bias/Fact Check, which checks the correctness of news sources according to the article subjectivity and ranks articles from “very high” to “very low” in terms of factual reporting [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Reliable articles have a NewsGuard score higher than 90, with a “very high” or “high” rating on Media Bias/Fact Check. Unreliable articles have a NewsGuard score lower than 30, with a “mixed,” “low,” or “very low” factual rating on Media Bias/Fact Check [<xref ref-type="bibr" rid="ref17">17</xref>]. The ReCOVery data set combined the NewsGuard and Media Bias/Fact Check scores to create the final news article reliability score.</p>
      </sec>
      <sec>
        <title>Preprocessing</title>
        <p>Prior to data analysis, the article text and tweet data were subjected to multiple preprocessing steps. The purpose of preprocessing was to clean the data so that the deep learning model could more efficiently detect patterns in the data. The steps taken to preprocess the article text included the removal of duplicate articles or tweets; common stop words, such as “the” and “a”; and all links and non-English characters. Lemmatization of the article text was also completed, in addition to the conversion of acronyms to full terms.</p>
        <p>Preprocessing was conducted using Python libraries, such as Pandas and Natural Language Toolkit [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. A total of 1346 reliable articles and 648 unreliable articles were used for model training. Additionally, 34 articles were removed as they had less than 100 words, which limited the validity of reliability analysis. Following preprocessing, features from the news articles such as text characteristics, readability, and sentiment were extracted for analysis and to be included as input to the deep learning model.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>Sentiment analysis was applied to the body text of reliable and unreliable articles. This was implemented through Valence Aware Dictionary and Sentiment Reasoner (VADER) and TextBlob, which are open source tools for determining predominant sentiment, polarity, and subjectivity [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. The analysis relies on lexicographic analysis to map the text features of each article to different scores with regard to sentiment, polarity, and intensity. In terms of sentiment, the articles have a continuous score between 0 and 1, including both endpoints, with 1 representing that the article contains the specified sentiment as the predominant sentiment. For example, if an article has a positive sentiment of 1, this means the article contains the highest-possible positive sentiment. VADER and TextBlob were imported into Python and applied to the body text of articles within the data set. The total proportion of articles with a positive, negative, and neutral sentiment was determined through library functions within VADER and TextBlob.</p>
      </sec>
      <sec>
        <title>Text Analysis</title>
        <p>After preprocessing, the body text of articles was analyzed. The most common words from reliable and unreliable articles were determined. They are presented in a frequency bar graph to demonstrate the major differences between unreliable and reliable articles (<xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>, respectively). Another feature included within the deep learning model was the text length and readability of the newspaper articles. The length of the articles was assessed using the character length of the article sentences and overall article length. Readability was assessed using 6 different readability metrics from the py-readability-metrics library: the Flesch-Kincaid grade level, Gunning fog index, Coleman-Liau index, Dale-Chall index, automated readability index (ARI), and Linsear Write index [<xref ref-type="bibr" rid="ref31">31</xref>]. The aforementioned readability metrics are used to determine the grade level necessary to understand a written document based on the sentence length and word length [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>The Flesch-Kincaid grade level is a scale modified from the Flesch-Kincaid reading ease index that compares the ratio of words per sentence and the ratio of syllables per word [<xref ref-type="bibr" rid="ref33">33</xref>]. The values for this scale linearly indicate the estimated US grade level of a text. For example, a grade of 10-12 would indicate that the target reader is at the high school level, whereas scores higher than 12 are graduate-level texts [<xref ref-type="bibr" rid="ref33">33</xref>]. Similarly, the Coleman-Liau index and the ARI both assess character and word frequency to approximate the US grade level required to read a text [<xref ref-type="bibr" rid="ref34">34</xref>]. The Gunning fog index assesses the frequency of difficult words in a text and is a linear range between 0 and 20: a score of 16-20 is at the graduate level [<xref ref-type="bibr" rid="ref35">35</xref>]. Similarly, the Dale-Chall index evaluates the frequency of difficult words but is scaled so that a score of 9-10 represents a university graduate–level text [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. Lastly, the Linsear Write index was developed to assess the readability of technical texts, and its score represents the years of formal US education needed to understand a text, similar to the previous indices [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        <p>Topic analysis was performed using Empath, a neural network–based lexicon [<xref ref-type="bibr" rid="ref40">40</xref>]. Empath is able to determine whether a certain sentence has the lexical categories of politics, religion, contentment, and approximately 200 more categories [<xref ref-type="bibr" rid="ref40">40</xref>]. By processing the text with Empath, we derived 194 lexical categories that were used as additional features that were concatenated with the previous text, sentiment, and readability features in the final deep learning model. The extracted lexical categories from Empath increased the amount of information the deep learning model trained on for each article and allowed for better interpretability as differences in topic frequencies could also be evaluated. For each of the lexical categories, a mean count for reliable and unreliable articles was derived, along with the <italic>t</italic> test and the <italic>P</italic> value (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Number of occurrences for keywords in unreliable news articles (N=298,498 words).</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38839_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Number of occurrences of keywords in reliable news articles (N=662,290 words).</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38839_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Top 10 lexical categories from Empath (a neural network–based topic analysis tool) in reliable and unreliable news articles selected by Empath. The reliable and unreliable means are the mean counts of each lexical category being classified into reliable and unreliable news articles, respectively.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="170"/>
            <col width="130"/>
            <col width="250"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>Lexical category</td>
                <td><italic>t</italic> (<italic>df</italic>)</td>
                <td><italic>P</italic> value</td>
                <td>Reliable mean (SD)</td>
                <td>Unreliable mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>magic</td>
                <td>–7.91 (1992)</td>
                <td>&lt;.001</td>
                <td>0.19 (0.60)</td>
                <td>0.51 (1.22)</td>
              </tr>
              <tr valign="top">
                <td>power</td>
                <td>–7.16 (1992)</td>
                <td>&lt;.001</td>
                <td>1.28 (2.20)</td>
                <td>2.16 (3.24)</td>
              </tr>
              <tr valign="top">
                <td>business</td>
                <td>7.15 (1992)</td>
                <td>&lt;.001</td>
                <td>8.58 (10.54)</td>
                <td>5.31 (7.10)</td>
              </tr>
              <tr valign="top">
                <td>work</td>
                <td>6.89 (1992)</td>
                <td>&lt;.001</td>
                <td>5.78 (8.82)</td>
                <td>3.28 (3.89)</td>
              </tr>
              <tr valign="top">
                <td>contentment</td>
                <td>6.18 (1992)</td>
                <td>&lt;.001</td>
                <td>0.70 (1.61)</td>
                <td>0.29 (0.72)</td>
              </tr>
              <tr valign="top">
                <td>office</td>
                <td>6.14 (1992)</td>
                <td>&lt;.001</td>
                <td>3.02 (4.37)</td>
                <td>1.88 (2.60)</td>
              </tr>
              <tr valign="top">
                <td>dispute</td>
                <td>–6.11 (1992)</td>
                <td>&lt;.001</td>
                <td>1.58 (2.48)</td>
                <td>2.35 (2.94)</td>
              </tr>
              <tr valign="top">
                <td>morning</td>
                <td>5.87 (1992)</td>
                <td>&lt;.001</td>
                <td>1.06 (1.87)</td>
                <td>0.59 (1.11)</td>
              </tr>
              <tr valign="top">
                <td>legend</td>
                <td>–5.85 (1992)</td>
                <td>&lt;.001</td>
                <td>0.34 (0.92)</td>
                <td>0.64 (1.31)</td>
              </tr>
              <tr valign="top">
                <td>blue collar job</td>
                <td>5.83 (1992)</td>
                <td>&lt;.001</td>
                <td>0.62 (1.75)</td>
                <td>0.21 (0.68)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Tokenization</title>
        <p>As ML models only accept numerical inputs, the text data must be tokenized. This process involves a word-index dictionary, where each word in the data set is converted to a numerical value or index, which corresponds to that word in the dictionary. For example, a word such as “coronavirus” might be presented to an ML model as the value 1234. As each unique word creates a unique index number, the “vocabulary” or total number of unique words in the data set can be a problem, especially if the data set is large, since words that appear once or twice in the data set generally do not contribute to the efficacy of the model. We limited the vocabulary size to 20,000 (51.73%) out of a total of 38,663 unique words from the training data. This excluded words that were used only once in the data set, because these words would not be useful to the model—Zipf’s law reaffirms that having larger vocabulary sizes gives diminishing returns, as speakers frequently use only a small proportion of the total vocabulary [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Furthermore, there are various estimates regarding the vocabulary size of an average native English speaker, with around 20,000 being a reasonable estimate for the vocabulary size [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. Articles were also 0-padded to a size of 3500 words, which was the size of the longest article, to ensure uniformity of the model input.</p>
      </sec>
      <sec>
        <title>Word Embedding</title>
        <p>Following tokenization, the data were processed using word embedding, a form of unsupervised ML. Word embedding places the data points of individual words into an embedding space with high dimensionality. Inside this embedding space, each word is represented as a vector with words that are similar to each other being located in close proximity. As such, word embedding allows hidden relationships between similar words to be quantified for ML analysis. Although a new word embedding layer could be trained and fitted on our data set, there exist pretrained word embedding models that are more efficient to use. For the article text data, we leveraged Global Vectors for Word Representation (GloVe), which is a commonly used word embedding model trained on hundreds of thousands of Wikipedia articles, with an embedding space of 100 dimensions [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
      </sec>
      <sec>
        <title>Machine Learning Classification</title>
        <p>The data were randomly split into training, testing, and validation subsets for deep learning. The ratio of these subsets was 8:1:1, respectively. Of the 1994 articles, 1595 (79.99%) were in the training subset, 199 (9.98%) were in the validation subset, and 200 (10.03%) were in the testing subset. The training and validation data were used to build the model to classify between reliable and unreliable articles, while the testing data were used to evaluate the model’s performance. The splitting of the data followed by model training and evaluation was repeated over 10 folds so that each article could be included in the training set. An average was taken between the performance metrics obtained from training on each fold. We evaluated the performance of multiple ML models on the data set (NB, KNNs, LR, LSTM, GRU, BiLSTM, BiGRU, and CNN) to determine the best models for reliability detection. The settings or hyperparameters were optimized either experimentally or using Gridsearch, which tests all combinations of hyperparameters for each of the aforementioned ML models.</p>
        <p>Finally, we developed an ensemble model using a lightly trained BiGRU to generate an initial reliability prediction, which was then combined with the text features, readability, sentiment, and Empath-classified lexical categories. This was then used to train an XGBoost model with 10-fold cross-validation.</p>
        <p>This paper uses several evaluation metrics that rely on the results from the confusion matrix. These metrics were derived from correct predictions by the model, such as true positive (TP) and true negative (TN), as well as incorrect predictions, such as false positive (FP) and false negative (FN). Accuracy is the total proportion of correct predictions, but this evaluation metric is not as effective when there is a class imbalance. Sensitivity refers to the proportion of correctly predicted unreliable articles, while specificity refers to the proportion of correctly predicted reliable articles. The AUC score shows the performance of the model at different TP and FP rates [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <disp-formula>
       Sensitivity (recall) = TP/(TP + FN)
        </disp-formula>
        <disp-formula>
        Specificity = TN/(TN + FP)
        </disp-formula>
        <disp-formula>
        Accuracy = (TP + TN)/(TP + TN + FP + FN)
        </disp-formula>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The data used in this paper did not need ethics approval as they were accessed through the open access ReCOVery data set GitHub, as cited in Zhou et al [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Exploration</title>
        <p>Data exploration was performed and features, such as readability, sentiment, and lexical categories, were combined with the full news article text data to train an ensemble model. An ensemble method using BiGRU and XGBoost was created using 1346 reliable articles and 648 unreliable articles.</p>
        <p>During data exploration, we found that the average text length in terms of the average word length and sentence length was longer in unreliable articles compared to reliable articles (<xref ref-type="table" rid="table2">Table 2</xref>). The Flesch-Kincaid grade level, the Dale-Chall index, the ARI, the Coleman-Liau index, the Gunning fog index, and the Linsear Write index indicated that reliable articles are easier to read compared to unreliable articles (<xref ref-type="table" rid="table2">Table 2</xref>). From the average frequency of 194 Empath-derived lexical categories, 110 (56.7%) were significantly different between reliable and unreliable articles (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Most frequent words in unreliable and reliable articles were also visualized (<xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>, respectively). Unreliable articles had higher rates of negative sentiment, while reliable articles had higher rates of neutral sentiment (<xref ref-type="table" rid="table3">Table 3</xref>). Performance metrics of various trained ML models as well as the new ensemble model were determined (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>
          Text length and readability metrics for reliable (N=1346) and unreliable (N=648) online news articles. The text length was expressed as the average sentence length and word length. Readability was expressed using the Flesch-Kincaid grade level, the Dale-Chall readability index, the ARI<sup>a</sup>, the Coleman-Liau index, the Gunning fog index, and the Linsear Write index.
        </p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="220"/>
            <col width="240"/>
            <col width="180"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Metrics</td>
                <td>Reliable mean (SD)</td>
                <td>Unreliable mean (SD)</td>
                <td><italic>t</italic> (<italic>df</italic>)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Average word length (characters)</td>
                <td>6.14 (0.27)</td>
                <td>6.32 (1.66)</td>
                <td>–3.93 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Average sentence length (words)</td>
                <td>23.67 (5.17)</td>
                <td>26.38 (7.06)</td>
                <td>–9.70 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Flesch-Kincaid grade level</td>
                <td>12.68 (2.63)</td>
                <td>14.39 (3.37)</td>
                <td>–12.38 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Gunning fog index</td>
                <td>14.87 (2.72)</td>
                <td>16.42 (3.33)</td>
                <td>–11.00 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Coleman-Liau index</td>
                <td>10.85 (1.87)</td>
                <td>11.82 (2.46)</td>
                <td>–9.72 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Dale-Chall index</td>
                <td>10.21 (0.96)</td>
                <td>10.70 (1.02)</td>
                <td>–10.53 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>ARI</td>
                <td>13.41 (3.30)</td>
                <td>15.43 (4.47)</td>
                <td>–11.41 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Linsear Write index</td>
                <td>16.42 (4.02)</td>
                <td>18.73 (5.31)</td>
                <td>–10.80 (1992)</td>
                <td>&lt;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ARI: automated readability index.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of sentiment polarity (0=least expression of sentiment in interest, 1=most expression of sentiment in interest) between reliable (N=1346) and unreliable (N=648) news articles in terms of sentiment of the sentences within news articles. Differences between the frequencies of sentences possessing positive, neutral, or negative sentiment were analyzed with a 2-sample independent <italic>t</italic> test.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="260"/>
            <col width="290"/>
            <col width="190"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Sentiment</td>
                <td>Reliable mean (SD)</td>
                <td>Unreliable mean (SD)</td>
                <td><italic>t</italic> (<italic>df</italic>)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Negative</td>
                <td>0.066 (0.042)</td>
                <td>0.076 (0.039)</td>
                <td>–5.46 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Neutral</td>
                <td>0.850 (0.054)</td>
                <td>0.840 (0.050)</td>
                <td>4.37 (1992)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Positive</td>
                <td>0.084 (0.035)</td>
                <td>0.085 (0.035)</td>
                <td>–0.095 (1992)</td>
                <td>.92</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Text Analysis</title>
        <p>After removal of stop words, the most frequent words in reliable and unreliable articles were examined. The highest word frequencies for unreliable and reliable articles are illustrated in frequency bar graphs (<xref rid="figure2" ref-type="fig">Figures 2</xref> and <xref rid="figure3" ref-type="fig">3</xref>). Common words between reliable and unreliable news articles were COVID-19–related keywords, such as “coronavirus,” “virus,” and “pandemic.” The differences were related to political undertones, such as “Trump” and “government.” Additionally, the Empath lexicon tool was applied to the text to yield lexical categories. The average count for each lexical category was determined for reliable and unreliable text. The differences in means were then compared using <italic>t</italic> tests. Of the 194 lexical categories, 110 significantly differed in frequency between reliable and unreliable texts (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref ref-type="table" rid="table1">Table 1</xref>). In <xref ref-type="table" rid="table1">Table 1</xref>, we display the top 10 lexical categories with the lowest <italic>P</italic> value. Categories included “magic,” “power,” “business,” “work,” “contentment,” “office,” “dispute,” “morning,” “legend,” and “blue collar job.” The lexical categories “business,” “work,” “contentment,” “office,” “morning,” and “blue collar job” had higher mean counts for the reliable articles compared to the unreliable articles. The lexical categories “magic,” “power,” “legend,” and “dispute” had lower mean counts for the reliable articles compared to the unreliable articles. 
In terms of text characteristics, there was a significant difference in the average sentence length between reliable and unreliable news articles, with reliable articles containing shorter sentences at 23.67 (SD 5.17) words per sentence compared to unreliable articles containing 26.38 (SD 7.06) words per sentence (<xref ref-type="table" rid="table2">Table 2</xref>). Additionally, the average word lengths were 6.14 (SD 0.27) and 6.32 (SD 1.66) for reliable and unreliable articles, respectively. In addition to text length, we also analyzed the differences in readability between reliable and unreliable articles. The readability indices used were the Flesch-Kincaid grade level, the Dale-Chall index, the ARI, the Coleman-Liau index, the Gunning fog index, and the Linsear Write index. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, unreliable articles were less readable, as indicated by all 6 readability indices. Since these text features are important in differentiating between reliable and unreliable news articles, they were input into our final deep learning model.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>Using VADER, the sentences from the articles were classified into positive, neutral, and negative sentiments. The sentiment score ranged from 0 to 1, with 1 denoting strong presentation of the sentiment of interest. For reliable articles, the means for the negative, neutral, and positive sentiment scores were 0.066 (SD 0.042), 0.850 (SD 0.054), and 0.084 (SD 0.035), respectively (<xref ref-type="table" rid="table3">Table 3</xref>). For unreliable articles, the means for the negative, neutral, and positive sentiment scores were 0.076 (SD 0.039), 0.840 (SD 0.050), and 0.085 (SD 0.035), respectively.</p>
      </sec>
      <sec>
        <title>Machine Learning Analysis</title>
        <p>After the newspaper article data were passed through GloVe word embedding, the text data were split into 10 folds for cross-validation. The traditional ML models included LR, KNNs, and NB. The AUC values (<xref rid="figure4" ref-type="fig">Figure 4</xref>) were generated, in addition to specificity and sensitivity values (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <p>Next, the deep learning models were fit to the data. Each model included the GloVe word embedding prior to training. Optimization of hyperparameters for the deep learning models was completed using GridSearchCV from the ML Python scikit-learn library. The hyperparameters optimized were batch size, epochs, dropout rate, neuron number, optimizer type, learning rate, and activation function type. Each model had varying hyperparameters that yielded the best results.</p>
        <p>The deep learning models that were assessed were LSTM, GRU, BiLSTM, BiGRU, and CNN. Similar to traditional ML models, the AUC, specificity, and sensitivity were determined as performance metrics (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <p>Lastly, an ensemble model was developed using the BiGRU and XGBoost. Our new model was first evaluated on the ReCOVery testing subset. A confusion matrix for our new model was generated, as shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>. The AUC, specificity, and sensitivity for our new deep learning model were 0.906, 0.835, and 0.945, respectively (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Receiver operating characteristic (ROC) curve and AUC scores with the corresponding color for both traditional ML models (KNN, LR, NB) and deep learning models (BiLSTM, CNN, LSTM, BiGRU, GRU, new model). AUC: area under the curve; BiGRU: bidirectional gated recurrent unit; BiLSTM: bidirectional long short-term memory; CNN: convolutional neural network; FP: false positive; GRU: gated recurrent unit; KNN: K-nearest neighbor; LR: logistic regression; LSTM: long short-term memory; ML: machine learning; NB: naive Bayes; TP: true positive.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38839_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance metrics for the ReCOVery validation data set for traditional ML<sup>a</sup> models (KNN<sup>b</sup>, LR<sup>c</sup>, NB<sup>d</sup>), and deep learning models (BiLSTM<sup>e</sup>, CNN<sup>f</sup>, LSTM<sup>g</sup>, BiGRU<sup>h</sup>, GRU<sup>i</sup>, new model).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="300"/>
            <col width="290"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Specificity</td>
                <td>Sensitivity</td>
                <td>AUC<sup>j</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LR</td>
                <td>0.720</td>
                <td>0.575</td>
                <td>0.563</td>
              </tr>
              <tr valign="top">
                <td>KNN</td>
                <td>0.660</td>
                <td>0.739</td>
                <td>0.530</td>
              </tr>
              <tr valign="top">
                <td>NB</td>
                <td>0.700</td>
                <td>0.627</td>
                <td>0.553</td>
              </tr>
              <tr valign="top">
                <td>BiLSTM</td>
                <td>0.810</td>
                <td>0.925</td>
                <td>0.892</td>
              </tr>
              <tr valign="top">
                <td>CNN</td>
                <td>0.792</td>
                <td>0.851</td>
                <td>0.789</td>
              </tr>
              <tr valign="top">
                <td>LSTM</td>
                <td>0.829</td>
                <td>0.903</td>
                <td>0.883</td>
              </tr>
              <tr valign="top">
                <td>BiGRU</td>
                <td>0.791</td>
                <td>0.963</td>
                <td>0.868</td>
              </tr>
              <tr valign="top">
                <td>GRU</td>
                <td>0.804</td>
                <td>0.918</td>
                <td>0.878</td>
              </tr>
              <tr valign="top">
                <td>New model</td>
                <td>0.835</td>
                <td>0.945</td>
                <td>0.906</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>ML: machine learning.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>KNN: K-nearest neighbor.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>NB: naive Bayes.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>BiLSTM: bidirectional long short-term memory.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>CNN: convolutional neural network.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>LSTM: long short-term memory.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>BiGRU: bidirectional gated recurrent unit.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup>GRU: gated recurrent unit.</p>
            </fn>
            <fn id="table4fn10">
              <p><sup>j</sup>AUC: area under the curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Confusion matrix for ReCOVery validation subset on trained new ensemble model with BiGRU and XGBoost. BiGRU: bidirectional gated recurrent unit; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38839_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study demonstrates an ensemble model with BiGRU and XGBoost for text reliability classification using the ReCOVery data set with a specificity, sensitivity, and AUC of 0.835, 0.945, and 0.906, respectively [<xref ref-type="bibr" rid="ref17">17</xref>]. Through our data analysis, we demonstrated that unreliable news articles have lower readability and higher sentence length. They also include more negative and less neutral sentiments and contain more polarizing lexical categories in comparison to reliable articles.</p>
      </sec>
      <sec>
        <title>Data Usage</title>
        <p>With regard to using news articles to build a classification model, an important consideration is the generalizability of the model. To ensure that the model is generalizable, the data used to train the model must be diverse in nature. A shortcoming of many deep learning misinformation detection studies is the focus on detecting misinformation from a narrow range of news sources or locations [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. Because of the homogenous nature of the data set used to train these models, many misinformation detection models are potentially less generalizable [<xref ref-type="bibr" rid="ref47">47</xref>]. An example would be CoAID, a data set constructed from COVID-19–related news articles and social media posts from December 1, 2019, to September 1, 2020. A shortcoming of the CoAID data set would be the lower number of news sources used for the data set, as only 9 reliable news sources were included during the data collection process [<xref ref-type="bibr" rid="ref48">48</xref>]. CoVerifi is a study that used the CoAID data set to create a web-based tool to check whether an online news article was credible [<xref ref-type="bibr" rid="ref49">49</xref>]. Another notable data set is the COVID-19-FAKES data set containing 61,711 tweets with misinformation and 2,985,399 tweets without misinformation [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. Silva et al [<xref ref-type="bibr" rid="ref51">51</xref>] used the COVID-19-FAKES data set to obtain insights into predictive features for the presence of misinformation in tweets and the differential engagement in tweets with and without misinformation [<xref ref-type="bibr" rid="ref51">51</xref>]. Hence, we used the ReCOVery data set for the diverse nature of its news articles, as they come from ~2000 different news outlets in different countries [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
      <sec>
        <title>Sentiment Analysis</title>
        <p>VADER was used to evaluate sentiment at a lexicon-based level due to its high accuracy, with an <italic>F</italic>1 classification accuracy of 0.96 and computational economy [<xref ref-type="bibr" rid="ref29">29</xref>]. Although VADER has become a staple in NLP for sentiment analysis, 2 key shortcomings to consider are its inability to recognize sarcasm/satire and its reduced accuracy when dealing with 3-class analyses (negative, neutral, and positive) [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        <p>From the distribution of articles with reliable versus unreliable news articles, it can be observed that reliable articles contain less negative sentiment in comparison to unreliable articles as they had a lower negative sentiment polarity score (<xref ref-type="table" rid="table3">Table 3</xref>). This is in line with observations of news content in the literature, as Arif et al [<xref ref-type="bibr" rid="ref53">53</xref>] discussed how individuals searching for negative terms on the internet can lead to more biased articles. To emphasize the importance of sentiment in differentiating fake and real news, Paschen [<xref ref-type="bibr" rid="ref54">54</xref>] concluded that the titles and body text of fake news articles contain more negative content, such as anger and disgust, compared to real news articles. Fake news is more likely to display negative sentiment to drive a specific narrative for profit, which supports our finding that there are a greater number of negative unreliable sources than neutral or positive unreliable sources.</p>
        <p>We observed a difference between the number of neutral reliable and neutral unreliable articles, with more neutral sentiment in reliable articles in comparison to unreliable articles (<xref ref-type="table" rid="table3">Table 3</xref>). A neutral sentiment scoring for reliable data sources implies impartiality and objectivity when discussing the subject matter [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
        <p>Many ML studies have targeted sentiment as a feature to predict misinformation in a variety of written information online because of the different sentiment valence between reliable and unreliable text due to the aforementioned reasons [<xref ref-type="bibr" rid="ref56">56</xref>]. Because of the differing nature of sentiment between texts of differing reliability, sentiment analysis was used in the context of filtering out negative messages on social media, spam filtering, among other applications [<xref ref-type="bibr" rid="ref56">56</xref>]. In agreement with our findings, Ajao et al [<xref ref-type="bibr" rid="ref57">57</xref>] determined that unreliable tweets often contain more negative sentiment in comparison to reliable tweets due to how authors of unreliable tweets use negative emotions to better propagate their message. They also showed that the use of sentiment can boost support vector machine (SVM) accuracy when the sentiment is considered in addition to textual features [<xref ref-type="bibr" rid="ref57">57</xref>]. Hence, sentiment was a feature selected for our model.</p>
      </sec>
      <sec>
        <title>Text Analysis</title>
        <p>The words themselves were observed to be quite similar to one another between the 2 groups because the subject matter of both reliable and unreliable sources is the same: COVID-19. Additionally, many of the most frequently occurring words are mere transitional words that are likely to be found in the majority of English literature.</p>
        <p>Interestingly, the most frequently occurring word in reliable sources was “said” (<xref rid="figure3" ref-type="fig">Figure 3</xref>). This is likely due to “said” being used to quote political figures and leaders in the scientific field. The reliability of articles in this case is a consequence of the articles citing reliable sources of information. Another observable trend is the increasing number of politically charged words found in unreliable articles. Words such as “country,” “government,” and “Trump” were amongst the most frequent words for unreliable sources but not for reliable articles (<xref rid="figure2" ref-type="fig">Figure 2</xref>). This communicates a pattern of political commentary occurring in unreliable sources [<xref ref-type="bibr" rid="ref58">58</xref>]. We can anticipate that articles discussing political content in the context of COVID-19 are likely interested in propagating an agenda—hence, the unreliability. For example, Chen et al [<xref ref-type="bibr" rid="ref59">59</xref>] found interplay between COVID-19 misinformation propagation and the 2020 US presidential elections with regard to mask use and mail-in ballots. Specifically, health information has been politicized to push political agendas and attack political opponents. In addition to frequently occurring words, lexical categories extracted from Empath and similar models allow us to evaluate the difference in topic frequencies between reliable and unreliable news articles [<xref ref-type="bibr" rid="ref40">40</xref>]. The use of lexical categories extracted from Empath and similar models can increase model performance compared to using only raw text data [<xref ref-type="bibr" rid="ref60">60</xref>-<xref ref-type="bibr" rid="ref63">63</xref>].</p>
        <p>Another feature we decided to explore and include in our final deep learning model is the readability and length of the news articles. Readability has been shown to be predictive of misinformation. In the study by Santos et al [<xref ref-type="bibr" rid="ref64">64</xref>], articles from a frequent source of fake news could be differentiated using only article readability scores with an SVM algorithm with an accuracy of 92% [<xref ref-type="bibr" rid="ref64">64</xref>]. Similarly, in a study by Zhou et al [<xref ref-type="bibr" rid="ref65">65</xref>], various metrics were explored based on their ability to classify reliable versus unreliable news articles. It was determined using random forests that readability is among the top 5 in terms of contribution to the model, alongside sentiment [<xref ref-type="bibr" rid="ref65">65</xref>].</p>
      </sec>
      <sec>
        <title>Machine Learning Classification</title>
        <p>In the original ReCOVery study, Zhou et al [<xref ref-type="bibr" rid="ref17">17</xref>] created a baseline prediction performance for news article reliability and found that a precision of 0.721-0.836 and 0.421-0.667 can be obtained for reliable and unreliable news articles, respectively. A recall of 0.705-0.829 and 0.441-0.667 can be obtained for reliable and unreliable news articles, respectively [<xref ref-type="bibr" rid="ref17">17</xref>]. The features used in the baseline model ranged from text lexical categories, rhetorical structure, and visual information within news articles. Zhou et al [<xref ref-type="bibr" rid="ref17">17</xref>] also tested the model on traditional ML models, such as SVMs, or deep learning algorithms, such as CNNs with unimodal and multimodal features. Other studies have also explored the use of the ReCOVery data set for false information classification. One such study is by Raj and Meel [<xref ref-type="bibr" rid="ref66">66</xref>], where a novel deep learning model, the Allied Recurrent and Convolutional Neural Network (ARCNN), was created using both image and textual features within news articles to detect misinformation. The performance of the ARCNN was tested using 6 COVID-19 fake news data sets, with ReCOVery as 1 of the data sets, achieving an accuracy, precision, recall, and <italic>F</italic>1 score of 80.98%, 53.85%, 58.33%, and 56.00%, respectively [<xref ref-type="bibr" rid="ref66">66</xref>]. Another study using the ReCOVery data set for model development explored the use of multiple languages for fake news detection to improve model performance [<xref ref-type="bibr" rid="ref67">67</xref>]. Finally, Wahle et al [<xref ref-type="bibr" rid="ref68">68</xref>] used the ReCOVery data set as 1 of 6 COVID-19 misinformation data sets to evaluate the performance of 15 transformer-based ML models to determine the generalizability of different transformer models. 
Differing from the aforementioned studies, we were able to demonstrate that the use of readability, text characteristics, sentiment, and lexical categories can improve upon the original ReCOVery data set baseline models [<xref ref-type="bibr" rid="ref17">17</xref>]. Hence, we demonstrate the importance of the aforementioned text features to improve upon news article reliability prediction. Furthermore, we show that the combination of multiple inputs and consideration of the chosen model can increase ML model accuracy in the context of NLP.</p>
        <p>In our final proposed model, the BiGRU with XGBoost and feature engineering was the best-performing model. A BiGRU is adept at capturing temporal data in long sequences, as bidirectional models can better capture the context of the text [<xref ref-type="bibr" rid="ref46">46</xref>]. During the experimentation with these models on ReCOVery data, we found that all deep learning models outperformed the traditional ML models because deep learning models are better able to handle more complex data [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref69">69</xref>]. Furthermore, we chose to use the GRU algorithm, which is a variant of the recurrent neural network, in addition to the LSTM algorithm due to the increased performance on longer text compared to LSTM [<xref ref-type="bibr" rid="ref21">21</xref>]. To further increase the performance of our model, an ensemble model was built, as combining multiple predictions can yield more accurate predictions [<xref ref-type="bibr" rid="ref70">70</xref>].</p>
      </sec>
      <sec>
        <title>Strengths</title>
        <p>A strength of our investigation is that it not only had the main goal of creating a deep learning model for reliability prediction but also identified significant trends in text and sentiment for reliable and unreliable news articles. An investigation focused solely on a deep learning model has a “black box” problem where the mechanisms used by the deep learning model are not visible and are contained within its many complex hidden layers [<xref ref-type="bibr" rid="ref71">71</xref>]. As such, a data exploration approach coupled with the deep learning model is able to better visualize and portray article reliability classification. Furthermore, our paper examined news articles, which had the advantage of being more normalized in text compared to tweets and social media, as each article was written with a professional approach. As such, less data were removed during preprocessing due to grammatical or spelling errors. Using news articles as data also avoided the problem of low hydration that Twitter misinformation data sets suffer from when tweets are removed by Twitter.</p>
      </sec>
      <sec>
        <title>Limitations and Future Directions</title>
        <p>There are a number of ways our project could be further refined. First, expanding the total amount of available data would be valuable as there are nearly twice as much data for reliable sources as unreliable. Furthermore, being able to web-scrape Facebook postings and Reddit threads would allow us to expand our scope of access and evaluate other high-traffic sources of information. Incorporating clustering models would also increase the specificity of our search and create a more accurate model that can consider what aspect of COVID-19 is being discussed when determining reliability. Due to the high accuracy of our model, as shown by the results, our model can be commercialized as a web app that allows users to assess, to a high degree of confidence, the reliability of the article they are reading. Moreover, it can also be used to determine the sentiment scoring of an article to determine whether they want to engage with that specific literature.</p>
        <p>Although this model specifically identifies COVID-19–related information, it could also be trained for other types of misinformation. As discussed previously, most current methods to combat misinformation online are through the use of human-moderated fact-checking websites. Examples include Twitter's Birdwatch program, where independent users can flag posts they deem untrustworthy [<xref ref-type="bibr" rid="ref72">72</xref>]. Other methods used include Facebook's fact-checking service, which manually labels posts or websites containing misinformation as untrustworthy and removes them from public view [<xref ref-type="bibr" rid="ref73">73</xref>]. Furthermore, warnings are placed below posts containing COVID-19 information to warn readers regarding potential misinformation contained within posts [<xref ref-type="bibr" rid="ref73">73</xref>]. Even though there are numerous instances of fact checking, the major issue that arises is the inefficiency in manual fact checking [<xref ref-type="bibr" rid="ref74">74</xref>]. Hence, new fact-checking methods aim toward automating the fact-checking process. The first example of a fact-checking website is the Bot Sentinel automated Twitter fact-checking software, which can be installed by users to monitor spam accounts [<xref ref-type="bibr" rid="ref75">75</xref>]. Bot Sentinel uses ML technology to classify posts or profiles as reliable or unreliable to an accuracy of 95% [<xref ref-type="bibr" rid="ref75">75</xref>].</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>In conclusion, we demonstrated that readability, sentiment, text characteristics, and lexical categories are important in differentiating between reliable and unreliable news articles, as it was shown that unreliable articles are less readable, have more negative sentiment, and have more political lexical categories. The aforementioned features were used to achieve above-the-baseline performance within the original ReCOVery data set, with a specificity, sensitivity, and AUC of 0.835, 0.945, and 0.906, respectively, using our new ensemble deep learning model. Hence, the application of readability, sentiment, and lexical categories using our new model can help determine the dependability of news articles and better improve upon pre-existing models that do not use these features.</p>
        <p>COVID-19 has brought to light the importance of developing an automated reliability assessor for news articles, as human-moderated fact-checking methods may be inefficient. Because readability, sentiment, and lexical categories can be used to improve upon pre-existing reliability classification models, we show that automated reliability detection may be an alternate way to determine news article reliability in the future, which will help news readers identify articles containing potentially unreliable information.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Mean (SDs) scores for Empath categories of reliable and unreliable news articles.</p>
        <media xlink:href="infodemiology_v2i2e38839_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 31 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ARCNN</term>
          <def>
            <p>Allied Recurrent and Convolutional Neural Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ARI</term>
          <def>
            <p>automated readability index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BiGRU</term>
          <def>
            <p>bidirectional gated recurrent unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BiLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">GloVe</term>
          <def>
            <p>Global Vectors for Word Representation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GRU</term>
          <def>
            <p>gated recurrent unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">KNN</term>
          <def>
            <p>K-nearest neighbor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">NB</term>
          <def>
            <p>naive Bayes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">TN</term>
          <def>
            <p>true negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">VADER</term>
          <def>
            <p>Valence Aware Dictionary and sEntiment Reasoner</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank the science, technology, engineering, and mathematics (STEM) fellowship team for organizing the National Undergraduate Big Data Challenge 2021 and JMIR for sponsoring this publication.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>How to fight an infodemic: the four pillars of infodemic management</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>29</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e21820</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e21820/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21820</pub-id>
          <pub-id pub-id-type="medline">32589589</pub-id>
          <pub-id pub-id-type="pii">v22i6e21820</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
          </person-group>
          <source>Infodemic</source>
          <year>2022</year>
          <access-date>2022-06-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/health-topics/infodemic">https://www.who.int/health-topics/infodemic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obiała</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Obiała</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mańczak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Owoc</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Olszewski</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 misinformation: accuracy of articles about coronavirus prevention mostly shared on social media</article-title>
          <source>Health Policy Technol</source>
          <year>2021</year>
          <month>03</month>
          <day>10</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>182</fpage>
          <lpage>186</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33163352"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.hlpt.2020.10.007</pub-id>
          <pub-id pub-id-type="medline">33163352</pub-id>
          <pub-id pub-id-type="pii">S2211-8837(20)30116-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC7603966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Figueira</surname>
              <given-names>Á</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The current state of fake news: challenges and opportunities</article-title>
          <source>Procedia Comput Sci</source>
          <year>2017</year>
          <volume>121</volume>
          <fpage>817</fpage>
          <lpage>825</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2017.11.106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sylvia Chou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gaysynsky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cappella</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Where we go from here: health misinformation on social media</article-title>
          <source>Am J Public Health</source>
          <year>2020</year>
          <month>10</month>
          <volume>110</volume>
          <issue>S3</issue>
          <fpage>S273</fpage>
          <lpage>S275</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2020.305905</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tandoc</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Defining "fake news"</article-title>
          <source>Digit J</source>
          <year>2018</year>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>137</fpage>
          <lpage>153</lpage>
          <pub-id pub-id-type="doi">10.1002/9781119011071.iemp0300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>DMJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baum</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Benkler</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Berinsky</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Greenhill</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Menczer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Metzger</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pennycook</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rothschild</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schudson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sloman</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Sunstein</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Thorson</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zittrain</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>The science of fake news</article-title>
          <source>Science</source>
          <year>2018</year>
          <month>03</month>
          <day>09</day>
          <volume>359</volume>
          <issue>6380</issue>
          <fpage>1094</fpage>
          <lpage>1096</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aao2998</pub-id>
          <pub-id pub-id-type="medline">29590025</pub-id>
          <pub-id pub-id-type="pii">359/6380/1094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tandoc</surname>
              <given-names>EC</given-names>
            </name>
          </person-group>
          <article-title>The facts of fake news: a research review</article-title>
          <source>Sociol Compass</source>
          <year>2019</year>
          <month>07</month>
          <day>25</day>
          <volume>13</volume>
          <issue>9</issue>
          <fpage>e12724</fpage>
          <pub-id pub-id-type="doi">10.1111/soc4.12724</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Oliveira</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Pisa</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>de Medeiros</surname>
              <given-names>DSV</given-names>
            </name>
            <name name-style="western">
              <surname>Mattos</surname>
              <given-names>DMF</given-names>
            </name>
          </person-group>
          <article-title>Identifying fake news on social networks based on natural language processing: trends and challenges</article-title>
          <source>Information</source>
          <year>2021</year>
          <month>01</month>
          <day>18</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <pub-id pub-id-type="doi">10.3390/info12010038</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Torabi Asr</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Taboada</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Big data and quality data for fake news and misinformation detection</article-title>
          <source>Big Data Soc</source>
          <year>2019</year>
          <month>05</month>
          <day>23</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>205395171984331</fpage>
          <pub-id pub-id-type="doi">10.1177/2053951719843310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Blair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Busam</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Forstner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Glance</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kawata</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kovvuri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sandhu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Scholz-Bright</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhan</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Real solutions for fake news? Measuring the effectiveness of general warnings and fact-check tags in reducing belief in false stories on social media</article-title>
          <source>Polit Behav</source>
          <year>2019</year>
          <month>02</month>
          <day>11</day>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>1073</fpage>
          <lpage>1095</lpage>
          <pub-id pub-id-type="doi">10.1007/s11109-019-09533-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Freiling</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Beets</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Brossard</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Fact-checking as risk communication: the multi-layered risk of misinformation in times of COVID-19</article-title>
          <source>J Risk Res</source>
          <year>2020</year>
          <month>04</month>
          <day>22</day>
          <volume>23</volume>
          <issue>7-8</issue>
          <fpage>1052</fpage>
          <lpage>1059</lpage>
          <pub-id pub-id-type="doi">10.1080/13669877.2020.1756385</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>JCS</given-names>
            </name>
            <name name-style="western">
              <surname>Correia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Murai</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Veloso</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Benevenuto</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Supervised learning for fake news detection</article-title>
          <source>IEEE Intell Syst</source>
          <year>2019</year>
          <month>3</month>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>76</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1109/mis.2019.2899143</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gebali</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An ensemble deep learning technique to detect COVID-19 misleading information</article-title>
          <source>Advances in Networked-Based Information Systems</source>
          <year>2020</year>
          <month>08</month>
          <day>20</day>
          <conf-name>International Conference on Network-Based Information Systems</conf-name>
          <conf-date>2020</conf-date>
          <conf-loc>Victoria, Canada</conf-loc>
          <publisher-loc>Manhattan, NY</publisher-loc>
          <publisher-name>Springer International</publisher-name>
          <fpage>163</fpage>
          <lpage>175</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-57811-4_16</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singhania</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>3HAN: a deep neural network for fake news detection</article-title>
          <year>2017</year>
          <conf-name>24th International Conference on Neural Information Processing (ICONIP 2017)</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Guangzhou, China</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-70096-0_59</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Khondaker</surname>
              <given-names>MTI</given-names>
            </name>
            <name name-style="western">
              <surname>Afroz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uddin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Iqbal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A benchmark study of machine learning models for online fake news detection</article-title>
          <source>Mach Learn Appl</source>
          <year>2021</year>
          <month>06</month>
          <volume>4</volume>
          <fpage>100032</fpage>
          <pub-id pub-id-type="doi">10.1016/j.mlwa.2021.100032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Mulay</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zafarani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ReCOVery: a multimodal repository for COVID-19 news credibility research</article-title>
          <year>2020</year>
          <conf-name>CIKM '20: 29th ACM International Conference on Information &amp; Knowledge Management</conf-name>
          <conf-date>October 19-23, 2020</conf-date>
          <conf-loc>Virtual Event Ireland</conf-loc>
          <fpage>3205</fpage>
          <lpage>3212</lpage>
          <pub-id pub-id-type="doi">10.1145/3340531.3412880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ganaie</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Malik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tanveer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Suganthan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Ensemble deep learning: a review</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online April 6, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2104.02395"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.engappai.2022.105151</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Asthana</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Upadhyay</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Upreti</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Akbar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fake news detection using deep learning models: a novel approach</article-title>
          <source>Trans Emerging Tel Tech</source>
          <year>2019</year>
          <month>11</month>
          <day>05</day>
          <volume>31</volume>
          <issue>2</issue>
          <fpage>e3767</fpage>
          <pub-id pub-id-type="doi">10.1002/ett.3767</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gulcehre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Empirical evaluation of gated recurrent neural networks on sequence modeling</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online December 11, 2014</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1412.3555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>LSTM and GRU neural network performance comparison study: taking yelp review dataset as an example</article-title>
          <year>2020</year>
          <conf-name>2020 International Workshop on Electronic Communication and Artificial Intelligence (IWECAI)</conf-name>
          <conf-date>June 12-14, 2020</conf-date>
          <conf-loc>Shanghai, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iwecai50956.2020.00027</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>KDD '16: 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shwartz-Ziv</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Armon</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Tabular data: deep learning is not all you need</article-title>
          <source>Inf Fusion</source>
          <year>2022</year>
          <month>05</month>
          <volume>81</volume>
          <fpage>84</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.1016/j.inffus.2021.11.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>NewsGuard Technologies</collab>
          </person-group>
          <source>Rating Process and Criteria</source>
          <year>2022</year>
          <access-date>2022-06-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.newsguardtech.com/ratings/rating-process-criteria/">https://www.newsguardtech.com/ratings/rating-process-criteria/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zandt</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>About Media Bias / Fact Check</source>
          <access-date>2022-06-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mediabiasfactcheck.com/about/">https://mediabiasfactcheck.com/about/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>NLTK: the natural language toolkit</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online May 17, 2002</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/cs/0205028"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1118108.1118117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Data structures for statistical computing in Python</article-title>
          <year>2010</year>
          <conf-name>9th Python in Science Conference (SciPy 2010)</conf-name>
          <conf-date>June 28-July 3, 2010</conf-date>
          <conf-loc>Austin, TX</conf-loc>
          <fpage>56</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.25080/majora-92bf1922-00a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <year>2014</year>
          <conf-name>8th International AAAI Conference on Weblogs and Social Media</conf-name>
          <conf-date>June 1-4, 2014</conf-date>
          <conf-loc>Ann Arbor, MI</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojs.aaai.org/index.php/ICWSM/article/view/14550"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loria</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>TextBlob: Simplified Text Processing</source>
          <access-date>2022-09-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://textblob.readthedocs.io/en/dev/">https://textblob.readthedocs.io/en/dev/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DiMascio</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>py-readability-metrics</source>
          <access-date>2022-03-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/cdimascio/py-readability-metrics">https://github.com/cdimascio/py-readability-metrics</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Si</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Callan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A statistical model for scientific readability</article-title>
          <year>2001</year>
          <conf-name>CIKM01: 10th International Conference on Information and Knowledge Management</conf-name>
          <conf-date>October 5-10, 2001</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <fpage>574</fpage>
          <lpage>576</lpage>
          <pub-id pub-id-type="doi">10.1145/502585.502695</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hoke</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Comparison of Recreational Reading Books Levels Using the Fry Readability Graph and the Flesch-Kincaid Grade Level</source>
          <year>1999</year>
          <access-date>2022-06-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eric.ed.gov/?id=ED428333">https://eric.ed.gov/?id=ED428333</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karmakar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Visualizing multiple text readability indexes</article-title>
          <year>2010</year>
          <conf-name>International Conference on Education and Management Technology (ICEMT 2010)</conf-name>
          <conf-date>November 2-4, 2010</conf-date>
          <conf-loc>Cairo, Egypt</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icemt.2010.5657684</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gunning</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The fog index after twenty years</article-title>
          <source>J Bus Commun</source>
          <year>2016</year>
          <month>09</month>
          <day>16</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>3</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1177/002194366900600202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Greenberg</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Determining readability: how to select and apply easy-to-use readability formulas to assess the difficulty of adult literacy materials</article-title>
          <source>Adult Basic Educ Lit J</source>
          <year>2010</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>34</fpage>
          <lpage>42</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dale</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chall</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A formula for predicting readability</article-title>
          <source>Educ Res Bull</source>
          <year>1948</year>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>28</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>The New Dale-Chall Readability Formula: A Vocabulary-Based Readability Formula</source>
          <year>2003</year>
          <access-date>2022-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.readabilityformulas.com/new-dale-chall-readability-formula.php">https://www.readabilityformulas.com/new-dale-chall-readability-formula.php</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>How to Use the Linsear Write Readability Formula to Grade Your Text</source>
          <year>2003</year>
          <access-date>2022-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://readabilityformulas.com/linsear-write-readability-formula.php">https://readabilityformulas.com/linsear-write-readability-formula.php</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fast</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Empath: understanding topic signals in large-scale text</article-title>
          <year>2016</year>
          <conf-name>ACM CHI 2016: 2016 CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 7-12, 2016</conf-date>
          <conf-loc>San Jose, CA</conf-loc>
          <fpage>4647</fpage>
          <lpage>4657</lpage>
          <pub-id pub-id-type="doi">10.1145/2858036.2858535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Word embedding with Zipf’s context</article-title>
          <source>IEEE Access</source>
          <year>2019</year>
          <volume>7</volume>
          <fpage>168934</fpage>
          <lpage>168943</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2019.2954691</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sigurd</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Eeg-Olofsson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van Weijer</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Word length, sentence length and frequency - Zipf revisited</article-title>
          <source>Studia Linguist</source>
          <year>2004</year>
          <month>04</month>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1111/j.0039-3193.2004.00109.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brysbaert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mandera</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Keuleers</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>How many words do we know? Practical estimates of vocabulary size dependent on word definition, the degree of language input and the participant's age</article-title>
          <source>Front Psychol</source>
          <year>2016</year>
          <volume>7</volume>
          <fpage>1116</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fpsyg.2016.01116"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2016.01116</pub-id>
          <pub-id pub-id-type="medline">27524974</pub-id>
          <pub-id pub-id-type="pmcid">PMC4965448</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goulden</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nation</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Read</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How large can a receptive vocabulary be?</article-title>
          <source>Appl Linguist</source>
          <year>1990</year>
          <month>12</month>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>341</fpage>
          <lpage>363</lpage>
          <pub-id pub-id-type="doi">10.26686/wgtn.12560441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global Vectors for Word Representation</article-title>
          <year>2014</year>
          <conf-name>2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1532</fpage>
          <lpage>1543</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elfaik</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nfaoui</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Deep bidirectional LSTM network learning-based sentiment analysis for Arabic text</article-title>
          <source>J Intell Syst</source>
          <year>2021</year>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>395</fpage>
          <lpage>412</lpage>
          <pub-id pub-id-type="doi">10.1515/jisys-2020-0021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suprem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Evaluating generalizability of fine-tuned models for fake news detection</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online May 15, 2022 </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2205.07154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>CoAID: COVID-19 healthcare misinformation dataset</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online May 22, 2020 </comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2006.00885"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kolluri</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Murthy</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>CoVerifi: a COVID-19 news verification system</article-title>
          <source>Online Soc Netw Media</source>
          <year>2021</year>
          <month>03</month>
          <volume>22</volume>
          <fpage>100123</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33521412"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.osnem.2021.100123</pub-id>
          <pub-id pub-id-type="medline">33521412</pub-id>
          <pub-id pub-id-type="pii">S2468-6964(21)00007-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7825993</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gebali</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>COVID-19-FAKES: a Twitter (Arabic/English) dataset for detecting misleading information on COVID-19</article-title>
          <source>Advances in Intelligent Networking and Collaborative Systems</source>
          <year>2021</year>
          <conf-name>International Conference on Intelligent Networking and Collaborative Systems</conf-name>
          <conf-date>2020</conf-date>
          <conf-loc>Victoria</conf-loc>
          <publisher-loc>Manhattan, NY</publisher-loc>
          <publisher-name>Springer International</publisher-name>
          <fpage>256</fpage>
          <lpage>268</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-57796-4_25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ceschin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shrestha</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Brant</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Grégio</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Giovanini</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Predicting misinformation and engagement in COVID-19 Twitter discourse in the first months of the outbreak</article-title>
          <source>Association for Computing Machinery</source>
          <year>2020</year>
          <month>11</month>
          <volume>37</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/346614769_Predicting_Misinformation_and_Engagement_in_COVID-19_Twitter_Discourse_in_the_First_Months_of_the_Outbreak"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maynard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Greenwood</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Who cares about sarcastic tweets? Investigating the impact of sarcasm on sentiment analysis</article-title>
          <year>2014</year>
          <conf-name>Ninth International Conference on Language Resources and Evaluation (LREC'14)</conf-name>
          <conf-date>May 2014</conf-date>
          <conf-loc>Reykjavik, Iceland</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.lrec-conf.org/proceedings/lrec2014/index.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arif</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Jefri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bizzi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Perano</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Goldman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haq</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chua</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Mengozzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Neunez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ghezzi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Fake news or weak science? Visibility and characterization of antivaccine webpages returned by Google in different languages and countries</article-title>
          <source>Front Immunol</source>
          <year>2018</year>
          <month>06</month>
          <day>05</day>
          <volume>9</volume>
          <fpage>1215</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.frontiersin.org/articles/10.3389/fimmu.2018.01215"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fimmu.2018.01215</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paschen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Investigating the emotional appeal of fake news using artificial intelligence and human contributions</article-title>
          <source>J Prod Brand Manag</source>
          <year>2019</year>
          <month>05</month>
          <day>06</day>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>223</fpage>
          <lpage>233</lpage>
          <pub-id pub-id-type="doi">10.1108/jpbm-12-2018-2179</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rafi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Arko</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chakrabarty</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Fake news pattern recognition using linguistic analysis</article-title>
          <year>2018</year>
          <conf-name>Joint 7th International Conference on Informatics, Electronics Vision (ICIEV) and 2nd International Conference on Imaging, Vision Pattern Recognition (IcIVPR)</conf-name>
          <conf-date>June 25-29, 2018</conf-date>
          <conf-loc>Kitakyushu, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iciev.2018.8641018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alonso</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Vilares</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gómez-Rodríguez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vilares</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis for fake news detection</article-title>
          <source>Electronics</source>
          <year>2021</year>
          <month>06</month>
          <day>05</day>
          <volume>10</volume>
          <issue>11</issue>
          <fpage>1348</fpage>
          <pub-id pub-id-type="doi">10.3390/electronics10111348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ajao</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Bhowmik</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zargari</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Sentiment aware fake news detection on online social networks</article-title>
          <year>2019</year>
          <conf-name>ICASSP 2019: 2019 IEEE International Conference on Acoustics, Speech and Signal Processing</conf-name>
          <conf-date>May 12-17, 2019</conf-date>
          <conf-loc>Brighton, UK</conf-loc>
          <fpage>2507</fpage>
          <lpage>2511</lpage>
          <pub-id pub-id-type="doi">10.1109/icassp.2019.8683170</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The global rise of “fake news” and the threat to democratic elections in the USA</article-title>
          <source>Public Adm Policy</source>
          <year>2019</year>
          <month>07</month>
          <day>01</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1108/pap-04-2019-0008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cowan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 misinformation and the 2020 U.S. presidential election</article-title>
          <source>HKS Misinformation Review</source>
          <year>2021</year>
          <month>03</month>
          <day>03</day>
          <access-date>2022-09-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://misinforeview.hks.harvard.edu/article/covid-19-misinformation-and-the-2020-u-s-presidential-election/">https://misinforeview.hks.harvard.edu/article/covid-19-misinformation-and-the-2020-u-s-presidential-election/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Rana</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Panwala</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kathiriya</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Analysis of contextual features’ granularity for fake news detection</article-title>
          <source>Research Square</source>
          <comment>Preprint posted online May 27, 2022 </comment>
          <pub-id pub-id-type="doi">10.21203/rs.3.rs-1676328/v1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekulić</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Gjurković</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Šnajder</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Not just depressed: bipolar disorder prediction on Reddit</article-title>
          <source>aclanthology</source>
          <comment>Preprint posted online October, 2018 </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w18-6211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deb</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>De</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chatterjee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Studying borderline personality disorder using machine learning</article-title>
          <year>2022</year>
          <conf-name>16th International Conference on Ubiquitous Information Management and Communication (IMCOM)</conf-name>
          <conf-date>January 3-5, 2022</conf-date>
          <conf-loc>Seoul, South Korea</conf-loc>
          <pub-id pub-id-type="doi">10.1109/imcom53663.2022.9721800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jubair</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Salim</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Karadsheh</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Hassona</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Saifan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Abdel-Majeed</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis for Twitter chatter during the early outbreak period of COVID-19</article-title>
          <year>2021</year>
          <conf-name>4th International Seminar on Research of Information Technology and Intelligent Systems (ISRITI)</conf-name>
          <conf-date>December 12-17, 2021</conf-date>
          <conf-loc>Yogyakarta, Indonesia</conf-loc>
          <pub-id pub-id-type="doi">10.1109/isriti54043.2021.9702837</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santos</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pedro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Measuring the impact of readability features in fake news detection</article-title>
          <year>2020</year>
          <conf-name>12th Language Resources and Evaluation Conference</conf-name>
          <conf-date>May 11-16, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
          <fpage>1404</fpage>
          <lpage>1413</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.lrec-1.176"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Phoha</surname>
              <given-names>VV</given-names>
            </name>
            <name name-style="western">
              <surname>Zafarani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Fake news early detection: a theory-driven model</article-title>
          <source>Digit Threats Res Pract</source>
          <year>2020</year>
          <month>06</month>
          <day>30</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1145/3377478</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raj</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meel</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>ARCNN framework for multimodal infodemic detection</article-title>
          <source>Neural Netw</source>
          <year>2022</year>
          <month>02</month>
          <volume>146</volume>
          <fpage>36</fpage>
          <lpage>68</lpage>
          <pub-id pub-id-type="doi">10.1016/j.neunet.2021.11.006</pub-id>
          <pub-id pub-id-type="medline">34839091</pub-id>
          <pub-id pub-id-type="pii">S0893-6080(21)00434-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dementieva</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Panchenko</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Cross-lingual evidence improves monolingual fake news detection</article-title>
          <year>2021</year>
          <conf-name>59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Student Research Workshop</conf-name>
          <conf-date>August 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <fpage>310</fpage>
          <lpage>320</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-srw.32</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wahle</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ashok</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Raus</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Meuschke</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosal</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gipp</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Testing the generalization of neural language models for COVID-19 misinformation detection</article-title>
          <source>Information for a Better World: Shaping the Global Future. iConference 2022. Lecture Notes in Computer Science, Vol 13192</source>
          <year>2022</year>
          <conf-name>Information for a Better World: Shaping the Global Future. iConference 2022</conf-name>
          <conf-date>2022</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>381</fpage>
          <lpage>392</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-96957-8_33</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Brazdil</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Comparison of SVM and some older classification algorithms in text classification tasks</article-title>
          <year>2006</year>
          <conf-name>Artificial Intelligence in Theory and Practice, IFIP 19th World Computer Congress, TC 12: IFIP AI 2006 Stream</conf-name>
          <conf-date>August 21-24, 2006</conf-date>
          <conf-loc>Santiago, Chile</conf-loc>
          <fpage>169</fpage>
          <lpage>178</lpage>
          <pub-id pub-id-type="doi">10.1007/978-0-387-34747-9_18</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sagi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rokach</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Ensemble learning: a survey</article-title>
          <source>WIREs Data Mining Knowl Discov</source>
          <year>2018</year>
          <month>02</month>
          <day>27</day>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>e1249</fpage>
          <pub-id pub-id-type="doi">10.1002/widm.1249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Samek</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegand</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Explainable artificial intelligence: understanding, visualizing and interpreting deep learning models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online August 28, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1708.08296"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pickles</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>Updating Our Approach to Misleading Information</source>
          <access-date>2021-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.twitter.com/en_us/topics/product/2020/updating-our-approach-to-misleading-information">https://blog.twitter.com/en_us/topics/product/2020/updating-our-approach-to-misleading-information</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Meta Business Help Centre</collab>
          </person-group>
          <source>About Fact-Checking on Facebook</source>
          <access-date>2022-09-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.facebook.com/business/help/2593586717571940">https://www.facebook.com/business/help/2593586717571940</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nakov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Corney</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hasanain</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Automated fact-checking for assisting human fact-checkers</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online March 13, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2103.07769"/>
          </comment>
          <pub-id pub-id-type="doi">10.24963/ijcai.2021/619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Bot Sentinel</collab>
          </person-group>
          <source>More than just bots</source>
          <access-date>2022-09-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://botsentinel.com/info/about">https://botsentinel.com/info/about</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
