<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Infodemiology</journal-id>
      <journal-title>JMIR Infodemiology</journal-title>
      <issn pub-type="epub">2564-1891</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i2e38749</article-id>
      <article-id pub-id-type="pmid">37113449</article-id>
      <article-id pub-id-type="doi">10.2196/38749</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Direct-to-Consumer Genetic Testing on Social Media: Topic Modeling and Sentiment Analysis of YouTube Users' Comments</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pal</surname>
            <given-names>Anjan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zimmermann</surname>
            <given-names>Bettina</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Persky</surname>
            <given-names>Susan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Toussaint</surname>
            <given-names>Philipp A</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1617-3307</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Renner</surname>
            <given-names>Maximilian</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3004-9350</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lins</surname>
            <given-names>Sebastian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7480-275X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Thiebes</surname>
            <given-names>Scott</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6917-1831</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sunyaev</surname>
            <given-names>Ali</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Economics and Management</institution>
            <institution>Karlsruhe Institute of Technology</institution>
            <addr-line>Kaiserstr. 89</addr-line>
            <addr-line>Karlsruhe, 76133</addr-line>
            <country>Germany</country>
            <phone>49 72160846037</phone>
            <email>sunyaev@kit.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4353-8519</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Economics and Management</institution>
        <institution>Karlsruhe Institute of Technology</institution>
        <addr-line>Karlsruhe</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>HIDSS4Health – Helmholtz Information and Data Science School for Health</institution>
        <addr-line>Karlsruhe/Heidelberg</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ali Sunyaev <email>sunyaev@kit.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jul-Dec</season>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>9</month>
        <year>2022</year>
      </pub-date>
      <volume>2</volume>
      <issue>2</issue>
      <elocation-id>e38749</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>18</day>
          <month>8</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Philipp A Toussaint, Maximilian Renner, Sebastian Lins, Scott Thiebes, Ali Sunyaev. Originally published in JMIR Infodemiology (https://infodemiology.jmir.org), 15.09.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Infodemiology, is properly cited. The complete bibliographic information, a link to the original publication on https://infodemiology.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://infodemiology.jmir.org/2022/2/e38749" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With direct-to-consumer (DTC) genetic testing enabling self-responsible access to novel information on ancestry, traits, or health, consumers often turn to social media for assistance and discussion. YouTube, the largest social media platform for videos, offers an abundance of DTC genetic testing–related videos. Nevertheless, user discourse in the comments sections of these videos is largely unexplored.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to address the lack of knowledge concerning user discourse in the comments sections of DTC genetic testing–related videos on YouTube by exploring topics discussed and users' attitudes toward these videos.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We employed a 3-step research approach. First, we collected metadata and comments of the 248 most viewed DTC genetic testing–related videos on YouTube. Second, we conducted topic modeling using word frequency analysis, bigram analysis, and structural topic modeling to identify topics discussed in the comments sections of those videos. Finally, we employed Bing (binary), National Research Council Canada (NRC) emotion, and 9-level sentiment analysis to identify users' attitudes toward these DTC genetic testing–related videos, as expressed in their comments.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We collected 84,082 comments from the 248 most viewed DTC genetic testing–related YouTube videos. With topic modeling, we identified 6 prevailing topics on (1) general genetic testing, (2) ancestry testing, (3) relationship testing, (4) health and trait testing, (5) ethical concerns, and (6) YouTube video reaction. Further, our sentiment analysis indicates strong positive emotions (anticipation, joy, surprise, and trust) and a neutral-to-positive attitude toward DTC genetic testing–related videos.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>With this study, we demonstrate how to identify users' attitudes on DTC genetic testing by examining topics and opinions based on YouTube video comments. Shedding light on user discourse on social media, our findings suggest that users are highly interested in DTC genetic testing and related social media content. Nonetheless, with this novel market constantly evolving, service providers, content providers, or regulatory authorities may still need to adapt their services to users' interests and desires.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>direct-to-consumer genetic testing</kwd>
        <kwd>health information</kwd>
        <kwd>social media</kwd>
        <kwd>YouTube</kwd>
        <kwd>sentiment analysis</kwd>
        <kwd>topic modeling</kwd>
        <kwd>content analysis</kwd>
        <kwd>online health information</kwd>
        <kwd>user discourse</kwd>
        <kwd>infodemiology</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Objectives</title>
        <p>Since the completion of the human genome project in 2003, dwindling genome sequencing costs and a rising interest in genomics among the general public have paved the way for direct-to-consumer (DTC) genetic testing [<xref ref-type="bibr" rid="ref1">1</xref>]. Today, users can purchase DTC genetic tests via the internet for less than US $100 to gain genetic insights into their health, traits, heritage, and more without the involvement of health care professionals [<xref ref-type="bibr" rid="ref2">2</xref>]. By providing users with such interesting and novel insights, DTC genetic testing markets are growing continuously. For example, North America's DTC genetic testing market alone accounted for 39% of an estimated global market value of US $1.5 billion in 2021. Moreover, with a projected annual growth rate of 15.3%, the DTC genetic testing market value is expected to triple in the next 8 years [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>The uprise of DTC genetic testing and self-responsible genetics has also sparked countless ethical, social, technical, and legal issues [<xref ref-type="bibr" rid="ref1">1</xref>]. For example, critics argue that DTC genetic testing lacks clinical validity and meaningful interpretation of test results, whereas service providers can make unregulated advertising and marketing claims, especially for health-related tests [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Indeed, consumers taking multiple DTC genetic tests found themselves receiving different results depending on the service provider [<xref ref-type="bibr" rid="ref8">8</xref>]. Another concern often discussed by researchers and consumers is the potential sharing and reselling of genetic data (eg, to pharmaceutical companies) and the resulting implications on genetic privacy, including genetic data access to insurance companies, employers, law enforcement agencies, or malicious entities like hackers [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Although many consumers perceive these practices as unfair, low prices and potential genetic insights often outweigh the aforementioned concerns [<xref ref-type="bibr" rid="ref15">15</xref>]. However, due to genetic similarity, these consequences may also apply to blood relatives who were not involved or did not consent to genetic testing [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. This also ties in with media and research reporting that consumers in the United States use DTC genetic ancestry tests to prove their “genetic purity,” leading to instances of racism and genetic discrimination on social media [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>With the increasing spread and availability of DTC genetic testing [<xref ref-type="bibr" rid="ref2">2</xref>] and a general tendency in society to retrieve as well as discuss health information and health-related topics on the internet [<xref ref-type="bibr" rid="ref19">19</xref>], it is by no means surprising that DTC genetic testing is a frequent and recent topic on many social media platforms [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. In particular, YouTube, one of the largest social media platforms and the most comprehensive web-based video platform [<xref ref-type="bibr" rid="ref22">22</xref>], serves as the first port of call for many internet users to discuss health information and DTC genetic testing in particular [<xref ref-type="bibr" rid="ref23">23</xref>]. While YouTube can serve to share health information and experiences with a big audience for content creators (eg, consumers, service providers, health care professionals, or journalists), it also enables user discourse through textual comments below individual videos [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>Understanding the topics, opinions, and attitudes discussed by the users can prove crucial for many stakeholders, as comments are the main form of user reaction and feedback on social media [<xref ref-type="bibr" rid="ref23">23</xref>]. Service providers may gain, for instance, insights into consumer demands, whereas content creators may improve their videos by adjusting their content to meet user preferences. Moreover, with the ongoing debate on ethical and legal concerns toward DTC genetic testing [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], user opinions are of utmost importance to regulation authorities, politicians, and the industry in general. However, many stakeholders lack the means to extract the core themes discussed and attitudes expressed in the comments sections effectively and efficiently, given the sheer number of comments and manifold writing styles of users.</p>
        <p>Extant research regarding DTC genetic testing on social media confirms this lack of understanding. Prior research focuses on microblogging services such as Twitter [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], Reddit [<xref ref-type="bibr" rid="ref27">27</xref>], or 4chan [<xref ref-type="bibr" rid="ref18">18</xref>] to investigate user discourse on DTC genetic testing and shows that we are still puzzled about users' interests and opinions toward DTC genetic testing. Inconsistent findings regarding which topics users discuss on different platforms (eg, ancestry testing on Twitter [<xref ref-type="bibr" rid="ref25">25</xref>] and health testing on Reddit [<xref ref-type="bibr" rid="ref27">27</xref>]) suggest that the DTC genetic testing discourse varies from platform to platform and must thus be investigated separately. Moreover, research has already shown the value of analyzing users' opinions and attitudes through user comments from select platforms for DTC genetic testing–related content. For instance, Mittos et al [<xref ref-type="bibr" rid="ref18">18</xref>] have uncovered extensive use of hate speech on 4chan, whereas Basch et al [<xref ref-type="bibr" rid="ref20">20</xref>] have identified the need for educational content about genetic testing on TikTok. Few studies have investigated information about DTC genetic testing on YouTube while primarily analyzing the multimedia information (ie, the content of the videos) [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref31">31</xref>] and overlooking the textual information provided by users' comments (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for a complete overview of research on DTC genetic testing on social media). Because most users do not actively produce YouTube videos but only consume them, we believe that analyzing the topics that users discuss in the YouTube comments sections provides a new perspective on the ongoing discussion regarding DTC genetic testing–related videos on social media platforms. Consequently, we ask the following research questions (RQs):</p>
        <p>RQ1: What topics do YouTube users discuss in the comments sections of DTC genetic testing–related videos?</p>
        <p>RQ2: What are users' attitudes toward DTC genetic testing–related videos, as expressed in their comments on YouTube?</p>
        <p>To answer our RQs, we analyzed the 248 most viewed videos dealing with DTC genetics in a 3-step exploratory approach. First, we analyzed the selected videos regarding media type, genetic test purpose, and related health information. Second, we employed topic modeling to investigate user discourse in the comments sections of those videos. Third, we conducted a sentiment analysis unveiling users' attitudes toward the discussed topics and DTC genetic testing videos in general.</p>
        <p>Through our study, we contribute to research and practice in several ways. As for research, we add to the literature on user attitudes toward DTC genetic testing by delineating topics and opinions discussed about these genetic tests. Further, we contribute to the research stream regarding health information on social media by showing that YouTube comments provide valuable insights on user discourse on social media and demonstrating that DTC genetic testing and health information topics may generally vary from platform to platform. As for practice, our research may help providers of DTC genetic testing services and regulatory authorities gain further insights into user attitudes and consequently adapt or improve genetic testing services and regulations. As most videos are user-generated, our analysis of user discourse can provide valuable insights on the topics discussed in the comments sections of these videos, providing content creators with valuable information for improving their future DTC genetic testing–themed videos.</p>
      </sec>
      <sec>
        <title>Health Information on Social Media Platforms</title>
        <p>During the past decade, social media platforms have become increasingly attractive in the digital health sector as a means of communicating medical information [<xref ref-type="bibr" rid="ref32">32</xref>]. In addition to accessing professional and nonprofessional medical information, users can also share their experiences and get in touch with each other [<xref ref-type="bibr" rid="ref33">33</xref>]. Users already discuss various health topics like diabetes, medication and medication information, physical health, mental health, cancer, or more recently, COVID-19 on social media [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref38">38</xref>].</p>
        <p>Consequently, information dissemination platforms (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for a detailed description of social media platform types), such as YouTube, have garnered interest from researchers to study various health care–related topics. For example, studies have investigated users' attitudes toward the effect of sleep-aiding music [<xref ref-type="bibr" rid="ref24">24</xref>], users' preferences regarding treatment and symptoms of diabetes as well as the social culture pertaining to diabetes-related video clips [<xref ref-type="bibr" rid="ref39">39</xref>], or public opinions and concerns about daily coverage of the COVID-19 crisis in Canada [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>DTC Genetic Testing</title>
        <p>DTC genetic testing differs from traditional clinical genetic testing in that it is initiated by the consumers and does not require the direct interaction of consumers with health care professionals [<xref ref-type="bibr" rid="ref2">2</xref>]. With the internet being the leading advertising and distribution channel, the DTC genetic testing service provider usually sends a DNA sample collection kit (eg, buccal swab or blood spot collection) to the consumers' homes for self-collection [<xref ref-type="bibr" rid="ref5">5</xref>] or arranges for sample collection at a local laboratory [<xref ref-type="bibr" rid="ref7">7</xref>]. Afterward, the service provider may perform various genetic analyses and then return the results directly to the consumers via the internet or mail [<xref ref-type="bibr" rid="ref5">5</xref>]. Regarding DTC genetic testing, the consumers can choose the interpreter (ie, service provider) and the type and objective of the analysis of their genetic information (as opposed to a health care professional interpreting the genetic data). The most common types of testing services offered include ancestry tests (eg, AncestryDNA), nonmedical lifestyle tests (eg, FitnessGenes), relationship tests (eg, EasyDNA), and health tests (eg, 23andMe) [<xref ref-type="bibr" rid="ref2">2</xref>]. Although DTC genetic testing provides consumers with novel and valuable information, it also has its downsides, such as consumers being responsible for managing and ensuring the security of their personal genetic information [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Research Approach</title>
        <p>We employed a 3-step exploratory research approach to answer our RQs (see <xref rid="figure1" ref-type="fig">Figure 1</xref>). First, we performed comprehensive data collection by gathering DTC genetic testing–related videos on YouTube, including their comments, and coding the contents of these videos. Second, we performed topic modeling for the user discourse in the comments sections to reveal topics discussed in those comments (answering RQ1). Third, we analyzed users' attitudes toward DTC genetic testing videos using sentiment analysis (answering RQ2).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the 3-step research approach. NRC: National Research Council Canada.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>We used the official YouTube application programming interface (API) to create a list of the most relevant DTC genetic testing–related videos on YouTube. With the region set to the United States (ie, the largest DTC genetic testing market), we queried the 300 most viewed video results for each of 6 different DTC genetic testing–related search terms (ie, direct to consumer genetic testing, home genetic testing, ancestry testing, DNA testing, genetic testing, and 23andMe). Thereafter, we combined the 1800 results from the 6 queries, removed duplicates, and sorted them by video views in descending order. We further excluded all videos with fewer than 50,000 views because they had very few comments per video (average of 61.2), with many having no comments (n=336).</p>
        <p>Next, the remaining 468 videos were reviewed for relevance through iterative manual inspection by 2 researchers, with a third researcher breaking ties in case of differences. For this, our predefined exclusion criteria were as follows: (1) videos not focusing on DTC genetic testing, (2) videos focusing on genetic testing of animals, (3) videos focusing on clinical prenatal genetic testing, (4) videos not in English, (5) live stream videos, (6) duplicate videos (ie, reuploads from different users), (7) videos commenting/reacting on videos (ie, showing the original video and adding commentary), or (8) videos with disabled ratings and comments sections (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for a detailed overview of the data collection process, including a rationale for each exclusion criterion). This resulted in a total of 250 relevant videos.</p>
        <p>To gain insights on what topics the videos entailed, particularly the goal of the genetic test presented and the presentation type of the video, we coded the included videos according to their genetic test purpose and media type. For the genetic test purpose, we selected the most common test types suggested in the literature (ie, ancestry, traits, genetic predisposition, relationship, and other [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]). As for the media type, we adapted the categories used by Zhang et al [<xref ref-type="bibr" rid="ref39">39</xref>] to our set of videos. Therefore, the categories were advertising, documentary, interview, news, user-generated video, and other. After the initial coding and comparison of 20 videos, 2 researchers conducted deductive coding of the remaining videos in parallel. In general, the agreement between both researchers was high, with the genetic test purpose and media type having Cohen κ values of 0.581 and 0.613, respectively. Differences in coding were discussed with a third author to break ties. This coding information allowed us to further analyze the comments regarding the contents of the videos and served as a base to evaluate the discussions in the comments.</p>
        <p>With the final coded set of 250 videos in place, we again used the YouTube API to download each video's 500 most recent comments. This number was chosen due to the YouTube API download limitations while still allowing meaningful analysis. Among these, 80 videos had fewer than 500 comments, and 2 videos were no longer available, leaving us with 84,082 comments from 248 videos, which is a sufficient number for topic modeling and sentiment analysis [eg, <xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
      <sec>
        <title>Topic Modeling of Comments</title>
        <p>To answer our first RQ, we employed topic modeling to identify common topics discussed by users in the comments sections of DTC genetic testing–related YouTube videos. Topic modeling is frequently used in medical informatics and related disciplines for text mining large data sets (such as comments or tweets) and deducing meaningful topics [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. For our study, we used several topic modeling approaches, including word frequency, bigram correlations, and structural topic modeling, as described and recommended by Silge and Robinson [<xref ref-type="bibr" rid="ref42">42</xref>]. Because they are some of the most common topic modeling methods and include different approaches [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>], they are well suited for our exploratory study design. All analyses and visualizations were conducted using R (version 4.1.0, R Foundation for Statistical Computing) in RStudio (version 1.4.1106) and the tidytext package (version 0.3.2).</p>
        <p>Before conducting any topic modeling, we first separated the comments into 1-word tokens (ie, comments were split into single words) and performed 2 essential data cleaning tasks. First, we used the SnowballC package to perform word stemming. This step was necessary to ensure that words with identical meanings (eg, plural or verb) were grouped together to allow for meaningful topic modeling. For each word stem, the most frequent word was used to represent its stem (eg, test represents test, tests, test's, and testing). Second, we removed common stop words with the stop word list included in the tidytext package. This list comprises 1149 common stop words such as the, of, or to. As these do not hold any topical information, removing stop words reduces the data set size and benefits topic accuracy [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>With the cleansed word list in place, we first conducted a word frequency analysis by grouping, counting, and listing the words in descending order. This provides an overview of the most used words and can give a first insight into topics discussed most prominently (eg, “DNA” occurs 15,702 times and “test” 10,902 times).</p>
        <p>Second, we created word bigrams. We created a frequency list of 2-word tokens, which are found by pairing every 2 consecutive words in each comment (eg, “DTC genetic testing” results in the bigrams “DTC genetic” and “genetic testing”). In contrast to the single word list, bigrams can be used to span a network with the number of occurrences indicating the weight of each bigram edge [<xref ref-type="bibr" rid="ref42">42</xref>]. To allow for meaningful interpretation, we found that setting a minimum of 70 occurrences resulted in a comprehensible network. Lower values led to the inclusion of less interpretable and impactful bigrams while cluttering the network (eg, “grocery store,” “hey kelsey,” or “omg lol”).</p>
        <p>Finally, we conducted structural topic modeling with the help of the stm package [<xref ref-type="bibr" rid="ref43">43</xref>]. Structural topic modeling aims to group words from different documents (ie, comments) into topics based on their co-occurrences [<xref ref-type="bibr" rid="ref43">43</xref>]. The stm package uses document-level covariate information to estimate topic models for a given number of topics. We estimated models ranging from 15 to 100 topics in increments of 5. We then compared these models in terms of best-practice metrics, such as held-out likelihood, lower bound, residuals, and semantic coherence [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        <p>Although there is no definite answer for the correct number of topics [<xref ref-type="bibr" rid="ref43">43</xref>], after a manual review of these metrics and discussion among 3 researchers, we selected 50 as the appropriate number of topics. A more detailed description of the structural topic modeling process and metrics, as well as a comparison with the 45- and 55-topic model, can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <p>With the 50-topic model chosen, we sorted topics according to prevalence and within each topic, the words contributing to it in descending order. We then manually inspected the 50 most prevalent topics and their 10 most contributing words to deduce meaningful topics and categorized them according to their content. For this, we relied on our prior knowledge of DTC genetic testing as well as knowledge on the content of the videos that we gained during the video coding phase of the data collection step. All topic assignments were discussed among 3 researchers.</p>
      </sec>
      <sec>
        <title>Sentiment Analysis of Comments</title>
        <p>Because topic modeling can only help us identify topics discussed in the comments but not users' attitudes toward the videos, we next conducted word- and comment-level sentiment analyses to answer our second RQ. Sentiment analysis is a common tool to elicit people's opinions, sentiments, emotions, and attitudes from written language [<xref ref-type="bibr" rid="ref46">46</xref>]. Although sentiment and attitude are near equivalents and often used synonymously, they do differ in the sense that sentiment is a more permanent disposition to react emotionally, cognitively, and conatively, whereas attitude is a disposition to react with belief, thought, feeling, and overt behavior as part of a larger sentiment [<xref ref-type="bibr" rid="ref47">47</xref>]. In this sense, we can only deduce users' attitudes from a single YouTube comment and not their whole sentiment toward a certain topic.</p>
        <p>Therefore, we decided to conduct 2 word-level sentiment analyses and 1 comment-level sentiment analysis to deduce users' attitudes. For the word-level sentiment, we again used the tidytext package, which entails typical word-level approaches that are well suited for a first exploratory overview [<xref ref-type="bibr" rid="ref42">42</xref>]. For the comment-level analysis, we then followed an approach similar to that used by Mittos et al [<xref ref-type="bibr" rid="ref18">18</xref>], who also performed sentiment analysis in the DTC genetic testing context.</p>
        <p>Consequently, we first conducted a positive and negative sentiment analysis using the Bing lexicon, which consists of approximately 6800 words that are predefined and classified as either positive or negative [<xref ref-type="bibr" rid="ref48">48</xref>]. Subsequently, we aggregated the sentiments by word and overall sentiment. Even though this method provides a good sentiment overview, the lexicon's limited number of words omits most topic-specific words.</p>
        <p>We also used the National Research Council Canada (NRC) emotion lexicon to get a more detailed overview of users' sentiments toward DTC genetic testing [<xref ref-type="bibr" rid="ref49">49</xref>]. This lexicon attributes 1 or multiple emotions to approximately 14,000 words (ie, a word may have more than 1 emotion), whereby the classification is also predefined. The emotions covered are anger, anticipation, disgust, fear, joy, sadness, surprise, and trust. Similar to the Bing lexicon, we classified and aggregated all words by NRC sentiment. However, initial inspection revealed that the terms “black” and “white” were strongly associated with negative and positive emotions, respectively. Because it was likely that the overproportional use of these words in our data set was due to ancestry testing–related topics, and to avoid a strong association of ethnicity with emotions, we reran the analysis without them.</p>
        <p>For the comment-level sentiment analysis, we used SentiStrength [<xref ref-type="bibr" rid="ref50">50</xref>], a Java-based sentiment tool optimized for short social web text in English such as Twitter tweets or YouTube comments. The tool reports 2 predefined and experience-based sentiments for each document (ie, comment): first, a negative sentiment ranging from –1 (not negative) to –5 (extremely negative) and, second, a positive sentiment ranging from 1 (not positive) to 5 (extremely positive). When combining both, we obtained a total sentiment score between –4 and +4. After calculating the sentiment score for each comment, we performed several analyses regarding sentiment as well as media type and test purpose.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Ethics approval was not necessary for this study, as it did not directly involve human participants. All data used in this study (ie, videos and video comments) were publicly available on YouTube and accessible through the YouTube API at the time of retrieval. All results are only published in aggregated form, and single references are presented anonymously and without context to protect the privacy of the comments’ authors.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview of Video Contents and Comments</title>
        <p>We examined a total of 248 videos related to DTC genetic testing, collected on September 14, 2020, with a total of 30 videos from official company accounts (21 videos from 23andMe, 8 videos from Ancestry.com, and 1 video from MyHeritage). Based on the media type, these included 27 advertising-related videos, 14 documentaries, 16 interviews, 12 news, 174 user-generated videos, and 5 with other media types (mainly recordings of television shows such as The Late Show with Stephen Colbert or The Jim Jefferies Show/Comedy Central). Among the 248 videos, 194 videos address ancestry as a test purpose, 15 address trait testing, 9 address genetic predispositions, 19 address relationship testing, and 11 address other purposes (such as how to use a test kit or comparison/presentation of multiple genetic test purposes). In total, the videos had 724,574 comments on the day of video data aggregation. We collected the comments of the videos on January 3, 2021, focusing on the 500 most recent comments of each video (total number of comments=84,082). An overview of the video metadata, content, and comments is provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Overview of video metadata, content, and comments.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Video characteristic</td>
                <td>Value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Number (N)</td>
                <td>248</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Date of collection</td>
                <td>September 14, 2020</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Media type (n)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Advertising</td>
                <td>27</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Documentary</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Interview</td>
                <td>16</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>News</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>User-generated videos</td>
                <td>174</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Test purpose addressed (n)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ancestry</td>
                <td>194</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Traits/characteristics</td>
                <td>15</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Genetic predisposition</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Relationship</td>
                <td>19</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Upload date</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Oldest</td>
                <td>January 15, 2015</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Newest</td>
                <td>July 7, 2020</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>View count</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minimum</td>
                <td>52,802</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum</td>
                <td>20,453,890</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average</td>
                <td>1,158,064</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Likes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minimum</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum</td>
                <td>368,294</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average</td>
                <td>22,114</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Dislikes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minimum</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum</td>
                <td>10,277</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average</td>
                <td>813</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Duration (minutes)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minimum</td>
                <td>00:31</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum</td>
                <td>34:23</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average</td>
                <td>09:30</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Comments</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Minimum</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum</td>
                <td>24,523</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Average</td>
                <td>2922</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Comment publication date</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Oldest</td>
                <td>March 29, 2017</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Newest</td>
                <td>January 2, 2021</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Topics of the DTC Genetic Testing Video Comments</title>
        <p>Word frequency analysis using the comments on DTC genetic testing–related videos provides valuable insights into the topics discussed by users. DNA (n=15,702), test (n=10,902), and people (n=9259) are by far the most frequent terms, thus indicating that users indeed primarily discuss DTC genetic testing in their comments. Additionally, we identified many words referring to ancestry testing such as ancestry (n=5015), african (n=6268), or american (n=6139). Moreover, words such as family (n=5252), dad (n=2932), or parents (n=2228) can be attributed to relationship tests. Overall, the 100 most frequent words resemble the test purposes identified from the videos themselves as well as a general excitement for DTC genetic testing videos (eg, video, n=4794; love, n=4751). <xref ref-type="table" rid="table2">Table 2</xref> provides an overview of the 20 most frequent words. Additionally, <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> provides a word cloud and overview of the 100 most frequent words.</p>
        <p>The bigram network of the comments provides a more fine-grained picture of the words used together often. Unlike the single word cloud, it allows us to see how multiple words are connected. Additionally, the arrows indicate in which order the words appear, whereas the shade of the edge represents the frequency of the word pair. Therefore, we can deduce possible topics discussed by users from the network.</p>
        <p>As shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, we identified 5 main topics within the network. The largest topic we identified revolves around ancestry testing (blue cluster). Although the most indicative bigram is “ancestry DNA” (n=679), most bigrams in this topic describe a specific heritage such as “native american” (n=3255), “north african” (n=831), or “middle eastern” (n=756), further substantiating that users largely discuss ancestry results of genetic testing in the comments. The second-largest topic deals with trait testing (green cluster) and holds bigrams such as “blonde/brown/red hair” (n=203/n=72/n=41), “skin color” (n=131), or “blue eyes” (n=285). The third topic entails bigrams related to health testing (yellow cluster). Typical bigrams include “insurance companies” (n=121), “genetic makeup” (n=76), and “23andme test” (n=72). The last topic related to genetic testing indicates relationship testing (red cluster). It includes bigrams such as “identical twins” (n=231), “half sister” (n=124), or “biological parents” (n=74). We also identified 1 topic not specific to DTC genetic testing but YouTube as a platform in general (gray cluster). The bigrams found in this topic are parts of video URLs, for example, “https youtu.be” (n=246) or “www.youtube.com watch” (n=201). This indicates that users often share videos in the comments sections of videos, possibly on related topics.</p>
        <p>Finally, we trained structural topic models, of which we selected the 50-topic model. <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the 20 most prevalent topics, including the 10 most important words for each topic of this model. The complete list of all 50 topics can be found in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. For a better overview of the topics discussed in the comments sections, we grouped these 20 topics into 6 categories, briefly described in the following:</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>List of the 20 most frequent words obtained from comment analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="300"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Rank</td>
                <td>Word</td>
                <td>Frequency (n)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>dna</td>
                <td>15,702</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>test</td>
                <td>10,902</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>people</td>
                <td>9259</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>african</td>
                <td>6268</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>results</td>
                <td>6178</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>american</td>
                <td>6139</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>family</td>
                <td>5252</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>european</td>
                <td>5142</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>ancestry</td>
                <td>5015</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>video</td>
                <td>4794</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>love</td>
                <td>4751</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>native</td>
                <td>4665</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>white</td>
                <td>4489</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>black</td>
                <td>4203</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>lol</td>
                <td>3469</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>asian</td>
                <td>3276</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>irish</td>
                <td>3177</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>mixed</td>
                <td>2984</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>dad</td>
                <td>2932</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>father</td>
                <td>2782</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Bigram network of 2-word tokens found in the comments of direct-to-consumer genetic testing–related videos on YouTube with a minimum of 70 occurrences. Colored legends indicate topic attribution.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Top 20 topics and their 10 most indicative words from the 50-topic model. Colored legends indicate topic attribution.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>General Genetic Testing</title>
          <p>This topic group indicates a general interest in DTC genetic testing (eg, topics 16, 31, 49), entailing company names such as MyHeritage, AncestryDNA, or Ancestry.com and words of interest (eg, excited or expect). Moreover, topic 16 touches on the home collection (spit, tube) and financial (money) aspects of DTC genetic testing.</p>
        </sec>
        <sec>
          <title>Ancestry Testing</title>
          <p>In line with our previous findings, most topics are about the results of genetic ancestry testing. Topic 8 shows a general interest in ancestry testing by users. Topics 17, 26, 37, and 47 describe findings on heritage from a specific region, whereas topic 41 is about paternal and maternal ancestry. Additionally, topic 19 might indicate that users hope to find lost relatives through ancestry testing.</p>
        </sec>
        <sec>
          <title>Relationship Testing</title>
          <p>We also identified 3 topics about genetic relationship testing. Topics 34 and 48 deal with relationships between children such as identical twins, whereas topic 36 entails the aspects of adoption and genealogy (ie, searching for one's biological family).</p>
        </sec>
        <sec>
          <title>Health and Trait Testing</title>
          <p>Although less prevalent, health genetic testing and trait testing are also covered in the top 20 topics. Topic 44 focuses on health information and data, whereas topic 28 entails words on traits such as hair or eye color.</p>
        </sec>
        <sec>
          <title>Ethical Concerns</title>
          <p>The 50-topic model also reveals some topics not contained in our previous findings. Topic 32 touches on instances of racism signified through words such as black, racist, or mad. Given the ongoing and complex debate toward instances of racism in the United States and the majority of DTC genetic testing revolving around ancestry and heritage, this could explain why this topic was found in the comments of these videos. Moreover, topic 22 deals with users' concerns regarding genetic testing and the government, with words such as lie, ad, or crime.</p>
        </sec>
        <sec>
          <title>YouTube Video Reaction</title>
          <p>In contrast to the previous findings, topics 18, 27, and 43 do not directly relate to genetic testing but rather entail reactions to the videos on YouTube (eg, love, awesome, watching, video, or channel). Further, users seem interested in personal stories (eg, amazing, story, or reaction).</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison of Topic Modeling Approaches and Identified Topics</title>
        <p>Although the bigram network and structural topic modeling use different approaches, the majority of the identified topics are present in both methods. Both approaches show strong indications of ancestry testing, relationship testing, trait testing, and health testing topics. Moreover, both methods led to the deduction of a YouTube or YouTube video–related topic. <xref ref-type="table" rid="table3">Table 3</xref> compares the topics covered by the bigram network and structural topic modeling and lists some of the most indicative bigrams and words for each method, respectively.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of identified topics using the bigram network and structural topic modeling.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="420"/>
            <col width="390"/>
            <thead>
              <tr valign="top">
                <td>Topic</td>
                <td>Bigram network</td>
                <td>Structural topic modeling</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>General genetic testing</td>
                <td>N/A<sup>a</sup></td>
                <td>Myheritage; ancestrydna; ancestrycom; excited; expect; spit; tube; money; genes; dna; genetic</td>
              </tr>
              <tr valign="top">
                <td>Ancestry testing</td>
                <td>Ancestry dna; native american; north african; middle eastern</td>
                <td>Ancestry; african; american; native; irish; german; french; father; parents; race; mexican</td>
              </tr>
              <tr valign="top">
                <td>Relationship testing</td>
                <td>Identical twins; half sister; biological parents</td>
                <td>Kids; cry; family; adopted; genealogy; lies</td>
              </tr>
              <tr valign="top">
                <td>Trait testing</td>
                <td>Blonde/brown/red hair; skin color; blue eyes</td>
                <td>Hair; eyes; blonde; blue; red</td>
              </tr>
              <tr valign="top">
                <td>Health testing</td>
                <td>Insurance companies; genetic makeup; 23andme test</td>
                <td>Companies; information; health; pay</td>
              </tr>
              <tr valign="top">
                <td>Ethical concerns</td>
                <td>N/A</td>
                <td>Black; racist; claim; government; clone; crime; evidence</td>
              </tr>
              <tr valign="top">
                <td>YouTube-related</td>
                <td>https youtu.be; www.youtube.com watch</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>YouTube video reaction</td>
                <td>N/A</td>
                <td>Love; awesome; watching; video; channel; amazing; story; reaction</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Sentiments of DTC Genetic Testing Video Comments</title>
        <p>Even though topic modeling can help unveil what users discuss in the comments sections, it does not provide insights into users' attitudes toward these topics. Therefore, conducting a Bing sentiment analysis can provide a first overview of the sentiment regarding words used in the comments sections. <xref rid="figure4" ref-type="fig">Figure 4</xref> shows the 20 most used words with negative and positive sentiments. The results show that the most used positive words are used significantly more often than the most used negative words. In fact, the first negative word, funny (n=864), is only the seventh most used word overall in the sentiment list. Moreover, the positive word love (n=4751) is used overproportionally, having more than twice as many occurrences as the second most used word, beautiful (n=1953). However, when observing all positively and negatively classified occurrences, we can identify more negative word uses (n=38,734) than positive ones (n=35,897).</p>
        <p>Another type of sentiment analysis is the identification of emotions with the NRC lexicon. Our results show that the most frequent words representing positive emotions, namely anticipation, joy, surprise, and trust, have higher occurrences than the words expressing negative emotions, namely anger, fear, disgust, and sadness (see <xref rid="figure5" ref-type="fig">Figure 5</xref>). This finding is also supported by overall occurrences of positive word emotions (n=148,791) and negative word emotions (n=76,761). Love, the single most used word (n=4751), is associated with the emotion of joy, and the most frequent emotion is trust (n=54,814). In contrast, disgust (n=15,541) has the fewest word occurrences.</p>
        <p>The comment-level sentiment analysis provides insights into user attitudes as well as attitudes toward DTC genetic testing videos and their respective content (ie, test purpose and media type). Although the SentiStrength sentiment can vary on a scale of –4 to 4, the average sentiment score of all comments is 0.32, meaning slightly positive. This is also reflected by almost half of all the comments (n=36,804) having a neutral sentiment (ie, 0). Grouping comment sentiment by video shows that the lowest sentiment score per video comments section is –0.62, whereas the highest is 1.33. Overall, only 30 of the 248 inspected videos have a negative sentiment, indicating an overall positive attitude toward DTC genetic testing videos.</p>
        <p>When comparing comment sentiment regarding the test purpose of the videos, our results show that from the comments with a sentiment score of 4, 91.6% (230/251) are in the comments sections of videos about ancestry testing (most frequent test purpose), whereas for comments with a sentiment score of –4, ancestry testing videos only account for 67.9% (76/112). In contrast, only 1.6% (4/251) of the comments with a sentiment score of 4 are in the responses to a video dealing with relationship testing. However, this increases to 17% (19/112) for comments with a sentiment score of –4. As shown in <xref rid="figure6" ref-type="fig">Figure 6</xref> (left), videos with an ancestry test purpose seem to evoke more positive user comments, whereas this is the opposite for relationship test videos.</p>
        <p>The analysis of comment sentiment regarding media type unveils that user-generated videos account for the most significant number of positive comments with 91.6% (230/251) for a sentiment score of 4. On the contrary, for a sentiment score of –4, user-generated videos only account for 60.7% (68/112) of the comments. Consequently, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref> (right), user-generated videos tend to evoke the most positive attitude toward their video content. This is in contrast to the media types advertising, documentary, and interview; all of these show an increase in the number of comments with decreasing sentiment values. For example, the number of comments for the media type documentary increases from 2% (5/251) with a sentiment score of 4 to 15.2% (17/112) with a sentiment score of –4. Therefore, advertisements, documentaries, and interviews may evoke more negative responses than user-generated videos.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Bing sentiment by most frequent words for negative and positive sentiments.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>National Research Council Canada (NRC) sentiment by most frequent words for the emotions anger, anticipation, disgust, fear, joy, sadness, surprise, and trust.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Spreads for test purpose (left) and media type (right) by sentiment.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e38749_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our analysis of user comments on DTC genetic testing–related YouTube videos yields several valuable findings. The test purposes found in the videos largely resemble the most common genetic test purposes, with most videos talking about ancestry or relationship testing and fewer about trait and health testing. This finding is in line with previous research on YouTube videos related to DTC genetic testing [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>] and social media in general [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Nonetheless, in contrast to our study, Yin et al [<xref ref-type="bibr" rid="ref27">27</xref>] found in their collected Reddit data set that relationship and health testing were more often mentioned than ancestry testing. Although Mittos et al [<xref ref-type="bibr" rid="ref18">18</xref>] do not report the same finding for their Reddit data set, this may indicate that users of different social media platforms have other interests regarding DTC genetic testing. Another possible explanation for this could be that platform suggestion algorithms differ and may hence propose distinct content to users depending on the platform. Thus, discourses on the respective platforms should be investigated individually before assuming DTC genetic testing–related findings to be true across multiple platforms.</p>
        <p>Moreover, most topics found with the bigram network and structural topic modeling can be attributed to common DTC genetic testing purposes. This indicates that user discourse revolves around the contents of the videos and DTC genetic testing. In line with previous research, we also identified topics dealing with general genetic testing and users' interest in and excitement for DTC genetic testing [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref51">51</xref>].</p>
        <p>Besides, research has also shown instances of racism regarding ancestry testing on Twitter [<xref ref-type="bibr" rid="ref18">18</xref>], which we also identified as a topic in the video comments. Even though it is unclear whether these comments relate directly to the content of the respective video or are in the replies to other comments, the identified topics largely revolve around racism and discrimination against African Americans and Native Americans. However, our results did not show any specific topics on the educational content of DTC genetic testing. Considering that consumers in the United States continue to use DTC ancestry tests to prove their “genetic purity” and discriminate against marginalized ethnic groups such as the aforementioned ones, especially on social media [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], research has called for more educational content and scientific explanations about DTC genetic testing [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Despite finding some videos expressing concerns toward DTC genetic testing (eg, documentaries), the majority of the videos seem to fail to highlight the advantages as well as the disadvantages and risks of DTC genetic testing. Hence, the discussions in the comments section may also largely neglect these aspects.</p>
        <p>Sentiment analysis revealed that users have more negative attitudes toward the content of advertisements, news, or documentary videos compared to user-generated videos on DTC genetic testing. Although this finding could be explained through some media types being more thought-provoking (eg, documentaries covering disadvantages and risks of DTC genetic testing or news covering stories of genetic discrimination), another explanation might be that user-generated videos are often produced by single creators often trying to engage more with their YouTube community (eg, through specific content or active discussion in the comments sections) than, for example, a news broadcaster or DTC genetic testing service provider. Hence, this may result in a more positive user attitude. This assumption is further supported by our findings on YouTube-related and YouTube video reaction topics. On the one hand, these findings once again indicate that users discuss and respond to the content discussed in the respective videos, and on the other hand, they suggest a more complex discussion between content creators and their community (eg, through expressing enjoyment of content or including links to further YouTube videos). It should be noted that the revealed user attitudes on DTC genetic testing videos do not necessarily reflect user attitudes toward DTC genetic testing in general. However, as our topic modeling results suggest that user comments largely revolve around DTC genetic testing, it is likely that users’ attitudes toward DTC genetic testing videos also reflect their attitudes toward DTC genetic testing to some degree. This notion is further supported by the finding that videos discussing the disadvantages and risks of DTC genetic testing tend to have more negative user attitudes. 
Comparable results on user attitudes toward DTC genetic testing were also found for Twitter and related textual platforms [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref51">51</xref>], thereby strengthening this assumption.</p>
        <p>Similar to DTC genetic testing–related Reddit posts [<xref ref-type="bibr" rid="ref41">41</xref>], we found that user emotions toward DTC genetic testing videos expressed through the comments are mainly positive. The NRC sentiment and comment-level sentiment analyses also indicate a clear tendency toward a positive user attitude. This may be explained by the majority of videos being user-generated ones and the aforementioned higher community engagement of content creators. Previous research on user sentiment in tweets also shows a positive sentiment toward DTC genetic testing [<xref ref-type="bibr" rid="ref51">51</xref>]. However, Mittos et al [<xref ref-type="bibr" rid="ref18">18</xref>] found that most tweets only have a sentiment score of 0 or 1. In line with previous research [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref51">51</xref>], these less positive emotions and attitudes could indicate that although users are generally interested in DTC genetic testing, they still have reservations regarding this new technology. These reservations are mirrored in the results of the NRC sentiment analysis that highlighted fear as the most prominent negative attitude toward DTC genetic testing, whereas trust was the most prominent positive attitude. These reservations toward DTC genetic testing were also highlighted in prior research [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      </sec>
      <sec>
        <title>Implications for Research and Practice</title>
        <p>This study conveys several implications for research and practice. As for research, we contribute to the literature on user attitudes toward DTC genetic testing by investigating topics and opinions discussed about these genetic tests. We examined the 248 most viewed DTC genetic testing videos on YouTube in terms of their content (ie, test purpose, media type) and analyzed users' attitudes in the form of their comments. Further, we contribute to research regarding health information on social media by showing that YouTube comments provide valuable insights into user discourse on social media. This study suggests that video content and user comments are co-dependent and should therefore be investigated together. To this end, we provide new insights into the discourse on genetic testing on YouTube by showing that the discourse in the comments primarily revolves around the content of the videos. Our research indicates that the discourse on YouTube may differ from that on other social media platforms, and hence, a detailed and differentiated consideration of the different platforms may be necessary. We further contribute to knowledge regarding user behavior on social media by examining users' attitudes and emotions toward DTC genetic testing videos on YouTube.</p>
        <p>As for practice, our research offers important implications for DTC genetic testing service providers, content creators, and regulatory authorities regarding user attitudes, which may help adapt or improve genetic testing services, multimedia content, or regulations. Similar to the study of Lee et al [<xref ref-type="bibr" rid="ref21">21</xref>] involving Twitter, our identified topics indicate a lack of educational information about DTC genetic testing in YouTube videos. Further, sentiment analysis shows that users have more negative attitudes toward advertisements, news, or documentary videos and prefer user-generated content on DTC genetic testing. Hence, authorities could consider working with content creators to promote user education on DTC genetic testing. Finally, our topic modeling indicates instances of racism, especially regarding ancestry testing. Service providers and authorities should be aware of this and ensure genetic testing is not used for discrimination. Therefore, we suggest that it may be helpful to flag videos with high numbers of negative comments, including racism or anxiety, and provide further information regarding DTC genetic testing via banners or other visual cues, similar to those used on many platforms for content related to COVID-19 [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
      </sec>
      <sec>
        <title>Limitations and Future Research</title>
        <p>The limitations of this study are as follows. First, we only considered a limited number of videos and comments. Even though we attempted to include an appropriate sample by saturating the videos and comments using metrics such as views and number of comments, examining all the initially identified videos (n=1325) and comments could provide further insight, particularly concerning topic modeling and sentiment analysis. Second, we limited our YouTube API queries to the United States because the related DTC genetic testing market is the most evolved there. However, other regions with thriving markets, such as Asia [<xref ref-type="bibr" rid="ref30">30</xref>], could offer further insights into user discourse and should therefore be investigated in future research. Third, because there is no way to determine the optimal number of topics [<xref ref-type="bibr" rid="ref42">42</xref>], we concentrated on models in increments of 5, selecting the 50-topic model. Although adjacent models tend to have many similar topics, it is possible that we did not identify a vital topic covered in a different solution. Future research could also attempt using different topic modeling methods and larger sample sizes to unveil a more fine-grained view of the topics discussed. Fourth, although we covered several sentiment lexicons, they may have been limited with respect to words associated with a sentiment (eg, Bing sentiment), and research should further investigate YouTube comment sentiment to gain deeper insight into user attitudes. It should also be pointed out that the generic association of words with sentiment values and emotions could omit or alter some findings in specific contexts such as DTC genetic testing. However, we tried to minimize this effect by using different approaches and content-specific modifications such as removing the words “white” and “black” from the NRC sentiment analysis, as these were used disproportionately often. 
Finally, although this study investigated videos spanning from 2015 to 2020, we did not specifically focus on whether or how user discourse and attitudes might have changed over time. Because we only collected the 500 most recent comments, the majority of these can be dated to 2021. However, the DTC genetic testing market has evolved and continues to evolve and change rapidly [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Future research should thus consider a temporal analysis of DTC genetic testing videos and comments to investigate whether the market changes also affected user discourse and attitudes.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study examined 248 DTC genetic testing videos and 84,082 comments on YouTube to investigate user discourse. To this end, we employed topic modeling and identified 6 prevailing topics discussed among users, which largely revolve around the test purposes mentioned within those videos, such as ancestry or relationship testing. Further, we conducted sentiment analysis, showing that users have positive emotions, as indicated by the NRC sentiments of anticipation, joy, surprise, and trust, and a generally neutral-to-positive attitude toward DTC genetic testing expressed through words such as love, beautiful, pretty, and cool as well as a positive attitude toward DTC genetic testing–related videos on YouTube in general. Through this study, we show how users' attitudes toward DTC genetic testing can be determined by analyzing topics and opinions based on YouTube video comments. Our findings show that users are highly interested in DTC genetic testing and related social media content. Nonetheless, with this novel market still evolving, service providers, content providers, or regulatory authorities may need to adapt their services to users' interests and desires.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Direct-to-consumer genetic testing on social media.</p>
        <media xlink:href="infodemiology_v2i2e38749_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 61 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Data collection process.</p>
        <media xlink:href="infodemiology_v2i2e38749_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 97 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Structural topic modeling.</p>
        <media xlink:href="infodemiology_v2i2e38749_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 321 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Word frequency analysis results.</p>
        <media xlink:href="infodemiology_v2i2e38749_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 122 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DTC</term>
          <def>
            <p>direct-to-consumer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NRC</term>
          <def>
            <p>National Research Council Canada</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">RQ</term>
          <def>
            <p>research question</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The present contribution is supported by the Helmholtz Association under the joint research school “HIDSS4Health – Helmholtz Information and Data Science School for Health.” We acknowledge support by the KIT-Publication Fund of the Karlsruhe Institute of Technology.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allyse</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Ferber</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Direct-to-Consumer Testing 2.0: emerging models of direct-to-consumer genetic testing</article-title>
          <source>Mayo Clin Proc</source>
          <year>2018</year>
          <month>01</month>
          <volume>93</volume>
          <issue>1</issue>
          <fpage>113</fpage>
          <lpage>120</lpage>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2017.11.001</pub-id>
          <pub-id pub-id-type="medline">29304915</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(17)30772-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Weissman</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>The dawn of consumer-directed testing</article-title>
          <source>Am J Med Genet C Semin Med Genet</source>
          <year>2018</year>
          <month>03</month>
          <volume>178</volume>
          <issue>1</issue>
          <fpage>89</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1002/ajmg.c.31603</pub-id>
          <pub-id pub-id-type="medline">29512889</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ugalmugle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Swain</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Direct-to-consumer (DTC) genetic testing market size projections 2022 - 2028</article-title>
          <source>Global Market Insights</source>
          <year>2022</year>
          <month>04</month>
          <access-date>2022-09-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gminsights.com/industry-analysis/direct-to-consumer-dtc-genetic-testing-market">https://www.gminsights.com/industry-analysis/direct-to-consumer-dtc-genetic-testing-market</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haga</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Willard</surname>
              <given-names>HF</given-names>
            </name>
          </person-group>
          <article-title>Letting the genome out of the bottle</article-title>
          <source>N Engl J Med</source>
          <year>2008</year>
          <month>05</month>
          <volume>358</volume>
          <issue>20</issue>
          <fpage>2184</fpage>
          <pub-id pub-id-type="doi">10.1056/NEJMc086053</pub-id>
          <pub-id pub-id-type="medline">18480218</pub-id>
          <pub-id pub-id-type="pii">358/20/2184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Javitt</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Byers</surname>
              <given-names>P</given-names>
            </name>
            <collab>American Society of Human Genetics Social Issues Committee</collab>
          </person-group>
          <article-title>ASHG statement* on direct-to-consumer genetic testing in the United States</article-title>
          <source>Obstet Gynecol</source>
          <year>2007</year>
          <month>12</month>
          <volume>110</volume>
          <issue>6</issue>
          <fpage>1392</fpage>
          <lpage>1395</lpage>
          <pub-id pub-id-type="doi">10.1097/01.AOG.0000292086.98514.8b</pub-id>
          <pub-id pub-id-type="medline">18055737</pub-id>
          <pub-id pub-id-type="pii">110/6/1392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hunter</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Khoury</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Drazen</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Letting the genome out of the bottle--will we get our wish?</article-title>
          <source>N Engl J Med</source>
          <year>2008</year>
          <month>01</month>
          <volume>358</volume>
          <issue>2</issue>
          <fpage>105</fpage>
          <lpage>107</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp0708162</pub-id>
          <pub-id pub-id-type="medline">18184955</pub-id>
          <pub-id pub-id-type="pii">358/2/105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thiebes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Toussaint</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lyytinen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyaev</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Valuable genomes: taxonomy and archetypes of business models in direct-to-consumer genetic testing</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>01</month>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>e14890</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/1/e14890/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14890</pub-id>
          <pub-id pub-id-type="medline">31961329</pub-id>
          <pub-id pub-id-type="pii">v22i1e14890</pub-id>
          <pub-id pub-id-type="pmcid">PMC7001042</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saey</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>What I actually learned about my family after trying 5 DNA ancestry tests</article-title>
          <source>ScienceNews</source>
          <year>2018</year>
          <month>06</month>
          <access-date>2022-09-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencenews.org/article/family-dna-ancestry-tests-review-comparison">https://www.sciencenews.org/article/family-dna-ancestry-tests-review-comparison</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Briscoe</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ajunwa</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Gaddis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McCormick</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Evolving public views on the value of one's DNA and expectations for genomic database governance: results from a national survey</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>03</month>
          <volume>15</volume>
          <issue>3</issue>
          <fpage>e0229044</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0229044"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0229044</pub-id>
          <pub-id pub-id-type="medline">32160204</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-26198</pub-id>
          <pub-id pub-id-type="pmcid">PMC7065739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Cook-Deegan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Beyond our borders? Public resistance to global genomic data sharing</article-title>
          <source>PLoS Biol</source>
          <year>2016</year>
          <month>11</month>
          <volume>14</volume>
          <issue>11</issue>
          <fpage>e2000206</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pbio.2000206"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pbio.2000206</pub-id>
          <pub-id pub-id-type="medline">27806054</pub-id>
          <pub-id pub-id-type="pii">pbio.2000206</pub-id>
          <pub-id pub-id-type="pmcid">PMC5091881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Guerrini</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>McGuire</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Direct-to-consumer genetic testing: value and risk</article-title>
          <source>Annu Rev Med</source>
          <year>2021</year>
          <month>01</month>
          <volume>72</volume>
          <fpage>151</fpage>
          <lpage>166</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-med-070119-114727</pub-id>
          <pub-id pub-id-type="medline">32735764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raz</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Niemiec</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Sterckx</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cockbain</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Prainsack</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Transparency, consent and trust in the use of customers' data by an online genetic testing company: an exploratory survey among 23andMe users</article-title>
          <source>New Genet Soc</source>
          <year>2020</year>
          <month>05</month>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>459</fpage>
          <lpage>482</lpage>
          <pub-id pub-id-type="doi">10.1080/14636778.2020.1755636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thiebes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lyytinen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyaev</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Sharing is about caring? Motivating and discouraging factors in sharing individual genomic data</article-title>
          <source>ICIS 2017 Proceedings</source>
          <year>2017</year>
          <conf-name>Thirty Eighth International Conference on Information Systems</conf-name>
          <conf-date>December 10-13, 2017</conf-date>
          <conf-loc>Seoul, South Korea</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aisel.aisnet.org/icis2017/IT-and-Healthcare/Presentations/15/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beyene</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Toussaint</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Thiebes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schlesner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brors</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyaev</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A scoping review of distributed ledger technology in genomics: thematic analysis and directions for future research</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2022</year>
          <month>07</month>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>1433</fpage>
          <lpage>1444</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocac077</pub-id>
          <pub-id pub-id-type="medline">35595301</pub-id>
          <pub-id pub-id-type="pii">6589998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Toussaint</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Thiebes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt-Kraepelin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyaev</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Perceived fairness of direct-to-consumer genetic testing business models</article-title>
          <source>Electron Mark</source>
          <year>2022</year>
          <month>07</month>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35874303"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12525-022-00571-x</pub-id>
          <pub-id pub-id-type="medline">35874303</pub-id>
          <pub-id pub-id-type="pii">571</pub-id>
          <pub-id pub-id-type="pmcid">PMC9294841</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weidman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Aurite</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Grossklags</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>On sharing intentions, and personal and interdependent privacy considerations for genetic data: a vignette study</article-title>
          <source>IEEE/ACM Trans Comput Biol Bioinform</source>
          <year>2019</year>
          <month>07</month>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>1349</fpage>
          <lpage>1361</lpage>
          <pub-id pub-id-type="doi">10.1109/TCBB.2018.2854785</pub-id>
          <pub-id pub-id-type="medline">30004885</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boodman</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>White nationalists are flocking to genetic ancestry tests--with surprising results</article-title>
          <source>Scientific American</source>
          <year>2017</year>
          <month>08</month>
          <access-date>2022-09-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.scientificamerican.com/article/white-nationalists-are-flocking-to-genetic-ancestry-tests-with-surprising-results/">https://www.scientificamerican.com/article/white-nationalists-are-flocking-to-genetic-ancestry-tests-with-surprising-results/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mittos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zannettou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Blackburn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Cristofaro</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Analyzing genetic testing discourse on the web through the lens of Twitter, Reddit, and 4chan</article-title>
          <source>ACM Trans Web</source>
          <year>2020</year>
          <month>09</month>
          <volume>14</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/3404994</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Consumer health information seeking in social media: a literature review</article-title>
          <source>Health Info Libr J</source>
          <year>2017</year>
          <month>12</month>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>268</fpage>
          <lpage>283</lpage>
          <pub-id pub-id-type="doi">10.1111/hir.12192</pub-id>
          <pub-id pub-id-type="medline">29045011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Basch</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Fera</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Quinones</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A content analysis of direct-to-consumer DNA testing on TikTok</article-title>
          <source>J Community Genet</source>
          <year>2021</year>
          <month>07</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>489</fpage>
          <lpage>492</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33860464"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12687-021-00526-3</pub-id>
          <pub-id pub-id-type="medline">33860464</pub-id>
          <pub-id pub-id-type="pii">10.1007/s12687-021-00526-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC8241950</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Abitbol</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>VanDyke</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Science communication meets consumer relations: an analysis of Twitter use by 23andMe</article-title>
          <source>Sci Commun</source>
          <year>2020</year>
          <month>04</month>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>244</fpage>
          <lpage>264</lpage>
          <pub-id pub-id-type="doi">10.1177/1075547020914906</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McLachlan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>23 YouTube stats that matter to marketers in 2022</article-title>
          <source>Hootsuite</source>
          <year>2022</year>
          <month>02</month>
          <access-date>2022-09-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.hootsuite.com/youtube-stats-marketers/">https://blog.hootsuite.com/youtube-stats-marketers/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Public opinions and concerns regarding the Canadian prime minister's daily COVID-19 briefing: longitudinal study of YouTube comments using machine learning techniques</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>02</month>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>e23957</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/2/e23957/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23957</pub-id>
          <pub-id pub-id-type="medline">33544690</pub-id>
          <pub-id pub-id-type="pii">v23i2e23957</pub-id>
          <pub-id pub-id-type="pmcid">PMC7903980</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eke</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bond</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Viewing trends and users' perceptions of the effect of sleep-aiding music on YouTube: quantification and thematic content analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e15697</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e15697/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15697</pub-id>
          <pub-id pub-id-type="medline">32831182</pub-id>
          <pub-id pub-id-type="pii">v22i8e15697</pub-id>
          <pub-id pub-id-type="pmcid">PMC7477671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mittos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blackburn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Cristofaro</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>"23andMe confirms: I'm super white" - analyzing Twitter discourse on genetic testing</article-title>
          <source>ArXiv. Preprint posted online on Jan 30, 2018</source>
          <year>2021</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/pdf/1801.09946v2"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1801.09946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>The FDA authorization of direct-to-consumer genetic testing for three BRCA1/2 pathogenic variants: a Twitter analysis of the public's response</article-title>
          <source>JAMIA Open</source>
          <year>2019</year>
          <month>12</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>411</fpage>
          <lpage>415</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32025636"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooz037</pub-id>
          <pub-id pub-id-type="medline">32025636</pub-id>
          <pub-id pub-id-type="pii">ooz037</pub-id>
          <pub-id pub-id-type="pmcid">PMC6993995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Health and kinship matter: learning about direct-to-consumer genetic testing user experiences via online discussions</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>09</month>
          <volume>15</volume>
          <issue>9</issue>
          <fpage>e0238644</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0238644"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0238644</pub-id>
          <pub-id pub-id-type="medline">32898148</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-04299</pub-id>
          <pub-id pub-id-type="pmcid">PMC7478842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Basch</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Hillyer</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Wahrman</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Basch</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>DNA testing information on YouTube: inadequate advice can mislead and harm the public</article-title>
          <source>J Genet Couns</source>
          <year>2021</year>
          <month>06</month>
          <volume>30</volume>
          <issue>3</issue>
          <fpage>785</fpage>
          <lpage>792</lpage>
          <pub-id pub-id-type="doi">10.1002/jgc4.1375</pub-id>
          <pub-id pub-id-type="medline">33382174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Wyatt</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Autobiologies on YouTube: narratives of direct-to-consumer genetic testing</article-title>
          <source>New Genet Soc</source>
          <year>2014</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>60</fpage>
          <lpage>78</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24772003"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/14636778.2014.884456</pub-id>
          <pub-id pub-id-type="medline">24772003</pub-id>
          <pub-id pub-id-type="pmcid">PMC3996527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kragh-Furbo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tutton</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Spitting images: remaking saliva as a promissory substance</article-title>
          <source>New Genet Soc</source>
          <year>2017</year>
          <month>05</month>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>159</fpage>
          <lpage>185</lpage>
          <pub-id pub-id-type="doi">10.1080/14636778.2017.1320943</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marcon</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Rachul</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Caulfield</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The consumer representation of DNA ancestry testing on YouTube</article-title>
          <source>New Genet Soc</source>
          <year>2020</year>
          <month>08</month>
          <volume>40</volume>
          <issue>2</issue>
          <fpage>133</fpage>
          <lpage>154</lpage>
          <pub-id pub-id-type="doi">10.1080/14636778.2020.1799343</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Dmour</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Masa'deh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abuhashesh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Dmour</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Influence of social media platforms on public health protection against the COVID-19 pandemic via the mediating effects of public health awareness and behavioral changes: integrated model</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e19996</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e19996/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19996</pub-id>
          <pub-id pub-id-type="medline">32750004</pub-id>
          <pub-id pub-id-type="pii">v22i8e19996</pub-id>
          <pub-id pub-id-type="pmcid">PMC7439806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>GG</given-names>
            </name>
            <name name-style="western">
              <surname>Glasgow</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>The delivery of public health interventions via the internet: actualizing their potential</article-title>
          <source>Annu Rev Public Health</source>
          <year>2009</year>
          <month>04</month>
          <volume>30</volume>
          <fpage>273</fpage>
          <lpage>292</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev.publhealth.031308.100235</pub-id>
          <pub-id pub-id-type="medline">19296777</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rezaallah</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zeilhofer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Social media surveillance of multiple sclerosis medications used during pregnancy and breastfeeding: content analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>08</month>
          <volume>21</volume>
          <issue>8</issue>
          <fpage>e13003</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/8/e13003/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13003</pub-id>
          <pub-id pub-id-type="medline">31392963</pub-id>
          <pub-id pub-id-type="pii">v21i8e13003</pub-id>
          <pub-id pub-id-type="pmcid">PMC6702799</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Turk</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jilka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cella</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Measuring attitudes towards mental health using social media: investigating stigma and trivialisation</article-title>
          <source>Soc Psychiatry Psychiatr Epidemiol</source>
          <year>2019</year>
          <month>01</month>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30069754"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00127-018-1571-5</pub-id>
          <pub-id pub-id-type="medline">30069754</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00127-018-1571-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6336755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vraga</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Stefanidis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lamprianidis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Croitoru</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Crooks</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Delamater</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Pfoser</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Radzikowski</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Cancer and social media: a comparison of traffic about breast cancer, prostate cancer, and other reproductive cancers on Twitter and Instagram</article-title>
          <source>J Health Commun</source>
          <year>2018</year>
          <month>01</month>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>181</fpage>
          <lpage>189</lpage>
          <pub-id pub-id-type="doi">10.1080/10810730.2017.1421730</pub-id>
          <pub-id pub-id-type="medline">29313761</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Examining accumulated emotional traits in suicide blogs with an emotion topic model</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2016</year>
          <month>09</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>1384</fpage>
          <lpage>1396</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2015.2459683</pub-id>
          <pub-id pub-id-type="medline">26208372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jelodar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Orji</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep sentiment classification and topic discovery on novel coronavirus or COVID-19 online discussions: NLP using LSTM recurrent neural network approach</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2020</year>
          <month>10</month>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>2733</fpage>
          <lpage>2742</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2020.3001216</pub-id>
          <pub-id pub-id-type="medline">32750931</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A study on users' preference towards diabetes-related video clips on YouTube</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>02</month>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-1035-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-1035-1</pub-id>
          <pub-id pub-id-type="medline">32111208</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-1035-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7048121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Venkatesaramani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Downey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A semantic cover approach for topic modeling</article-title>
          <source>Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM 2019)</source>
          <year>2019</year>
          <conf-name>Eighth Joint Conference on Lexical and Computational Semantics</conf-name>
          <conf-date>June 6-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>92</fpage>
          <lpage>102</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/s19-1011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Benlian</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
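          <!-- NOTE(review): this confproc citation has no <article-title>; the paper title should be supplied from the ECIS 2021 proceedings record. -->
          <source>ECIS 2021 Research Papers</source>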
          <year>2021</year>
          <conf-name>29th European Conference on Information Systems</conf-name>
          <conf-date>June 14-16, 2021</conf-date>
          <conf-loc>Marrakech, Morocco</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Silge</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Text Mining With R: A Tidy Approach</source>
          <year>2017</year>
          <publisher-loc>Sebastopol, CA</publisher-loc>
          <publisher-name>O'Reilly Media</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Tingley</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>stm: An R package for structural topic models</article-title>
          <source>J Stat Softw</source>
          <year>2019</year>
          <month>10</month>
          <volume>91</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v091.i02</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taddy</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>On estimation and selection for topic models</article-title>
          <source>Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics, PMLR 22</source>
          <year>2012</year>
          <conf-name>Fifteenth International Conference on Artificial Intelligence and Statistics</conf-name>
          <conf-date>April 21-23, 2012</conf-date>
          <conf-loc>La Palma, Canary Islands</conf-loc>
          <fpage>1184</fpage>
          <lpage>1193</lpage>
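          <!-- NOTE(review): this DOI carries an IEEE prefix and an "inmic.2016" suffix, which does not match a 2012 AISTATS/PMLR paper; it is probably a mis-matched DOI. Verify and correct or remove. -->
          <pub-id pub-id-type="doi">10.1109/inmic.2016.7840071</pub-id>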
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mimno</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wallach</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Talley</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Leenders</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Optimizing semantic coherence in topic models</article-title>
          <source>Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2011</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing</conf-name>
          <conf-date>July 27-31, 2011</conf-date>
          <conf-loc>Edinburgh, Scotland</conf-loc>
          <fpage>262</fpage>
          <lpage>272</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis and opinion mining</article-title>
          <source>Synth Lect Hum Lang Technol</source>
          <year>2012</year>
          <month>05</month>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>167</lpage>
          <pub-id pub-id-type="doi">10.2200/s00416ed1v01y201204hlt016</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cattell</surname>
              <given-names>RB</given-names>
            </name>
          </person-group>
          <article-title>Sentiment or attitude? The core of a terminology problem in personality research</article-title>
          <source>J Pers</source>
          <year>1940</year>
          <month>09</month>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-6494.1940.tb02192.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Mining and summarizing customer reviews</article-title>
          <source>Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2004</year>
          <conf-name>KDD04: ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 22-25, 2004</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
          <fpage>168</fpage>
          <lpage>177</lpage>
          <pub-id pub-id-type="doi">10.1145/1014052.1014073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohammad</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Turney</surname>
              <given-names>PD</given-names>
            </name>
          </person-group>
          <article-title>Crowdsourcing a word-emotion association lexicon</article-title>
          <source>Comput Intell</source>
          <year>2013</year>
          <month>08</month>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>436</fpage>
          <lpage>465</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-8640.2012.00460.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thelwall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Paltoglou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kappas</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Sentiment strength detection in short informal text</article-title>
          <source>J Am Soc Inf Sci Technol</source>
          <year>2010</year>
          <month>12</month>
          <volume>61</volume>
          <issue>12</issue>
          <fpage>2544</fpage>
          <lpage>2558</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.21416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chow-White</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Struve</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lusoli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lesage</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Saraf</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Oldring</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>‘Warren Buffet is my cousin’: shaping public understanding of big data biotechnology, direct-to-consumer genomics, and 23andMe on Twitter</article-title>
          <source>Inf Commun Soc</source>
          <year>2017</year>
          <month>02</month>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>448</fpage>
          <lpage>464</lpage>
          <pub-id pub-id-type="doi">10.1080/1369118x.2017.1285951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nunziato</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>Misinformation mayhem: social media platforms' efforts to combat medical and political misinformation</article-title>
          <source>GW Law Faculty Publications &amp; Other Works</source>
          <year>2020</year>
          <month>08</month>
          <fpage>1</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scholarship.law.gwu.edu/faculty_publications/1502/"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
