<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Infodemiology</journal-id>
      <journal-title>JMIR Infodemiology</journal-title>
      <issn pub-type="epub">2564-1891</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i2e35702</article-id>
      <article-id pub-id-type="pmid">37113452</article-id>
      <article-id pub-id-type="doi">10.2196/35702</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Implicit Incentives Among Reddit Users to Prioritize Attention Over Privacy and Reveal Their Faces When Discussing Direct-to-Consumer Genetic Test Results: Topic and Attention Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Meacham</surname>
            <given-names>Meredith</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Thompson</surname>
            <given-names>Riki</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ceron</surname>
            <given-names>Wilson</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Yongtai</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0279-3644</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>Zhijun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3075-1337</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wan</surname>
            <given-names>Zhiyu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3752-5778</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yan</surname>
            <given-names>Chao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6719-1388</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Xia</surname>
            <given-names>Weiyi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0406-4944</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Ni</surname>
            <given-names>Congning</given-names>
          </name>
          <degrees>ME</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6950-6948</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Clayton</surname>
            <given-names>Ellen Wright</given-names>
          </name>
          <degrees>MD, JD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0308-4110</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Vorobeychik</surname>
            <given-names>Yevgeniy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2471-5345</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Kantarcioglu</surname>
            <given-names>Murat</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9795-9063</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Malin</surname>
            <given-names>Bradley A</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>Vanderbilt University Medical Center</institution>
            <addr-line>2525 West End Ave Room / Suite 1030</addr-line>
            <addr-line>Nashville, TN, 37203</addr-line>
            <country>United States</country>
            <phone>1 615 343 9096</phone>
            <email>b.malin@vumc.org</email>
          </address>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3040-5175</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>Vanderbilt University</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Law, Vanderbilt University</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Pediatrics, Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Health Policy</institution>
        <institution>Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Computer Science and Engineering, Washington University in St. Louis</institution>
        <addr-line>St. Louis, MO</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Department of Computer Science, University of Texas at Dallas</institution>
        <addr-line>Richardson, TX</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Biostatistics</institution>
        <institution>Vanderbilt University Medical Center</institution>
        <addr-line>Nashville, TN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Bradley A Malin <email>b.malin@vumc.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jul-Dec</season>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>8</month>
        <year>2022</year>
      </pub-date>
      <volume>2</volume>
      <issue>2</issue>
      <elocation-id>e35702</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>12</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>6</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yongtai Liu, Zhijun Yin, Zhiyu Wan, Chao Yan, Weiyi Xia, Congning Ni, Ellen Wright Clayton, Yevgeniy Vorobeychik, Murat Kantarcioglu, Bradley A Malin. Originally published in JMIR Infodemiology (https://infodemiology.jmir.org), 03.08.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Infodemiology, is properly cited. The complete bibliographic information, a link to the original publication on https://infodemiology.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://infodemiology.jmir.org/2022/2/e35702" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>As direct-to-consumer genetic testing services have grown in popularity, the public has increasingly relied upon online forums to discuss and share their test results. Initially, users did so anonymously, but more recently, they have included face images when discussing their results. Various studies have shown that sharing images on social media tends to elicit more replies. However, users who do this forgo their privacy. When these images truthfully represent a user, they have the potential to disclose that user’s identity.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study investigates the face image sharing behavior of direct-to-consumer genetic testing users in an online environment to determine if there exists an association between face image sharing and the attention received from other users.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study focused on r/23andme, a subreddit dedicated to discussing direct-to-consumer genetic testing results and their implications. We applied natural language processing to infer the themes associated with posts that included a face image. We applied a regression analysis to characterize the association between the attention that a post received, in terms of the number of comments and the karma score (defined as the number of upvotes minus the number of downvotes), and whether the post contained a face image.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We collected over 15,000 posts from the r/23andme subreddit, published between 2012 and 2020. Face image posting began in late 2019 and grew rapidly, with over 800 individuals revealing their faces by early 2020. The topics in posts including a face were primarily about sharing, discussing ancestry composition, or sharing family reunion photos with relatives discovered via direct-to-consumer genetic testing. On average, posts including a face image received 60% (5/8) more comments and had karma scores 2.4 times higher than other posts.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Direct-to-consumer genetic testing consumers in the r/23andme subreddit are increasingly posting face images and testing reports on social platforms. The association between face image posting and a greater level of attention suggests that people are forgoing their privacy in exchange for attention from others. To mitigate this risk, platform organizers and moderators could inform users about the risk of posting face images in a direct, explicit manner to make it clear that their privacy may be compromised if personal images are shared.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>direct-to-consumer genetic testing</kwd>
        <kwd>topic modeling</kwd>
        <kwd>social media</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The cost of genome sequencing has steadily decreased over time [<xref ref-type="bibr" rid="ref1">1</xref>], which, in turn, has enabled the emergence of direct-to-consumer genetic testing (DTC-GT) services available to the public [<xref ref-type="bibr" rid="ref2">2</xref>]. DTC-GT allows consumers to learn about their genetic information without consulting with a health care provider [<xref ref-type="bibr" rid="ref3">3</xref>]. The number of people who have participated in DTC-GT has increased dramatically, growing from 12 million in January 2018 to 26 million in January 2019 [<xref ref-type="bibr" rid="ref4">4</xref>]. As of late 2021, the two largest DTC-GT companies, AncestryDNA and 23andme, had amassed over 20 million and 12 million clients, respectively [<xref ref-type="bibr" rid="ref5">5</xref>]. Recent studies indicate that people pursue DTC-GT for various reasons, primarily to learn about their ancestry and to discover or confirm kinship [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>As DTC-GT services have grown in popularity, consumers have increasingly relied upon online social platforms to discuss and share their test results (though not always the raw genome sequences) [<xref ref-type="bibr" rid="ref8">8</xref>]. One particularly notable platform is Reddit, an online content rating and discussion site where users can create different subreddits based on specific topics of interest. One of the most popular subreddits related to DTC-GT is r/23andme, with more than 81,400 subscribers as of May 2022. In r/23andme, users discuss a wide range of topics related to genetic testing, including testing services, test results, explanations and interpretations, and share stories about what happened after undergoing testing (eg, health-related decisions) [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>When r/23andme users share their results for discussion, instead of simply typing text, some users attach a screenshot of their DTC-GT result page (eg, the ancestry composition). Since Reddit is a virtual online community where users generally rely upon pseudonyms for communication, such screenshots of results typically do not contain a user’s real name. Therefore, even when users share and discuss their DNA test results, this subreddit has historically been a community with a culture of anonymity.</p>
      <p>However, in 2019, r/23andme users began attaching personal images to their posts. <xref rid="figure1" ref-type="fig">Figure 1</xref> presents an example of a screenshot of a user’s DTC-GT result page on the left, with the full-face image of this user on the right. This movement toward revealing one’s face directly affects personal privacy [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Although these posts used pseudonyms, face image posting in online environments constitutes a knowing decision to give up one’s privacy. Other users may utilize these face images to determine a user’s identity, relying, in part, on the rapid development and deployment of modern face recognition [<xref ref-type="bibr" rid="ref11">11</xref>] and identity detection systems [<xref ref-type="bibr" rid="ref12">12</xref>]. This is a concern, because identity disclosure may lead to various negative consequences for individuals, including identity theft [<xref ref-type="bibr" rid="ref13">13</xref>], discrimination [<xref ref-type="bibr" rid="ref14">14</xref>], and threats to personal safety [<xref ref-type="bibr" rid="ref15">15</xref>]. Since Reddit is a public platform, a user’s posts and face images are readily accessible, making an identity disclosure attack feasible with little cost [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>An example of a face image posted on the r/23andme subreddit. The report is shown together with a face image and testing results. The actual face and name are obscured for this publication; however, the data exist in the public domain.</p>
        </caption>
        <graphic xlink:href="infodemiology_v2i2e35702_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Though users may be aware that revealing their face likely compromises their privacy, it is unclear why they choose to do so. Various investigations into behavioral psychology and economics show that some people waive their privacy rights in exchange for a service that they value [<xref ref-type="bibr" rid="ref17">17</xref>]. Thus, we hypothesize that r/23andme users may receive more attention by publishing more personal information. This is supported by findings on other social platforms. For instance, including photos with tweets on the Twitter platform can boost retweets by 35% [<xref ref-type="bibr" rid="ref18">18</xref>]. Instagram photos with faces are 38% more likely to receive likes and 32% more likely to receive comments [<xref ref-type="bibr" rid="ref19">19</xref>]. However, unlike Twitter or Instagram, the DTC-GT forum examined in this paper provides an anonymous environment for users to share and discuss sensitive personal genetic information. Thus, we sought to determine whether this forum supports the same privacy-service exchange hypothesis. To formally test our hypothesis, we investigated the following research questions: (1) What are the topics communicated in the natural language of posts with face images? (2) Is face image posting associated with the attention that a post receives?</p>
      <p>To answer these questions, we collected posts from the r/23andme subreddit and categorized them into three types: (1) posts with only text, (2) posts with face images, and (3) posts with images not containing a face. We next measured the temporal posting trends regarding the type of post. Then, we applied topic modeling to compare the primary topics associated with each type of post. Finally, we performed a regression analysis to infer the association between the attention that a post received, in terms of votes and comments, and whether the post contained a face image.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics Considerations</title>
        <p>This study involved only online posts that were openly accessible on Reddit. We have published the analysis results only in this paper, and any referenced posts or figures have been anonymized to protect the privacy of users.</p>
      </sec>
      <sec>
        <title>Overview</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> provides an overview of the research pipeline, which had two primary steps. The first step involved data collection and categorization, in which we collected the posts on the r/23andme subreddit and extracted those with a face image using face recognition software. The second step focused on analysis. Specifically, we first conducted an exploratory analysis to investigate the temporal posting trends and then leveraged topic modeling to infer the themes communicated in these posts. Finally, we performed a regression analysis to determine whether including a face image in a post was associated with the attention it received. In this study, we characterized attention by the number of comments and the karma score that a post received from other online users. The karma score on Reddit is defined as the number of upvotes minus the number of downvotes, indicating the popularity of a post.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>An overview of the research workflow for r/23andme post analysis. RQ: research question.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e35702_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Collection and Categorization</title>
        <p>To collect data from the r/23andme subreddit, we first gathered the IDs of all posts (ie, submissions) and comments using pushshift.io. We then applied the Python Reddit application programming interface wrapper package (version 6.3.1) to extract data from Reddit for each post ID. Specifically, we collected all posts and comments published on r/23andme between December 31, 2012, and January 31, 2020. Each collected post contained the following information: (1) author identifier, (2) post title, (3) post text body, (4) image URL (if there was an image in the post), (5) comments on the post, (6) post date, and (7) karma scores of the post and affiliated comments.</p>
        <p>We downloaded the images from posts containing an image URL and applied the face-recognition Python package (version 1.3.0) [<xref ref-type="bibr" rid="ref20">20</xref>] to classify images into (1) images with a face and (2) images without a face (ie, faceless images). To assess the accuracy of the face detection algorithm, we randomly selected 100 images from each group and manually examined the quality of classification. We found that 7 faceless images were classified as face images, indicating a false positive rate of 7% (7/100), while 2 face images were classified as faceless images, indicating a false negative rate of 2% (2/100). To achieve 100% precision, we manually reviewed all the images in the face group and relabeled the misclassified images. Due to a high true positive rate of 98% (98/100) and the large volume of the faceless images (3865), we did not perform a manual review step for the set of faceless images. As such, we categorized all of the collected posts into three types: (1) text-only posts; (2) posts with faceless images; and (3) posts with face images (such as the post in <xref rid="figure1" ref-type="fig">Figure 1</xref>), corresponding to 3 types of users.</p>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <p>To describe face image posting behavior, we compared the face posts with the other two types of posts along three perspectives: (1) posting temporal trend, (2) post theme, and (3) the attention that a post received from other users, in terms of the number of comments and karma score.</p>
        <sec>
          <title>Topic Analysis</title>
          <p>To examine the thematic differences between the three post types, we applied topic modeling [<xref ref-type="bibr" rid="ref21">21</xref>] to the post title rather than the post body, because 41.1% (6404/15,596) of the posts had an empty text body. We first tokenized the data and removed all punctuation. Next, we lemmatized words into their base forms (eg, “walks” became “walk”) using the nltk Python package (version 3.3). We also replaced personal pronouns, such as “we,” “she,” and “they,” with the symbol “-PRON-,” and replaced numbers with the word “datum.” We then applied latent Dirichlet allocation (LDA) [<xref ref-type="bibr" rid="ref22">22</xref>], as implemented in the gensim Python package (version 3.8.1), to extract topics. Since LDA is an unsupervised learning model, we calibrated the number of topics for the optimal model based on the coherence score, which measures the pairwise word semantic similarity in a topic. To do so, we ran LDA models with 2 to 20 topics (using a step size of 2) on the set of lemmatized words and selected the topic number that achieved the highest coherence score. Finally, to demonstrate the quality of topic modeling, we used <italic>t</italic>-distributed stochastic neighbor embedding [<xref ref-type="bibr" rid="ref23">23</xref>] to cluster topics and displayed the results as a 2D representation (Figure S1 and Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        </sec>
        <sec>
          <title>Regression Analysis</title>
          <p>We investigated two types of associations. First, we considered the association between an image post (with and without a face) and the attention it received. Second, we considered the association between a face post and the attention it received. Since the number of comments and the karma score are nonnegative count variables, we applied a negative binomial regression to infer the association [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
          <p>Given that posts published earlier may be read by more readers and, thus, receive more comments and votes, we included the number of days a post had been published as a control variable. In addition, posts on different topics might receive different levels of attention. To reduce the effects of post topic, we incorporated the topic distribution of each post as an additional set of control variables. During model fitting, we dropped one topic (T<sub>4</sub>, see below) to address collinearity.</p>
          <p>Moreover, the activity level of users might affect the popularity of their posts. For example, posts from active users may receive more attention. To reduce the impact of user activity, we incorporated the number of posts and the number of comments of each user as an additional set of control variables. We utilized the implementation of negative binomial regression in the statsmodels Python package (version 0.11.1) to fit models for the karma score and the number of comments separately. We reported the features that achieved statistical significance at the <italic>P</italic>&lt;.001 level.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We collected 15,596 posts and 188,843 comments, which were published by 20,883 users between December 31, 2012, and January 31, 2020. Among the collected posts, 24.8% (3818/15,596) contained faceless images, while 5.4% (849/15,596) contained face images.</p>
      <sec>
        <title>Temporal Trends</title>
        <p>In <xref rid="figure3" ref-type="fig">Figure 3</xref>A, the graph depicts the temporal post trend on a monthly basis. It can be seen that the r/23andme subreddit exhibited relatively low activity until 2017, after which the number of monthly posts grew rapidly. Image posts (with and without a face) became popular after 2018. In <xref rid="figure3" ref-type="fig">Figure 3</xref>B, the graph shows the quarterly growth rate of the number of posts. The green dotted line indicates that, since 2019, the number of face posts exhibited a rapid increase, with a growth rate that surpassed the growth rate of all posts (represented by the blue line) and image posts (represented by the orange dashed line). Notably, we find that posting rates for all 3 types of post increased rapidly after major promotions by 23andme (eg, as part of Black Friday and Amazon Prime Day, advertising events held by Amazon Inc), which is consistent with the findings of Yin et al [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Smoothed temporal trends of three types of post, including the number of posts published per month (A) and quarterly growth rate of posts (B).</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e35702_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Attention to Posts</title>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref>A is a boxplot showing the number of comments per post for each post type. Face posts received the most comments, followed by posts not containing a face. The median number of comments for text-only posts was 6, but the median increased to 9 for posts with faceless images and 13 for posts with face images. <xref rid="figure4" ref-type="fig">Figure 4</xref>B is a boxplot showing the karma score by post type. Face posts received the highest median karma score (34), followed by posts with faceless images (median karma score 13). In contrast, the median karma score for text posts was only 4. One-way ANOVA tests for comments and karma scores indicated that the differences were statistically significant (<italic>P</italic>&lt;.001).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Attention to three types of posts. The number of comments per post (A) and karma score per post (B). For presentation purposes, we removed posts with more than 80 comments or karma scores greater than 150 (3% of the data). The entire data set is provided in Figure S3 and Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e35702_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>User Activity</title>
        <p>We measured user activity in terms of the number of posts and comments. We found that 26.8% (2442/9114) of the users posted faceless images, while 8.5% (774/9114) posted face images. <xref rid="figure5" ref-type="fig">Figure 5</xref>A is a graph showing that the median number of posts for all 3 user types was 1. However, the third quartile of users who posted images (with or without a face) was 2. This suggests that, on average, authors who posted images (with or without a face) had more posts than authors who posted only text. The graph in <xref rid="figure5" ref-type="fig">Figure 5</xref>B depicts the number of comments posted for each user type. The users who posted face images wrote the most comments, with a median of 8. The median dropped to 6 for users who posted images not containing a face. For users who posted only text, the median number of comments was substantially lower, at 3. The results of 1-way ANOVA tests for the number of posts and the number of comments indicated that the differences were statistically significant (<italic>P</italic>&lt;.001).</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Number of posts per user (A) and number of comments per user (B) for users who posted (1) text only, (2) faceless images, and (3) face images. For presentation purposes, we removed users who published more than 10 posts or 50 comments, accounting for 4.4% of the total number of users. The entire data set is provided in Figure S3 and Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e35702_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Topic Analysis</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows the 10 inferred topics, their most relevant words, and the topic distribution (Figure S1 and Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> show details on the selection of the number of topics). The most relevant words were ranked based on their marginal distribution within a topic and displayed in descending order. The topic distribution was calculated as the percentage of posts belonging to the topic. Based on the relevant words and posts with the highest probability for each topic, we further grouped the 10 topics into three categories: (1) ancestry composition, (2) kinship and family discovery, and (3) general questions about genetic testing.</p>
        <p>Ancestry composition included 4 topics: T<sub>1</sub>, T<sub>2</sub>, T<sub>3</sub>, and T<sub>4</sub>. Posts in this category focused on the presentation and discussion of ancestry composition testing results. The 4 topics captured ancestry information, which communicates a user’s race, continental origin, and nationality. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> shows example posts for each topic. Kinship and family discovery was communicated in T<sub>5</sub> and T<sub>6</sub>. Specifically, T<sub>5</sub> communicated the discovery of ancestors and distant relatives, where it can be seen that terms like “family” and “history” were often used. In T<sub>6</sub>, words such as “find,” “dad,” and “siblings” show that this topic focused on findings relating to immediate family members. General questions related to DTC-GT were communicated in T<sub>7</sub>, T<sub>8</sub>, T<sub>9</sub>, and T<sub>10</sub>. Specifically, T<sub>7</sub> posts mainly asked about testing service progress. Words such as “time” and “wait” were highly weighted in this topic. T<sub>8</sub> posts were mainly comparisons of DTC-GT companies. There were mentions of companies, such as “MyHeritage,” “23andme,” and “WeGene.” T<sub>9</sub> covered posts about understanding, or questions about, the test result report. T<sub>10</sub> posts mainly discussed an upgrade to the genetic testing algorithm and the subsequent changes in testing results. Words such as “beta,” “update,” and “change” were highly weighted.</p>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> presents the topic distribution for each type of post. The 1-way ANOVA tests showed that there were statistically significant differences between the means of the 3 post types for all 10 topics (<italic>P</italic>&lt;.001). Face posts were more likely to communicate ancestry composition (T<sub>1</sub>, T<sub>2</sub>, T<sub>3</sub>, and T<sub>4</sub>) and kinship and family discovery (T<sub>5</sub> and T<sub>6</sub>), while text posts were more likely to be about general questions (T<sub>7</sub>, T<sub>8</sub>, and T<sub>9</sub>). T<sub>10</sub>, a topic about an algorithm upgrade by 23andMe, was most likely to be communicated in faceless image posts, followed by text posts and then face image posts. This may be because users tended to post screenshots of the results before and after the algorithm upgrade for easy comparison.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The topics inferred from the r/23andme subreddit. The sample words are presented in descending order according to their relevance score within the topic.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="120"/>
            <col width="700"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Category</td>
                <td>Top-20 most relevant terms</td>
                <td colspan="2">Topic distribution</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Ancestry composition</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 1</td>
                <td colspan="2">European, -PRON-, result, Italian, Irish, British, surprise, Jewish, white, Chinese, broadly, bit, eastern, Ashkenazi, surprised, Scandinavian, give, eye, lot, surprising</td>
                <td>11.6%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 2</td>
                <td colspan="2">-PRON-, ancestry, German, guess, French, make, post, heritage, year, ethnicity, grandmother, common, grandparent, explain, mega-thread, feel, polish, Canadian, confused, wrong</td>
                <td>7.9%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 3</td>
                <td colspan="2">result, -PRON-, expect, finally, back, ancestor, interesting, pretty, AncestryDNA, bear, confidence, recent, location, Filipino, cool, guy, live, thought, Finnish, big</td>
                <td>9.1%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 4</td>
                <td colspan="2">American, Asian, African, native, Mexican, people, south, percentage, region, Neanderthal, gene, high, part, Spanish, unassigned, east, north, variant, trace, add</td>
                <td>10.6%</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Kinship and family discovery</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 5</td>
                <td colspan="2">-PRON-, family, today, close, tree, understand, worth, info, don, trait, history, link, happen, picture, excited, love, list, connection, inherit, risk</td>
                <td>6.5%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 6</td>
                <td colspan="2">-PRON-, find, dad, half, mom, father, cousin, mother, side, sister, adopt, brother, great, sibling, grandfather, full, grandma, biological, aunt, figure</td>
                <td>9.2%</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>General questions</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 7</td>
                <td colspan="2">kit, long, time, extraction, wait, timeline, genetic, day, receive, sample, analysis, week, testing, step, send, batch, fail, information, work, stick</td>
                <td>14.2%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 8</td>
                <td colspan="2">andme, ancestry, datum, health, raw, accurate, GEDmatch, MyHeritage, good, DNA, upload, compare, site, comparison, land, data, service, difference, WeGene, interpret</td>
                <td>11.0%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 9</td>
                <td colspan="2">DNA, test, relative, question, parent, report, share, -PRON-, phase, show, generation, relate, computation, person, unexpected, noise, mystery, relationship, account, number</td>
                <td>9.7%</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Topic 10</td>
                <td colspan="2">result, update, beta, haplogroup, match, maternal, change, paternal, chromosome, map, mixed, chip, Puerto Rican, Korean, lose, comment, late, original, Romanian</td>
                <td>10.2%</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <boxed-text id="box1" position="float">
          <title>Examples of posts for different topics.</title>
          <list list-type="bullet">
            <list-item>
              <p>“So I’m a lot less British than I thought, and a lot more Swiss” (Topic 1).</p>
            </list-item>
            <list-item>
              <p>“Any guesses on my friend’s ethnicity? He thinks he’s French/German, English, and maybe some Slavic” (Topic 2).</p>
            </list-item>
            <list-item>
              <p>“Born and raised in Manila, grew up thinking I was 100% Filipino. A bit shocked at my results” (Topic 3).</p>
            </list-item>
            <list-item>
              <p>“Found out I am East Asian and Native American but I have northern Asian and Native American so high” (Topic 4).</p>
            </list-item>
            <list-item>
              <p>“Found out I have about a dozen cousins I didn’t know about” (Topic 6).</p>
            </list-item>
            <list-item>
              <p>“My cousin did the DNA test and connected us to our great grandmother’s family!” (Topic 5).</p>
            </list-item>
            <list-item>
              <p>“On my account apparently my mom and her twin sister are both my moms” (Topic 6).</p>
            </list-item>
            <list-item>
              <p>“Is my kit moving slow? It took 2 weeks to be marked as “arrived” after tracking showed it was delivered” (Topic 7).</p>
            </list-item>
            <list-item>
              <p>“23andMe vs WEGENE – uploaded 23andMe raw data to WEGENE and here are the differences” (Topic 8).</p>
            </list-item>
            <list-item>
              <p>“What is a likely relationship if the shared DNA is 1610 centimorgans across 80 segments?” (Topic 9).</p>
            </list-item>
            <list-item>
              <p>“Beta update v5.2 should now be available to all earlier chip (pre-V5) users, when opting into the Beta program” (Topic 10).</p>
            </list-item>
          </list>
        </boxed-text>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>The prevalence of topics for each post type. The topics are arranged according to category. *<italic>P</italic>&lt;.001 according to a 1-way ANOVA with post-hoc Tukey honestly significant difference tests for pairwise differences between the 3 post types for the topic.</p>
          </caption>
          <graphic xlink:href="infodemiology_v2i2e35702_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Regression Analysis</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> summarizes the results of the negative binomial regressions. <italic>R</italic> for image→comment and <italic>R</italic> for image→score indicate the association between the number of comments, karma score, and whether the post contained images, either faceless or with a face. Image posting exhibited statistically significant positive associations with both dependent variables, suggesting that image posts received more attention than text-only posts.</p>
        <p>With respect to the <italic>R</italic> for face→comment and <italic>R</italic> for face→score tests, we selected 4717 image posts and assessed the association between the number of comments, karma score, and whether the image contained a face. Face image posting exhibited statistically significant positive associations with both dependent variables, which indicates that face posts received more attention than faceless posts. Comparing the <italic>R</italic> for image→comment and <italic>R</italic> for face→comment tests showed that posting a face image achieved a more positive impact on receiving comments. Comparing the <italic>R</italic> for image→score and <italic>R</italic> for face→score tests showed a similar result.</p>
        <p>In addition, there were two notable findings with respect to the control variables. First, the log-transformed number of published days exhibited a negative association in the <italic>R</italic> for image→comment and <italic>R</italic> for image→score tests (<italic>β</italic>=–.09 for image→comment, <italic>β</italic>=–.26 for image→score, <italic>P</italic>&lt;.001). Second, T<sub>8</sub> (the DTC-GT company comparison) had a negative association in all 4 tests (<italic>P</italic>&lt;.001 for image→comment and face→comment, <italic>P</italic>=.003 for image→score, and <italic>P</italic>=.013 for face→score), while topic T<sub>7</sub> (asking about testing service progress) showed a negative association in <italic>R</italic> for image→score, <italic>R</italic> for face→score, and <italic>R</italic> for face→comment tests (<italic>P</italic>&lt;.001 for image→score, <italic>P</italic>=.003 for face→score, and <italic>P</italic>=.04 for face→comment). The negative association between topics T<sub>7</sub>, T<sub>8</sub>, and face posting reinforces our previous finding that the topics in posts including a face were less likely to correspond to a general question about DTC-GT.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Results of the regression analysis relating post type to comments and karma score. All associations were statistically significant (<italic>P</italic>&lt;.001).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="220"/>
            <col width="230"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Negative binomial regression</td>
                <td>Dependent variable</td>
                <td>Independent variable</td>
                <td>
                  <italic>β</italic>
                </td>
                <td>Z</td>
                <td>SE</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td><italic>R</italic> for image→comment</td>
                <td>Number of comments</td>
                <td>Posting image</td>
                <td>.152</td>
                <td>6.41</td>
                <td>0.024</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><italic>R</italic> for image→score</td>
                <td>Karma score</td>
                <td>Posting image</td>
                <td>.618</td>
                <td>12.35</td>
                <td>0.050</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><italic>R</italic> for face→comment</td>
                <td>Number of comments</td>
                <td>Posting face image</td>
                <td>.451</td>
                <td>10.21</td>
                <td>0.044</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td><italic>R</italic> for face→score</td>
                <td>Karma score</td>
                <td>Posting face image</td>
                <td>.760</td>
                <td>9.64</td>
                <td>0.079</td>
                <td>&lt;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This investigation made several notable findings. First, consistent with previous studies on other social platforms [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], we observed that posts with face images in the r/23andme subreddit received more attention than other posts. It is possible that the increase in attention drove the disclosure of personal information in this online environment. However, this is only a conjecture, as our investigation was not designed to be a causal analysis. Regardless of the motivation for face image posting, it is evident that this behavior has rapidly grown within this subreddit.</p>
        <p>Second, the 10 inferred topics from the titles of r/23andme posts appeared to fall into three categories. Posts in the first category, which covered 4 out of 10 topics, focused on discussing users’ ancestry composition. Notably, the topics in this category were associated with a higher rate of image and face image posting. It was further observed that users invoked their face images as proof (or counterexamples) of the genetic testing results. Posts about kinship and family member discovery exhibited a moderate rate of face image sharing. When inspecting posts in this category, posts such as “finally find my half-sister,” with a group photo of a reunion attached, were more prevalent than in other categories. Finally, posts asking general questions about genetic testing, which focused on comparisons between DTC-GT companies, the progress of testing result delivery, and upgrades to testing algorithms, exhibited the lowest rate of image sharing.</p>
        <p>Third, counter to our expectation, we found that the number of days since a post was published was negatively associated with a post’s attention. One possible explanation for this result is that Reddit archives posts older than 6 months and no longer allows commenting on them. Thus, the number of comments and votes was limited for earlier posts. We further noticed that topics related to general questions were negatively correlated with attention to a post.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>Natural language processing techniques have been applied to various health care applications [<xref ref-type="bibr" rid="ref25">25</xref>]. Considering health care–related social media studies as an example, Liu et al [<xref ref-type="bibr" rid="ref26">26</xref>] analyzed the association between weight loss progress and Reddit users’ online interactions; Klein et al [<xref ref-type="bibr" rid="ref27">27</xref>] relied upon Twitter data to identify potential cases of COVID-19 in the United States; and Ni et al [<xref ref-type="bibr" rid="ref28">28</xref>] compared the attitudes of users of 4 different social platforms toward the “gene-edited babies” event. For DTC-GT, most investigations have focused on consumer motivations [<xref ref-type="bibr" rid="ref29">29</xref>], health implications [<xref ref-type="bibr" rid="ref30">30</xref>], and ethical implications [<xref ref-type="bibr" rid="ref31">31</xref>], with only a handful considering the disclosure of test reports over social platforms [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. Most previous studies that used social media data focused solely on mining knowledge from text. In this study, by taking image posting into consideration, we assess the behavior of personal image sharing on this DTC-GT forum.</p>
        <p>This paper analyzes the association between face image sharing and attention paid to posts in an online setting; this setting may incentivize users to sacrifice their privacy in exchange for the benefit of a social response. This observation, however, does not imply that attention is undesirable in all cases, as several studies have shown that social engagement is beneficial to an individual’s physical and mental health. For instance, in a large online breast cancer forum, Yin et al [<xref ref-type="bibr" rid="ref34">34</xref>] found that the volume of online interchange was positively associated with patient treatment adherence. Pan et al [<xref ref-type="bibr" rid="ref35">35</xref>] found that receiving replies could benefit online participants in depression forums. Naslund et al [<xref ref-type="bibr" rid="ref36">36</xref>] analyzed the benefits and risks of using social media as a potentially viable platform for offering support intervention to persons with mental disorders. Thus, the perceived benefits an individual receives from a service typically outweigh the perceived privacy risks in the near term. Nevertheless, given that privacy concerns tend to be understood only later on [<xref ref-type="bibr" rid="ref37">37</xref>], Reddit may wish to consider warning users about the potential negative consequences of their actions.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite our findings, there are certain limitations to this work, which we believe serve as opportunities for future research. First, the face recognition package had an estimated 2% false negative rate, which means that approximately 76 of the 3865 face images (2%) were likely wrongly labeled as faceless images. These misclassified images might have influenced the accuracy of our findings, although not their overall direction. Second, most topics inferred from topic modeling were interpretable and intuitive, but topic T<sub>10</sub> was difficult to interpret. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, sample words for T<sub>10</sub> conveyed different kinds of information: “Puerto Rican” and “Korean” are related to ancestry composition, whereas “late” and “lose” are evidence of asking about delivery progress. In this respect, newer topic modeling techniques [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>] or language model–based topic modeling (eg, top2vec [<xref ref-type="bibr" rid="ref41">41</xref>] and BERTopic [<xref ref-type="bibr" rid="ref42">42</xref>]) may provide better insights into the semantics of posts on social platforms. Importantly, however, the quality of individual topics had little effect on our main conclusion, since the regression analysis (using the topic distribution as control variable; <xref ref-type="table" rid="table2">Table 2</xref>) and ANOVA test (without topic distribution; <xref rid="figure4" ref-type="fig">Figure 4</xref>) yielded the same finding—a statistically significant association between face image sharing on r/23andme and user engagement.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>DTC-GT users are increasingly posting full-face images with their DTC-GT results on social platforms. In this study, we investigated the trend in this behavior in the r/23andme subreddit to obtain insight into potential underlying motivations. Our findings show that such behavior began in September 2019 and experienced rapid growth, with over 849 face-revealing posts by early 2020. Furthermore, our study suggests that posts including a face received, on average, 60% (5/8) more comments and 2.4 times higher karma scores than other posts. Posts that included face images were primarily about sharing and discussing ancestry composition and sharing family reunion photos with relatives discovered via DTC-GT. These findings verify our hypothesis that posting a personal image is associated with receiving more online attention, which is consistent with previous findings that people appear to be willing to give up their privacy (ie, their personal images) in exchange for a benefit (ie, attention from others). Based on this analysis, platform organizers and moderators might inform users about the risk of posting face images in a direct, explicit manner and make it clear that users’ privacy may be compromised if personal images are disclosed.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary materials.</p>
        <media xlink:href="infodemiology_v2i2e35702_app1.docx" xlink:title="DOCX File, 386 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">DTC-GT</term>
          <def>
            <p>direct-to-consumer genetic testing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>YL, ZY, ZW, and CY proposed the research idea, which was finalized by BAM. YL and CN collected the data. YL and ZY designed and conducted the experiments. BAM and EWC provided advice on the data analysis. YL drafted the manuscript. EWC, ZY, BAM, YV, MK, and WX edited the final manuscript. All authors reviewed the final manuscript. This research was sponsored in part by the National Institutes of Health (grant RM1-HG009034, grant R01-HG006844, and grant U2COD023196).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>The Cost of Sequencing a Human Genome</article-title>
          <source>National Human Genome Research Institute</source>
          <access-date>2022-07-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.genome.gov/about-genomics/fact-sheets/Sequencing-Human-Genome-cost">https://www.genome.gov/about-genomics/fact-sheets/Sequencing-Human-Genome-cost</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellcross</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Meaney-Delman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Direct-to-consumer personal genome testing and cancer risk prediction</article-title>
          <source>Cancer J</source>
          <year>2012</year>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>293</fpage>
          <lpage>302</lpage>
          <pub-id pub-id-type="doi">10.1097/PPO.0b013e3182610e38</pub-id>
          <pub-id pub-id-type="medline">22846729</pub-id>
          <pub-id pub-id-type="pii">00130404-201207000-00003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>What is direct-to-consumer genetic testing?</article-title>
          <source>MedlinePlus</source>
          <access-date>2022-07-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ghr.nlm.nih.gov/primer/dtcgenetictesting/directtoconsumer">https://ghr.nlm.nih.gov/primer/dtcgenetictesting/directtoconsumer</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Regalado</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>More than 26 million people have taken an at-home ancestry test</article-title>
          <source>MIT Technology Review</source>
          <access-date>2020-03-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.technologyreview.com/s/612880/more-than-26-million-people-have-taken-an-at-home-ancestry-test/">https://www.technologyreview.com/s/612880/more-than-26-million-people-have-taken-an-at-home-ancestry-test/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>23andMe vs AncestryDNA</article-title>
          <source>SmarterHobby</source>
          <access-date>2021-11-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.smarterhobby.com/genealogy/23andme-vs-ancestry-dna/">https://www.smarterhobby.com/genealogy/23andme-vs-ancestry-dna/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruhl</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Hazel</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Public Attitudes Toward Direct to Consumer Genetic Testing</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>774</fpage>
          <lpage>783</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32308873"/>
          </comment>
          <pub-id pub-id-type="medline">32308873</pub-id>
          <pub-id pub-id-type="pmcid">PMC7153088</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haeusermann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Greshake</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Blasimme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Irdam</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vayena</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Open sharing of genomic data: Who does it and why?</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>5</month>
          <day>9</day>
          <volume>12</volume>
          <issue>5</issue>
          <fpage>e0177158</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0177158"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0177158</pub-id>
          <pub-id pub-id-type="medline">28486511</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-51274</pub-id>
          <pub-id pub-id-type="pmcid">PMC5423632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>Health and kinship matter: Learning about direct-to-consumer genetic testing user experiences via online discussions</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>9</month>
          <day>8</day>
          <volume>15</volume>
          <issue>9</issue>
          <fpage>e0238644</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0238644"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0238644</pub-id>
          <pub-id pub-id-type="medline">32898148</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-04299</pub-id>
          <pub-id pub-id-type="pmcid">PMC7478842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stavrou</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Privacy risk assessment on online photos</article-title>
          <year>2015</year>
          <conf-name>International Symposium on Recent Advances in Intrusion Detection</conf-name>
          <conf-date>Nov 2, 2015</conf-date>
          <conf-loc>Berlin, Heidelberg</conf-loc>
          <fpage>427</fpage>
          <lpage>447</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-540-88313-5_3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Venkatesaramani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Re-identification of individuals in genomic datasets using public face images</article-title>
          <source>Sci Adv</source>
          <year>2021</year>
          <month>11</month>
          <day>19</day>
          <volume>7</volume>
          <issue>47</issue>
          <fpage>eabg3296</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.science.org/doi/10.1126/sciadv.abg3296?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/sciadv.abg3296</pub-id>
          <pub-id pub-id-type="medline">34788101</pub-id>
          <pub-id pub-id-type="pmcid">PMC8597988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Masi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hassner</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Deep Face Recognition: A Survey</article-title>
          <year>2018</year>
          <conf-name>31st SIBGRAPI Conference on Graphics, Patterns and Images</conf-name>
          <conf-date>Oct 29-Nov 1, 2018</conf-date>
          <conf-loc>Parana, Brazil</conf-loc>
          <fpage>471</fpage>
          <lpage>478</lpage>
          <pub-id pub-id-type="doi">10.1109/sibgrapi.2018.00067</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bäuml</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tapaswi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stiefelhagen</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised Learning with Constraints for Person Identification in Multimedia Data</article-title>
          <year>2013</year>
          <conf-name>IEEE Conference on Computer Vision and Pattern Recognition</conf-name>
          <conf-date>Jun 23-28, 2013</conf-date>
          <conf-loc>Portland, OR</conf-loc>
          <fpage>3602</fpage>
          <lpage>3609</lpage>
          <pub-id pub-id-type="doi">10.1109/cvpr.2013.462</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Irshad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Soomro</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Identity theft and social media</article-title>
          <source>Int J Comp Sci Net Sec</source>
          <year>2018</year>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <lpage>55</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://paper.ijcsns.org/07_book/201801/20180106.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Acquisti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An Experiment in Hiring Discrimination via Online Social Networks</article-title>
          <source>Manage Sci</source>
          <year>2020</year>
          <month>03</month>
          <volume>66</volume>
          <issue>3</issue>
          <fpage>1005</fpage>
          <lpage>1024</lpage>
          <pub-id pub-id-type="doi">10.1287/mnsc.2018.3269</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nosko</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Molema</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>All about me: Disclosure in online social networking profiles: The case of FACEBOOK</article-title>
          <source>Comput Hum Behav</source>
          <year>2010</year>
          <month>5</month>
          <volume>26</volume>
          <issue>3</issue>
          <fpage>406</fpage>
          <lpage>418</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2009.11.012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Kantarcioglu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ganta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Heatherly</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>A game theoretic framework for analyzing re-identification risk</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <month>3</month>
          <day>25</day>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0120592</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0120592"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0120592</pub-id>
          <pub-id pub-id-type="medline">25807380</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-47977</pub-id>
          <pub-id pub-id-type="pmcid">PMC4373733</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Acquisti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brandimarte</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Loewenstein</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Privacy and human behavior in the age of information</article-title>
          <source>Science</source>
          <year>2015</year>
          <month>01</month>
          <day>30</day>
          <volume>347</volume>
          <issue>6221</issue>
          <fpage>509</fpage>
          <lpage>514</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aaa1465</pub-id>
          <pub-id pub-id-type="medline">25635091</pub-id>
          <pub-id pub-id-type="pii">347/6221/509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maranga</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Social Photos Generate More Engagement: New Research Social Media Marketing</article-title>
          <source>Social Media Examiner</source>
          <access-date>2020-03-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.socialmediaexaminer.com/photos-generate-engagement-research/">https://www.socialmediaexaminer.com/photos-generate-engagement-research/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bakhshi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shamma</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Faces Engage Us: Photos with Faces Attract More Likes and Comments on Instagram</article-title>
          <year>2014</year>
          <conf-name>SIGCHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>Apr 26-May 1, 2014</conf-date>
          <conf-loc>Toronto, ON</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>965</fpage>
          <lpage>974</lpage>
          <pub-id pub-id-type="doi">10.1145/2556288.2557403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>ageitgey/face_recognition: The world's simplest facial recognition api for Python and the command line</article-title>
          <source>GitHub</source>
          <access-date>2020-03-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/ageitgey/face_recognition">https://github.com/ageitgey/face_recognition</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohr</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Bogdanov</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Introduction—Topic models: What they are and why they matter</article-title>
          <source>Poetics</source>
          <year>2013</year>
          <month>12</month>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>545</fpage>
          <lpage>569</lpage>
          <pub-id pub-id-type="doi">10.1016/j.poetic.2013.10.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet Allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/pdf/10.5555/944919.944937"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/944919.944937</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Visualizing Data using t-SNE</article-title>
          <source>J Mach Learn Res</source>
          <year>2008</year>
          <volume>9</volume>
          <fpage>2579</fpage>
          <lpage>2605</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ver Hoef</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Boveng</surname>
              <given-names>PL</given-names>
            </name>
          </person-group>
          <article-title>Quasi-Poisson vs. negative binomial regression: how should we model overdispersed count data?</article-title>
          <source>Ecology</source>
          <year>2007</year>
          <month>11</month>
          <volume>88</volume>
          <issue>11</issue>
          <fpage>2766</fpage>
          <lpage>2772</lpage>
          <pub-id pub-id-type="doi">10.1890/07-0043.1</pub-id>
          <pub-id pub-id-type="medline">18051645</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elbattah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Arnaud</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gignon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dequen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The Role of Text Analytics in Healthcare: A Review of Recent Developments and Applications</article-title>
          <year>2021</year>
          <conf-name>BIOSTEC 2021: 14th International Joint Conference on Biomedical Engineering Systems and Technologies</conf-name>
          <conf-date>Feb 11-13, 2021</conf-date>
          <conf-loc>Vienna, Austria</conf-loc>
          <fpage>825</fpage>
          <lpage>832</lpage>
          <pub-id pub-id-type="doi">10.5220/0010414508250832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Understanding Weight Loss via Online Discussions: Content Analysis of Reddit Posts Using Topic Modeling and Word Clustering Techniques</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>08</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e13745</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e13745/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13745</pub-id>
          <pub-id pub-id-type="medline">32510460</pub-id>
          <pub-id pub-id-type="pii">v22i6e13745</pub-id>
          <pub-id pub-id-type="pmcid">PMC7308899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Flores Amaro</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Toward Using Twitter for Tracking COVID-19: A Natural Language Processing Pipeline and Exploratory Data Set</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>22</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e25314</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e25314/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25314</pub-id>
          <pub-id pub-id-type="medline">33449904</pub-id>
          <pub-id pub-id-type="pii">v23i1e25314</pub-id>
          <pub-id pub-id-type="pmcid">PMC7834613</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>The Public Perception of the #GeneEditedBabies Event Across Multiple Social Media Platforms: Observational Study</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>03</month>
          <day>11</day>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>e31687</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/3/e31687/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31687</pub-id>
          <pub-id pub-id-type="medline">35275077</pub-id>
          <pub-id pub-id-type="pii">v24i3e31687</pub-id>
          <pub-id pub-id-type="pmcid">PMC8957000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Gornick</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Carere</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Uhlmann</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Ruffin</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>RC</given-names>
            </name>
          </person-group>
          <article-title>Direct-to-Consumer Genetic Testing: User Motivations, Decision Making, and Perceived Utility of Results</article-title>
          <source>Public Health Genomics</source>
          <year>2017</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>36</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.karger.com?DOI=10.1159/000455006"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000455006</pub-id>
          <pub-id pub-id-type="medline">28068660</pub-id>
          <pub-id pub-id-type="pii">000455006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>European Society of Human Genetics</collab>
          </person-group>
          <article-title>Statement of the ESHG on direct-to-consumer genetic testing for health-related purposes</article-title>
          <source>Eur J Hum Genet</source>
          <year>2010</year>
          <month>12</month>
          <day>25</day>
          <volume>18</volume>
          <issue>12</issue>
          <fpage>1271</fpage>
          <lpage>1273</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20736974"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/ejhg.2010.129</pub-id>
          <pub-id pub-id-type="medline">20736974</pub-id>
          <pub-id pub-id-type="pii">ejhg2010129</pub-id>
          <pub-id pub-id-type="pmcid">PMC3002858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Halverson</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Sathe</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>BA</given-names>
            </name>
          </person-group>
          <article-title>A systematic literature review of individuals' perspectives on privacy and genetic information in the United States</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <month>10</month>
          <day>31</day>
          <volume>13</volume>
          <issue>10</issue>
          <fpage>e0204417</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0204417"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0204417</pub-id>
          <pub-id pub-id-type="medline">30379944</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-09954</pub-id>
          <pub-id pub-id-type="pmcid">PMC6209148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olejnik</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kutrowska</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Castelluccia</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>I'm 2.8% Neanderthal - The Beginning of Genetic Exhibitionism?</article-title>
          <year>2014</year>
          <conf-name>Workshop on Genome Privacy</conf-name>
          <conf-date>Jul 15, 2014</conf-date>
          <conf-loc>Amsterdam, Netherlands</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.inria.fr/hal-01087696"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mittos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blackburn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Cristofaro</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Analyzing Twitter Discourse On Genetic Testing</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 20, 2018
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1801.09946"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1801.09946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Reciprocity and its association with treatment adherence in an online breast cancer forum</article-title>
          <year>2017</year>
          <conf-name>30th IEEE International Symposium on Computer-Based Medical Systems (CBMS)</conf-name>
          <conf-date>Jun 22-24, 2017</conf-date>
          <conf-loc>Thessaloniki, Greece</conf-loc>
          <fpage>618</fpage>
          <lpage>623</lpage>
          <pub-id pub-id-type="doi">10.1109/cbms.2017.51</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Examining Social Capital, Social Support, and Language Use in an Online Depression Forum: Social Network and Content Analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>24</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e17365</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e17365/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17365</pub-id>
          <pub-id pub-id-type="medline">32579125</pub-id>
          <pub-id pub-id-type="pii">v22i6e17365</pub-id>
          <pub-id pub-id-type="pmcid">PMC7381041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naslund</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Bondre</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Torous</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Aschbrenner</surname>
              <given-names>KA</given-names>
            </name>
          </person-group>
          <article-title>Social Media and Mental Health: Benefits, Risks, and Opportunities for Research and Practice</article-title>
          <source>J Technol Behav Sci</source>
          <year>2020</year>
          <month>09</month>
          <day>20</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>245</fpage>
          <lpage>257</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33415185"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s41347-020-00134-x</pub-id>
          <pub-id pub-id-type="medline">33415185</pub-id>
          <pub-id pub-id-type="pmcid">PMC7785056</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Acquisti</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Privacy in electronic commerce and the economics of immediate gratification</article-title>
          <year>2004</year>
          <conf-name>5th ACM Conference on Electronic Commerce</conf-name>
          <conf-date>May 17, 2004</conf-date>
          <conf-loc>New York, NY</conf-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>21</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1145/988772.988777</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindstedt</surname>
              <given-names>NC</given-names>
            </name>
          </person-group>
          <article-title>Structural Topic Modeling For Social Scientists: A Brief Case Study with Social Movement Studies Literature, 2005–2017</article-title>
          <source>Soc Curr</source>
          <year>2019</year>
          <month>05</month>
          <day>02</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>307</fpage>
          <lpage>318</lpage>
          <pub-id pub-id-type="doi">10.1177/2329496519846505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Venkatesaramani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Downey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vorobeychik</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A Semantic Cover Approach for Topic Modeling</article-title>
          <source>Proceedings of the 8th Joint Conference on Lexical and Computational Semantics</source>
          <year>2019</year>
          <conf-name>8th Joint Conference on Lexical and Computational Semantics</conf-name>
          <conf-date>Jun 6-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>92</fpage>
          <lpage>102</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/s19-1011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Leischow</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>DD</given-names>
            </name>
          </person-group>
          <article-title>Identifying Topics for E-Cigarette User-Generated Contents: A Case Study From Multiple Social Media Platforms</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>01</month>
          <day>20</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/1/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5780</pub-id>
          <pub-id pub-id-type="medline">28108428</pub-id>
          <pub-id pub-id-type="pii">v19i1e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC5291865</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Angelov</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Top2Vec: Distributed representations of topics</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on August 19, 2020
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2008.09470"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2008.09470</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grootendorst</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERTopic: Neural topic modeling with a class-based TF-IDF procedure</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 11, 2022
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2203.05794"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2203.05794</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
