<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Infodemiology</journal-id>
      <journal-title>JMIR Infodemiology</journal-title>
      <issn pub-type="epub">2564-1891</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i1e77242</article-id>
      <article-id pub-id-type="pmid">41543876</article-id>
      <article-id pub-id-type="doi">10.2196/77242</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Health Data for Linguistic Minority Group Research in Canada: Proof-of-Concept Centralized Health Care Metadata Repository Development and Usability Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Brooks</surname>
            <given-names>Ian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Spechbach</surname>
            <given-names>Herve</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cahill</surname>
            <given-names>Brian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Al Zoubi</surname>
            <given-names>Mohammad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Martin-Schreiber</surname>
            <given-names>Vincent</given-names>
          </name>
          <degrees>BScN, MEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2006-6598</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Peixoto</surname>
            <given-names>Cayden</given-names>
          </name>
          <degrees>HBSc, MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3473-1816</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Batista</surname>
            <given-names>Ricardo</given-names>
          </name>
          <degrees>MSc, MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5272-2457</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Belanger</surname>
            <given-names>Christopher</given-names>
          </name>
          <degrees>HBSc, MBA, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2070-5721</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Tanuseputro</surname>
            <given-names>Peter</given-names>
          </name>
          <degrees>HBSc, MSc, MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4409-0795</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Hsu</surname>
            <given-names>Amy T</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2747-4121</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Bjerre</surname>
            <given-names>Lise M</given-names>
          </name>
          <degrees>MDCM, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <xref rid="aff8" ref-type="aff">8</xref>
          <address>
            <institution>Department of Family Medicine</institution>
            <institution>Faculty of Medicine</institution>
            <institution>University of Ottawa</institution>
            <addr-line>600 Peter Morand Crescent</addr-line>
            <addr-line>Suite 201</addr-line>
            <addr-line>Ottawa, ON, K1G 5Z3</addr-line>
            <country>Canada</country>
            <phone>1 613 562 5800 ext 2982</phone>
            <email>lbjerre@uottawa.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3634-3585</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Faculty of Health Sciences</institution>
        <institution>University of Ottawa</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Epidemiology and Public Health</institution>
        <institution>Faculty of Medicine</institution>
        <institution>University of Ottawa</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Akausivik Inuit Family Health Team</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Telfer School of Management</institution>
        <institution>University of Ottawa</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Family Medicine and Primary Care</institution>
        <institution>University of Hong Kong</institution>
        <addr-line>Hong Kong</addr-line>
        <country>China (Hong Kong)</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Bruyère Health Research Institute</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Institut du Savoir Montfort</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Department of Family Medicine</institution>
        <institution>Faculty of Medicine</institution>
        <institution>University of Ottawa</institution>
        <addr-line>Ottawa, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Lise M Bjerre <email>lbjerre@uottawa.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>2</month>
        <year>2026</year>
      </pub-date>
      <volume>6</volume>
      <elocation-id>e77242</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>5</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>10</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>12</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>1</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Vincent Martin-Schreiber, Cayden Peixoto, Ricardo Batista, Christopher Belanger, Peter Tanuseputro, Amy T Hsu, Lise M Bjerre. Originally published in JMIR Infodemiology (https://infodemiology.jmir.org), 09.02.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Infodemiology, is properly cited. The complete bibliographic information, a link to the original publication on https://infodemiology.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://infodemiology.jmir.org/2026/1/e77242" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Language barriers between Canadian patients and health care providers are associated with poorer health outcomes, including decreased patient safety and quality of care, misdiagnosis and longer treatment initiation times, and increased mortality. However, research exploring language as a social determinant of health is limited, as Canadian health data are scattered across many jurisdictions, each with its own policies and procedures. This fragmentation makes it difficult for researchers to identify, locate, and use existing data. This paper presents the results of a pilot study that attempts to address this gap by creating a metadata repository (MDR) to act as a central source of information about what data are available at which data holdings across Canada.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This project aimed to (1) create a proof-of-concept MDR for Canadian health data at the variable level; (2) identify and label language-related variables existing within the MDR data; and (3) develop an interactive, public-facing web application to let users browse and search the MDR.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Metadata were collected from 5 Canadian health data sources, including 4 provincial data holdings and 1 national survey, and pooled to create a data repository. Then, we performed bottom-up labeling of language-related variables within the pooled metadata by first using a search string algorithm across all variable labels, names, and definitions and then consensus screening these variables using a derived, standardized definition of language or linguistic variables. Using the <italic>Shiny</italic> web framework in R, we then developed an openly accessible web application to allow users to search the proof-of-concept MDR.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 850,343 variables were collected and included in the repository, with most coming from Ontario (n=712,037, 83.7%) and Manitoba (n=97,051, 11.4%) provincial data holdings. Among all variables in the repository, 213,696 (25.1%) were confirmed to be language related.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Developing a national MDR would be a transformative opportunity for Canadian researchers to leverage the full scope of Canadian health administrative data. Although a top-down approach with consistent engagement of and collaboration between provincial data holdings and federal data agencies is ideal to develop a national MDR, this study demonstrates the feasibility of a bottom-up approach in contributing to this overarching goal.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>metadata</kwd>
        <kwd>metadata repository</kwd>
        <kwd>variables</kwd>
        <kwd>language</kwd>
        <kwd>linguistic</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Canada’s publicly funded health care system generates a vast amount of data covering factors as wide ranging as pharmacy or prescription records, laboratory results, and health care services [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. These data hold immense potential for health care research and for health policy and planning. However, because health care is administered differently across each of Canada’s provinces and territories, the data are scattered across a large number of agencies and institutions, each with its own data policies and procedures [<xref ref-type="bibr" rid="ref3">3</xref>]. This makes it difficult for researchers to access Canada’s provincial health data and also creates a more fundamental problem—it is often difficult for researchers to even discover what types of data are available, where they are held, and how to access them. This fragmentation has contributed to significant differences in the availability and accessibility of administrative and other health data across provinces, posing a major challenge for interprovincial or pan-Canadian health care research. This “data fragmentation” can create particular problems for health care research related to patient and health care provider language abilities.</p>
        <p>Language as a social determinant of health is an important and emerging topic in health research [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], and language barriers between Canadian patients and health care providers are associated with misdiagnosis and longer treatment initiation times [<xref ref-type="bibr" rid="ref6">6</xref>]; negative experiences for patients [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] and physicians [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]; and, in hospital settings, decreased patient safety and quality of care [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>] as well as increased mortality [<xref ref-type="bibr" rid="ref13">13</xref>]. This issue is of specific concern in Canada, an officially bilingual country in which 76.1% of the population are native English speakers, 22% are native French speakers, and 18% are bilingual [<xref ref-type="bibr" rid="ref14">14</xref>]. Although French speakers and English speakers can be found across the country, most French speakers live in the provinces of Quebec and New Brunswick. Despite the importance of language-related health research, the data fragmentation described previously makes it difficult and time consuming to even discover what language-related data are available, let alone access and analyze them. This paper presents the results of a pilot study that attempts to bridge this gap by creating a “metadata repository” (MDR) to serve as a central source of information about which data are available at which locations across Canada.</p>
        <p>Metadata can be defined as “data about data” [<xref ref-type="bibr" rid="ref15">15</xref>], and for this project, we sought to create a repository of variable-level metadata. In this context, variable-level metadata include information such as the institution holding the variable, the larger collection or “library” to which it belongs, and a plaintext description. To help illustrate the utility of an MDR in light of Canada’s bilingual health care context, we put a special focus on identifying language-related variables. In addition to our final metadata dataset, we also created an interactive public-facing web application to let users browse and search the repository.</p>
        <p>We discuss the current state of health data and metadata management in Canada and outline the principles and scope guiding our pilot project subsequently.</p>
      </sec>
      <sec>
        <title>Current Initiatives in Canada</title>
        <p>There are currently 2 main health metadata initiatives in Canada: the Health Data Research Network (HDRN) Canada’s Data Access Support Hub (DASH) and the Strategy for Patient-Oriented Research (SPOR) Canadian Data Platform (CDP). The HDRN is a pan-Canadian network of health data–holding organizations, and it established DASH [<xref ref-type="bibr" rid="ref16">16</xref>] to guide researchers and streamline access to data held by its members. However, DASH only helps researchers access data housed at member organizations of HDRN Canada, and its services are not free to use.</p>
        <p>The SPOR CDP, announced in 2019 by Canada’s Ministry of Health, is intended to function as a single portal for researchers to request access to administrative, clinical, and social data from sources across the country [<xref ref-type="bibr" rid="ref17">17</xref>]. To achieve this goal, the SPOR CDP aims to harmonize and validate definitions for key analytic variables (eg, chronic diseases) while expanding the sources, types, and linkages of data available to researchers (eg, social data). Standardizing data definitions allows information exchanged between data holdings to be equally understood by all parties, a concept known as semantic interoperability [<xref ref-type="bibr" rid="ref18">18</xref>]. Semantic interoperability is especially important as it allows researchers to combine datasets. Canada is known to lag in health data interoperability [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>], and the development of metadata standards, a set of guidelines that establish a common way of structuring and understanding data [<xref ref-type="bibr" rid="ref15">15</xref>], would be very helpful. However, the SPOR CDP was originally announced as a 7-year initiative and is still ongoing as of 2026.</p>
        <p>In addition to larger metadata projects, some data-holding organizations also have public-facing websites that allow users to search their metadata. For example, the Institute for Clinical Evaluative Sciences (ICES) in Ontario provides a publicly accessible data dictionary of their metadata that is searchable at the variable level [<xref ref-type="bibr" rid="ref22">22</xref>]. Although such resources can be helpful, they lead to the problem of data fragmentation described previously, as researchers must visit each organization’s website and consolidate results themselves.</p>
        <p>Although there are clear use cases for larger projects such as DASH or the CDP and smaller, institution-level metadata websites, they do not offer a free-to-use and up-to-date repository of health metadata from across Canada. The goal of this study is to take the initial steps toward bridging this gap.</p>
      </sec>
      <sec>
        <title>A National MDR: Pilot Project Principles</title>
        <p>In this study, we were guided by 2 sets of principles: the principles of findability, accessibility, interoperability, and reusability (FAIR) data stewardship [<xref ref-type="bibr" rid="ref23">23</xref>] and a bottom-up principle of researcher-driven development.</p>
        <p>The FAIR principles were developed by Wilkinson et al [<xref ref-type="bibr" rid="ref23">23</xref>] to address the challenges in managing large amounts of data. The FAIR principles stipulate that both data and metadata should be findable, accessible, interoperable, and reusable by researchers [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. Clearly, the fragmented landscape of Canadian administrative health data does not adhere to FAIR principles in this sense, which creates what we view as unnecessary delays and roadblocks to potentially life-saving research.</p>
        <p>We also postulate that there is a useful role for researchers to play in creating a pragmatic and useful national MDR within the current Canadian health data landscape. Given the size and complexity of administrative health databases and the dappled policy environment governing data access across Canada, creating an MDR through the top-down approach at the organizational level would take a large degree of coordination, political will, and resources to harmonize data selection, definition, collection, and sharing procedures across all provincial and territorial health data holdings. Although an MDR built through top-down standardization would be ideal, there is no guarantee that one will be available in Canada soon.</p>
        <p>According to the Public Health Agency of Canada, federal, provincial, and territorial governments are currently working to improve the sharing of public health information [<xref ref-type="bibr" rid="ref25">25</xref>]. However, a data-sharing agreement between these governments is not expected until the end of 2026, with bilateral agreements to follow and then a lengthy process of harmonizing definitions and processes across the data holdings. In the meantime, a simpler solution built by and for researchers has the potential to provide value now.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Sources and Data Collection</title>
        <p>To ensure that our proof-of-concept MDR is robust and inclusive, we aimed to include metadata from a variety of national and provincial administrative health data sources. Administrators and data custodians at national and provincial data holdings (<xref ref-type="table" rid="table1">Table 1</xref>) were contacted via email between January 2023 and September 2023 to request access to the metadata from all held administrative health datasets, ideally in a raw data format such as CSV. Among the data custodians contacted, metadata were provided by or accessible from the ICES [<xref ref-type="bibr" rid="ref26">26</xref>], the Manitoba Centre for Health Policy (MCHP) [<xref ref-type="bibr" rid="ref27">27</xref>], the Institut de la statistique du Québec [<xref ref-type="bibr" rid="ref28">28</xref>], and the New Brunswick Institute for Research, Data and Training [<xref ref-type="bibr" rid="ref29">29</xref>]. We also obtained and included metadata from the Canadian Longitudinal Study on Aging [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
        <p>Metadata files were obtained from MCHP and the New Brunswick Institute for Research, Data and Training. For the other 3 data holdings, data scraping [<xref ref-type="bibr" rid="ref31">31</xref>] was performed by a member of the research team (VM-S) to extract metadata from publicly available online sources and data dictionaries. Detailed explanations of how data were collected from each data holding are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Once the metadata from the 5 included data holdings were pooled into a single CSV file, the metadata were organized according to commonly reported data elements across sources, including data holding, dataset name, dates available, variable label, variable name, and variable definition, as reported by the respective data source.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Data dictionary availability from administrative health data custodians by province.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="510"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>Province</td>
                <td>Data custodian</td>
                <td>Publicly accessible data dictionary or catalog</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Alberta</td>
                <td>Alberta Health</td>
                <td>Yes<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>BC<sup>b</sup></td>
                <td>Population Data BC</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Manitoba</td>
                <td>Manitoba Centre for Health Policy</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>New Brunswick</td>
                <td>New Brunswick Institute for Research, Data and Training</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Newfoundland and Labrador</td>
                <td>Newfoundland and Labrador Centre for Health Information</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>Nova Scotia</td>
                <td>Health Data Nova Scotia</td>
                <td>Yes<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Ontario</td>
                <td>Institute for Clinical Evaluative Sciences</td>
                <td>Yes</td>
              </tr>
              <tr valign="top">
                <td>PEI<sup>c</sup></td>
                <td>Health PEI</td>
                <td>No</td>
              </tr>
              <tr valign="top">
                <td>Quebec</td>
                <td>Régie de l’assurance maladie du Québec</td>
                <td>No</td>
              </tr>
              <tr valign="top">
                <td>Saskatchewan</td>
                <td>eHealth Saskatchewan</td>
                <td>No</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Available only upon request.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>BC: British Columbia.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>PEI: Prince Edward Island.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Labeling</title>
        <p>Data labeling (or tagging) is the common process of assigning one or more descriptive tags or labels to a dataset [<xref ref-type="bibr" rid="ref32">32</xref>], which can make it easier to search and filter results while enabling other uses of the data (eg, machine learning) [<xref ref-type="bibr" rid="ref33">33</xref>]. To provide an example of searchability in our proof-of-concept MDR, we identified potential language-related variables. We used a naive string-searching algorithm, which works by checking for the occurrence of a pattern (or string) at every possible position in the text [<xref ref-type="bibr" rid="ref34">34</xref>]. Given Canada’s status as a bilingual English-speaking and French-speaking country, we identified potentially linguistic variables as those matching any of the following text strings: “french,” “english,” “lang,” “spoken,” “speak,” “ling,” and “franc.”</p>
        <p>From here, 2 members of the research team (VM-S and CP) independently reviewed 9.9% (84,068/850,343) of the overall variable names and definitions in the dataset (these were taken from the list of potential language-related variables; <xref rid="figure1" ref-type="fig">Figure 1</xref>) to agree on the criteria to define what a language variable is, placing higher value on the most common definitions. The 2 researchers then met to reach a consensus on the standardized definition for tagging language-related variables within the proof-of-concept MDR: “any variable that directly or indirectly provides information regarding the linguistic characteristics of an individual, a health professional, or an organization.” This definition aimed to be extremely broad to be able to accommodate any form of research method, including Bayesian statistical approaches.</p>
        <p>Both researchers then independently screened all previously identified variables, including variable names and definitions, to identify all language-related variables according to our standardized definition. Screening results were then compared to ensure consensus in the labeled variables between the 2 researchers. Any conflicts in the identification of language-related variables or in the application of the standardized tagging definition were resolved via conversation with a third member of the research team (LMB).</p>
        <p>To quantitatively assess the reliability of this screening process, we calculated interrater reliability using Cohen κ [<xref ref-type="bibr" rid="ref35">35</xref>], which measures the consistency with which both researchers independently applied the standardized definition, accounting for agreement that would be expected by chance. This approach is a standard practice in systematic reviews and content analysis methodologies where operational definitions are developed through iterative refinement and consensus-building discussion [<xref ref-type="bibr" rid="ref36">36</xref>]. To reduce potential bias, we calculated Cohen κ on 137,594 of the 221,662 (62.1%) variables screened after the standardized definition was established (<xref rid="figure1" ref-type="fig">Figure 1</xref>), excluding the 84,068 (37.9%) variables from the initial screening phase used for definition development.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flowchart for identification of language-related variables from health care data holdings. CLSA: Canadian Longitudinal Study on Aging; ICES: Institute for Clinical Evaluative Sciences; ISQ: Institut de la statistique du Québec; MCHP: Manitoba Centre for Health Policy; NB-IRDT: New Brunswick Institute for Research, Data and Training.</p>
          </caption>
          <graphic xlink:href="infodemiology_v6i1e77242_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>All data used in this study were limited to publicly available metadata, which posed no privacy risk or potential for harm. No personal health information, patient data, or confidential research data were accessed. The collected metadata consisted solely of variable names, descriptions, dataset structures, and data availability information—content that data custodians have chosen to make publicly available to facilitate research discovery and data access applications. For these reasons, approval from a research ethics board was neither required nor sought.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>Across the 5 included data sources, metadata from a total of 850,343 variables were collected and included in our repository. The number of metadata variables collected from each data holding is presented in <xref ref-type="table" rid="table2">Table 2</xref>. Among the data holdings, the ICES (712,037/850,343, 83.7%) and MCHP (n=97,051, 11.4%) data holdings contained the most variables.</p>
        <p>Among the initial 850,343 variables in our repository, 221,662 (26.1%) potential or unconfirmed language-related variables were identified by using a search string algorithm across variable labels, names, and definitions. Consensus screening of these variables using a derived, standardized definition of language or linguistic variables identified 213,696 (25.1%) confirmed language-related variables in our repository (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>Interrater reliability for the independent screening process was assessed using observed percent agreement and Cohen κ. The 2 researchers initially agreed on 96.3% (132,538/137,594) of these postdefinition variables, with 5056 (3.7%) disagreements that were subsequently resolved through consensus-building discussion. The calculated Cohen κ was 0.621 (95% CI 0.611-0.632), indicating substantial agreement between the 2 researchers.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Number of metadata variables included in the proof-of-concept metadata repository by data holding (N=850,343).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="660"/>
            <col width="340"/>
            <thead>
              <tr valign="top">
                <td>Data holding</td>
                <td>Variables, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Institute for Clinical Evaluative Sciences</td>
                <td>712,037 (83.7)</td>
              </tr>
              <tr valign="top">
                <td>Institut de la statistique du Québec</td>
                <td>26,481 (3.1)</td>
              </tr>
              <tr valign="top">
                <td>Manitoba Centre for Health Policy</td>
                <td>97,051 (11.4)</td>
              </tr>
              <tr valign="top">
                <td>New Brunswick Institute for Research, Data and Training</td>
                <td>5971 (0.7)</td>
              </tr>
              <tr valign="top">
                <td>Canadian Longitudinal Study on Aging</td>
                <td>8803 (1.0)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Creating a Usable Proof-of-Concept Web Interface</title>
        <p>To facilitate exploration and use of the MDR, we developed a prototype web application that allows users to browse and search the MDR over the internet [<xref ref-type="bibr" rid="ref37">37</xref>]. Although data management frameworks exist, such as the DataHub Project [<xref ref-type="bibr" rid="ref38">38</xref>] and the Comprehensive Knowledge Archive Network (CKAN) [<xref ref-type="bibr" rid="ref39">39</xref>], we developed our application in R (version 4.3.1; R Foundation for Statistical Computing) using <italic>Shiny</italic> [<xref ref-type="bibr" rid="ref40">40</xref>]. <italic>Shiny</italic> is an open-source R package that makes it easy to build interactive web applications directly using R, a programming language widely used for statistical computing and graphics. <italic>Shiny</italic> was chosen due to its relative simplicity compared to other web development frameworks and the research team’s familiarity with the R programming language. The user interface of the <italic>Shiny</italic> app was designed with user-friendliness and functionality in mind, and it allows users to search by keyword, filter by data properties (eg, data holding and linguistic properties), and browse through paginated results. The <italic>Shiny</italic> application was built into a Docker image and hosted on a public platform-as-a-service provider.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Canada’s health data are scattered across many organizations and jurisdictions, each with its own policies and procedures, making it difficult for researchers to identify, locate, and use existing data [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. To address this gap, we developed a proof-of-concept MDR containing metadata for more than 850,000 variables from 5 different Canadian data holdings and performed bottom-up labeling of the 213,696 (25.1%) language-related variables among the 850,343 variables within the repository to help researchers easily identify language-related data within the vast landscape of Canadian health data. We also developed an openly accessible web application to allow users to search the MDR [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
      </sec>
      <sec>
        <title>Building a Bottom-Up MDR: Lessons Learned</title>
        <p>Our pilot project demonstrated the feasibility of a bottom-up approach to building an MDR for Canadian health data, but we learned several important lessons that we summarize here. First, complex, manual effort was required to collect (or “scrape”) data that are publicly available on the internet. Web scraping is very fast when it works, but each data source needs a bespoke approach. Some websites are straightforward to scrape (eg, those that use backend application programming interfaces that can be queried directly), but others use an architecture that is not well suited to automatic data collection (eg, those that require repeated form submissions or client-side JavaScript). In addition, the scraping logic is custom-built to each website’s design at that moment in time, and if repositories update their websites, the scraping code will need to be updated as well.</p>
        <p>Second, there is a need for robust internal data management practices when developing an MDR. We initially prioritized simplicity as well as data portability and transparency; therefore, we stored our data in plaintext CSV files. However, as we collected more data, we were surprised by the size of our final dataset, at a little more than 1 GB. Although this is small compared to many geospatial or genetic datasets, files of this size are unwieldy to work with, since common office software, such as Microsoft Excel, may not be able to load all variables, and the files can be slow and difficult to transfer to others. For any similar projects, we suggest that a simple data-storage format, such as CSV files, is appropriate for initial feasibility studies, but the project should move quickly to a more sophisticated centralized data-storage solution (eg, a database or a large-file storage solution with version tracking) once feasibility has been established.</p>
        <p>Finally, we learned that <italic>Shiny</italic> has several limitations that make it ill-suited for public-facing web applications with datasets this large. <italic>Shiny</italic> creates a new R session for each user and loads the entire dataset into server memory. For a typical dataset measured in KBs or MBs, the overhead is negligible; however, since our data are approximately 1 GB, our application runs out of memory and crashes with more than a few concurrent users. So, although <italic>Shiny</italic> was indispensable to us for rapid prototyping on a local computer, for production deployments, we suggest a different framework in which the data are stored in a single database and queried as needed, as opposed to the server loading a new in-memory copy of the dataset for each user. The user interface could be written using any web development framework (eg, Phoenix and React), and open-source database software such as PostgreSQL, which is commonly used in large commercial and government projects, would be capable of handling queries on a million-row dataset with millisecond-level response times [<xref ref-type="bibr" rid="ref42">42</xref>]. Direct access to the application programming interface could also be added, but implementation details of this potential future project are outside the scope of this paper.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although our proof of concept provides a working example of a bottom-up labeled MDR, our methodology is not without limitations. For our initial screening of language variables, we used a search string algorithm to first identify potential language-related variables within all datasets in the proof-of-concept MDR. This search string may not have been exhaustive and could have missed potential language-related variables within the included datasets. Moreover, to best assess the accuracy of our algorithm, it would need to be tested against manually screened datasets as a gold standard for our definition. Although this process was considered too time consuming for the scope of this proof-of-concept project, given the size of the datasets used, it would allow us to evaluate measures such as sensitivity and specificity.</p>
        <p>Finally, because variables in our repository were web scraped from various data sources, our repository reflects what variables were available at the point in time of data scraping and would require a repeat of the scraping, screening, and labeling process to update the repository as it is. In addition, there may have been additional metadata in the data holdings that were not made publicly available and therefore were not scrapable, meaning our proof-of-concept MDR may not be exhaustive of variables from the included data holdings. Nonetheless, without top-down policies and procedures in place to allow for easy data collection and labeling processes across Canada, our language-variable data labeling provides a working example of how bottom-up data labeling can be performed by researchers.</p>
      </sec>
      <sec>
        <title>Future Directions</title>
        <p>Although the MDR is currently in a beta version, we plan to expand it to include variables from additional Canadian administrative health data holdings, such as Population Data British Columbia [<xref ref-type="bibr" rid="ref43">43</xref>], and data from Statistics Canada [<xref ref-type="bibr" rid="ref44">44</xref>]. Moreover, additional variable tagging can be performed to identify sociodemographic variable types within all included datasets for research purposes, such as sex, gender, race, ethnicity, income, and immigration status. Regarding language-related variables specifically, subtagging can be performed for more specific variable definitions [<xref ref-type="bibr" rid="ref45">45</xref>], including knowledge of official languages (French or English), variables indicating first language or mother tongue, or variables related to patient–health care provider language concordance.</p>
        <p>We also intend to develop a new MDR web application that overcomes the limitations of our <italic>Shiny</italic> app by using a backend database, so that the entire dataset does not need to be loaded into server memory repeatedly for each user.</p>
        <p>Finally, we believe that creating a <italic>top-down</italic> national MDR is a worthy goal that should be pursued in tandem with <italic>bottom-up</italic> efforts such as ours. However, such a project would face a number of governance, legal, ethical, and administrative barriers and require a high degree of alignment across diverse organizations so as not to create numerous delays in the collection and integration of data from provincial and organizational data custodians [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. In other words, an ideal top-down MDR will need intense collaboration between many organizations, and although this is beyond our power as individual researchers, we hope Canada’s data custodians will rise to the challenge.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This paper addresses the need for a national MDR of administrative and other health data in Canada, underscoring how an MDR can address issues caused by data fragmentation and increase the FAIRness of health care data across the country. However, complex challenges hinder the development of a top-down health data MDR in Canada. We developed a proof-of-concept MDR of administrative health data from 5 different data sources and performed bottom-up labeling of language-related variables within the repository to help researchers easily identify language data in the vast landscape of Canadian health data. This MDR is publicly available online as a searchable data dictionary [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>Our proof-of-concept MDR illustrates the methodological limitations of a bottom-up approach, which can be complementary and synergistic with but cannot replace top-down approaches to the development of such a repository. Engagement of and collaboration between provincial data holdings and federal data agencies are critical to ensuring a pan-Canadian MDR is comprehensive and can be kept up to date. A national MDR would make it simple and straightforward for Canadian researchers to leverage the full scope of Canadian health data, and open opportunities for new studies as researchers discover datasets previously unknown to them. We believe that this could be transformative, and we hope this pilot project demonstrates the feasibility of a bottom-up approach in contributing toward this overarching goal.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Data collection methods and compliance by source.</p>
        <media xlink:href="infodemiology_v6i1e77242_app1.docx" xlink:title="DOCX File, 22 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDP</term>
          <def>
            <p>Canadian Data Platform</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CKAN</term>
          <def>
            <p>Comprehensive Knowledge Archive Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DASH</term>
          <def>
            <p>Data Access Support Hub</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FAIR</term>
          <def>
            <p>findability, accessibility, interoperability, and reusability</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HDRN</term>
          <def>
            <p>Health Data Research Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICES</term>
          <def>
            <p>Institute for Clinical Evaluative Sciences</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MCHP</term>
          <def>
            <p>Manitoba Centre for Health Policy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MDR</term>
          <def>
            <p>metadata repository</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SPOR</term>
          <def>
            <p>Strategy for Patient-Oriented Research</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors acknowledge the contributions of the following steering committee members and other contributors who, while not fulfilling all authorship criteria, have generously provided their expertise and contributed to the success of this project: Marie-Hélène Chomienne, associate professor and research chair, Chaire de recherche en francophonie internationale et santé de l’immigrant ou du réfugié d’Afrique francophone subsaharienne, University of Ottawa; Jan Warnke, data model analyst, l’Hôpital Jeffery Hale; Claire Kendall, senior scientist, Bruyère Health Research Institute; Cynthia Kendell, assistant professor and research implementation scientist, Department of Medicine, Dalhousie University; and Louise Bouchard, co-director, Réseau Francophonie Observatoire de Recherche Collaborative en Études sur la Santé et les services en contexte minoritaire.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>This work was funded through a Canadian Institutes of Health Research Catalyst Grant: Official Language Minority Communities in Health Research (grant 472426; principal investigator LMB).</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data for this study are available for browsing [<xref ref-type="bibr" rid="ref37">37</xref>], and the full dataset is available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cadarette</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>An introduction to health care administrative data</article-title>
          <source>Can J Hosp Pharm</source>
          <year>2015</year>
          <month>06</month>
          <day>25</day>
          <volume>68</volume>
          <issue>3</issue>
          <fpage>232</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26157185"/>
          </comment>
          <pub-id pub-id-type="doi">10.4212/cjhp.v68i3.1457</pub-id>
          <pub-id pub-id-type="medline">26157185</pub-id>
          <pub-id pub-id-type="pmcid">PMC4485511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lucyk</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sajobi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Administrative health data in Canada: lessons from history</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2015</year>
          <month>08</month>
          <day>19</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>69</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-015-0196-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-015-0196-9</pub-id>
          <pub-id pub-id-type="medline">26286712</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-015-0196-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC4544791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kendell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Urquhart</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Factors affecting access to administrative health data for research in Canada: a study protocol</article-title>
          <source>Int J Popul Data Sci</source>
          <year>2021</year>
          <month>09</month>
          <day>23</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>1653</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34632104"/>
          </comment>
          <pub-id pub-id-type="doi">10.23889/ijpds.v6i1.1653</pub-id>
          <pub-id pub-id-type="medline">34632104</pub-id>
          <pub-id pub-id-type="pii">S2399490821016530</pub-id>
          <pub-id pub-id-type="pmcid">PMC8477899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mansoor</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Comeau</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Language: the ignored determinant of health</article-title>
          <source>Paediatr Child Health</source>
          <year>2024</year>
          <month>09</month>
          <day>12</day>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>168</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1093/pch/pxad066</pub-id>
          <pub-id pub-id-type="medline">38827371</pub-id>
          <pub-id pub-id-type="pii">pxad066</pub-id>
          <pub-id pub-id-type="pmcid">PMC11141594</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Reaume</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Seale</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sucha</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pugliese</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kendall</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Bjerre</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Bouchard</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Prud'homme</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Manuel</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Prevalence and patterns of multimorbidity among linguistic groups of patients receiving home care in Ontario: a retrospective cohort study</article-title>
          <source>BMC Geriatr</source>
          <year>2023</year>
          <month>11</month>
          <day>09</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>725</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcgeriatr.biomedcentral.com/articles/10.1186/s12877-023-04267-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12877-023-04267-5</pub-id>
          <pub-id pub-id-type="medline">37946126</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12877-023-04267-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC10634019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Moissac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bowen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Impact of language barriers on quality of care and patient safety for official language minority francophones in Canada</article-title>
          <source>J Patient Exp</source>
          <year>2019</year>
          <month>03</month>
          <day>18</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/2374373518769008?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2374373518769008</pub-id>
          <pub-id pub-id-type="medline">31236448</pub-id>
          <pub-id pub-id-type="pii">10.1177_2374373518769008</pub-id>
          <pub-id pub-id-type="pmcid">PMC6572938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jutras</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gauthier</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Timony</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Côté</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kpazaï</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Expérience de francophones en Ontario chez leur médecin de famille: concordance et discordance linguistique</article-title>
          <source>Divers Res Health J</source>
          <year>2020</year>
          <month>3</month>
          <volume>3</volume>
          <fpage>12</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/339884178_Experience_de_francophones_en_Ontario_chez_leur_medecin_de_famille_concordance_et_discordance_linguistique"/>
          </comment>
          <pub-id pub-id-type="doi">10.28984/drhj.v3i0.310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bowen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of language barriers on patient safety and quality of care</article-title>
          <source>Société Santé en français</source>
          <year>2015</year>
          <month>8</month>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.santefrancais.ca/wp-content/uploads/2018/11/SSF-Bowen-S.-Language-Barriers-Study-1.pdf">https://www.santefrancais.ca/wp-content/uploads/2018/11/SSF-Bowen-S.-Language-Barriers-Study-1.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Timony</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Gauthier</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Serresse</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Goodale</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Prpic</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Barriers to offering French language physician services in rural and northern Ontario</article-title>
          <source>Rural Remote Health</source>
          <year>2016</year>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>3805</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rrh.org.au/articles/subviewnew.asp?ArticleID=3805"/>
          </comment>
          <pub-id pub-id-type="medline">27316568</pub-id>
          <pub-id pub-id-type="pii">3805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gauthier</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Timony</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Serresse</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Goodale</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Prpic</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Strategies for improved French-language health services: perspectives of family physicians in northeastern Ontario</article-title>
          <source>Can Fam Physician</source>
          <year>2015</year>
          <month>08</month>
          <volume>61</volume>
          <issue>8</issue>
          <fpage>e382</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cfp.ca/cgi/pmidlookup?view=long&amp;pmid=26505060"/>
          </comment>
          <pub-id pub-id-type="medline">26505060</pub-id>
          <pub-id pub-id-type="pmcid">PMC4541448</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reaume</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Talarico</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Guerin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Prud'homme</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>In-hospital patient harm across linguistic groups: a retrospective cohort study of home care recipients</article-title>
          <source>J Patient Saf</source>
          <year>2022</year>
          <month>01</month>
          <day>01</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>e196</fpage>
          <lpage>204</lpage>
          <pub-id pub-id-type="doi">10.1097/PTS.0000000000000726</pub-id>
          <pub-id pub-id-type="medline">32433437</pub-id>
          <pub-id pub-id-type="pii">01209203-202201000-00034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reaume</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Talarico</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Guerin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Prud'homme</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The impact of hospital language on the rate of in-hospital harm. A retrospective cohort study of home care recipients in Ontario, Canada</article-title>
          <source>BMC Health Serv Res</source>
          <year>2020</year>
          <month>04</month>
          <day>21</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>340</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-020-05213-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-020-05213-6</pub-id>
          <pub-id pub-id-type="medline">32316965</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-020-05213-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7175496</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seale</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Reaume</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Eddeen</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McIsaac</surname>
              <given-names>DI</given-names>
            </name>
            <name name-style="western">
              <surname>Kendall</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Sood</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Prud'homme</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Patient-physician language concordance and quality and safety outcomes among frail home care recipients admitted to hospital in Ontario, Canada</article-title>
          <source>CMAJ</source>
          <year>2022</year>
          <month>07</month>
          <day>11</day>
          <volume>194</volume>
          <issue>26</issue>
          <fpage>E899</fpage>
          <lpage>908</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&amp;pmid=35817434"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.212155</pub-id>
          <pub-id pub-id-type="medline">35817434</pub-id>
          <pub-id pub-id-type="pii">194/26/E899</pub-id>
          <pub-id pub-id-type="pmcid">PMC9328476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <article-title>Statistics on official languages in Canada</article-title>
          <source>Government of Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.canada.ca/en/canadian-heritage/services/official-languages-bilingualism/publications/statistics.html">https://www.canada.ca/en/canadian-heritage/services/official-languages-bilingualism/publications/statistics.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ulrich</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kock-Schoppenhauer</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Deppenwiese</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gött</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lablans</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Majeed</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Stöhr</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Stausberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Varghese</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dugas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ingenerf</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Understanding the nature of metadata: systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>01</month>
          <day>11</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>e25440</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/1/e25440/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25440</pub-id>
          <pub-id pub-id-type="medline">35014967</pub-id>
          <pub-id pub-id-type="pii">v24i1e25440</pub-id>
          <pub-id pub-id-type="pmcid">PMC8790684</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>About DASH</article-title>
          <source>Health Data Research Network Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hdrn.ca/dash/about-dash/">https://www.hdrn.ca/dash/about-dash/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>Health minister announces launch of SPOR Canadian Data Platform</article-title>
          <source>Health Data Research Network Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hdrn.ca/en/news/formal-launch-of-the-spor-canadian-data-platform/">https://www.hdrn.ca/en/news/formal-launch-of-the-spor-canadian-data-platform/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Mello</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Rigo</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>da Costa</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>da Rosa Righi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Donida</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bez</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Schunke</surname>
              <given-names>LC</given-names>
            </name>
          </person-group>
          <article-title>Semantic interoperability in health records standards: a systematic literature review</article-title>
          <source>Health Technol (Berl)</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>255</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35103230"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12553-022-00639-w</pub-id>
          <pub-id pub-id-type="medline">35103230</pub-id>
          <pub-id pub-id-type="pii">639</pub-id>
          <pub-id pub-id-type="pmcid">PMC8791650</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Read</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leahey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rutle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Stathis</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Identifying metadata commonalities across restricted health data sources: a mixed methods study exploring how to improve the discovery of and access to restricted datasets</article-title>
          <source>J Escience Librariansh</source>
          <year>2024</year>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>e907</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publishing.escholarship.umassmed.edu/jeslib/article/id/907/"/>
          </comment>
          <pub-id pub-id-type="doi">10.7191/jeslib.907</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Expert Advisory Group</collab>
          </person-group>
          <article-title>Pan-Canadian health data strategy: toward a world-class health data system</article-title>
          <source>Public Health Agency of Canada</source>
          <year>2022</year>
          <access-date>2026-01-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.canada.ca/content/dam/phac-aspc/documents/corporate/mandate/about-agency/external-advisory-bodies/list/pan-canadian-health-data-strategy-reports-summaries/expert-advisory-group-report-03-toward-world-class-health-data-system/expert-advisory-group-report-03-toward-world-class-health-data-system.pdf">https://tinyurl.com/ycfb2r8z</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Affleck</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sutherland</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lindeman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Golonka</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Layton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fraser</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Human factor health data interoperability</article-title>
          <source>Healthc Pap</source>
          <year>2024</year>
          <month>01</month>
          <day>31</day>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>47</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.12927/hcpap.2024.27272</pub-id>
          <pub-id pub-id-type="medline">38482657</pub-id>
          <pub-id pub-id-type="pii">hcpap.2024.27272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <article-title>Data dictionary</article-title>
          <source>Institute for Clinical Evaluative Sciences (ICES)</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://datadictionary.ices.on.ca/Applications/datadictionary/Default.aspx">https://datadictionary.ices.on.ca/Applications/datadictionary/Default.aspx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Dumontier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aalbersberg</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Appleton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Axton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Baak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Blomberg</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Boiten</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>da Silva Santos</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Bouwman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brookes</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Crosas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dillo</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dumon</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Edmunds</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Evelo</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Finkers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Beltran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Groth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Goble</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Grethe</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Heringa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>'t Hoen</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Hooft</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lusher</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Martone</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Mons</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Packer</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Persson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca-Serra</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roos</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van Schaik</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sansone</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schultes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sengstag</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Slater</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Strawn</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Swertz</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van Mulligen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Velterop</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Waagmeester</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wittenburg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wolstencroft</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mons</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The FAIR Guiding Principles for scientific data management and stewardship</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>03</month>
          <day>15</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>160018</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.18"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id>
          <pub-id pub-id-type="medline">26978244</pub-id>
          <pub-id pub-id-type="pii">sdata201618</pub-id>
          <pub-id pub-id-type="pmcid">PMC4792175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boeckhout</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zielhuis</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Bredenoord</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>The FAIR guiding principles for data stewardship: fair enough?</article-title>
          <source>Eur J Hum Genet</source>
          <year>2018</year>
          <month>07</month>
          <day>17</day>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>931</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29777206"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41431-018-0160-0</pub-id>
          <pub-id pub-id-type="medline">29777206</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41431-018-0160-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6018669</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <article-title>Modernizing public health information sharing</article-title>
          <source>Government of Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.canada.ca/en/public-health/services/data/modernizing-information-sharing.html">https://www.canada.ca/en/public-health/services/data/modernizing-information-sharing.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Institute for Clinical Evaluative Sciences homepage</article-title>
          <source>Institute for Clinical Evaluative Sciences (ICES)</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ices.on.ca/">https://www.ices.on.ca/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Manitoba Centre for Health Policy</article-title>
          <source>University of Manitoba</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://umanitoba.ca/manitoba-centre-for-health-policy/">https://umanitoba.ca/manitoba-centre-for-health-policy/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>Institut de la statistique du Québec homepage</article-title>
          <source>Institut de la statistique du Québec</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://statistique.quebec.ca/en">https://statistique.quebec.ca/en</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>DataNB</article-title>
          <source>University of New Brunswick</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.unb.ca/nbirdt/">https://www.unb.ca/nbirdt/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <article-title>CLSA homepage</article-title>
          <source>Canadian Longitudinal Study on Aging</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.clsa-elcv.ca/">https://www.clsa-elcv.ca/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Web scraping</article-title>
          <source>Statistics Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statcan.gc.ca/en/our-data/where/web-scraping">https://www.statcan.gc.ca/en/our-data/where/web-scraping</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahalle</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Shinde</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Ingle</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Wasatkar</surname>
              <given-names>NN</given-names>
            </name>
          </person-group>
          <source>Data Centric Artificial Intelligence: A Beginner’s Guide</source>
          <year>2023</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Artstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Poesio</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Statistical Methods for Annotation Analysis</source>
          <year>2022</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdeen</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>An algorithm for string searching</article-title>
          <source>Int J Comput Appl</source>
          <year>2019</year>
          <month>10</month>
          <day>17</day>
          <volume>177</volume>
          <issue>10</issue>
          <fpage>17</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ijcaonline.org/archives/volume177/number10/abdeen-2019-ijca-919484.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.5120/ijca2019919484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McHugh</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Interrater reliability: the kappa statistic</article-title>
          <source>Biochem Med (Zagreb)</source>
          <year>2012</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>276</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23092060"/>
          </comment>
          <pub-id pub-id-type="medline">23092060</pub-id>
          <pub-id pub-id-type="pmcid">PMC3900052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Belur</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tompson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Thornton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Interrater reliability in systematic review methodology: exploring variation in coder decision-making</article-title>
          <source>Sociol Methods Res</source>
          <year>2018</year>
          <month>09</month>
          <day>24</day>
          <volume>50</volume>
          <issue>2</issue>
          <fpage>837</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1177/0049124118799372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>CLOSM - data catalogue (beta version)</article-title>
          <source>Health Data Dictionary</source>
          <year>2024</year>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://healthdatadictionary.ca/">https://healthdatadictionary.ca/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Datahub-project</article-title>
          <source>GitHub</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/datahub-project/datahub">https://github.com/datahub-project/datahub</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>CKAN</article-title>
          <source>GitHub</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/ckan">https://github.com/ckan</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>Rstudio/shiny</article-title>
          <source>GitHub</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/rstudio/shiny">https://github.com/rstudio/shiny</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>FAIR data infrastructure</article-title>
          <source>Adv Biochem Eng Biotechnol</source>
          <year>2022</year>
          <volume>182</volume>
          <fpage>195</fpage>
          <lpage>207</lpage>
          <pub-id pub-id-type="doi">10.1007/10_2021_193</pub-id>
          <pub-id pub-id-type="medline">35091812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salunke</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Ouda</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A performance benchmark for the PostgreSQL and MySQL databases</article-title>
          <source>Future Internet</source>
          <year>2024</year>
          <month>10</month>
          <day>19</day>
          <volume>16</volume>
          <issue>10</issue>
          <fpage>382</fpage>
          <pub-id pub-id-type="doi">10.3390/fi16100382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <article-title>Population Data BC homepage</article-title>
          <source>Population Data BC</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.popdata.bc.ca/">https://www.popdata.bc.ca/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Data</article-title>
          <source>Statistics Canada</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www150.statcan.gc.ca/n1/en/type/data">https://www150.statcan.gc.ca/n1/en/type/data</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Bouchard</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Reaume</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rhodes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sucha</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Guerin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Prud'homme</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Manuel</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Ascertaining the Francophone population in Ontario: validating the language variable in health data</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2024</year>
          <month>04</month>
          <day>27</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>98</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-024-02220-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-024-02220-7</pub-id>
          <pub-id pub-id-type="medline">38678174</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-024-02220-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC11055282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Enns</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McGrail</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bakal</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Challenges associated with cross-jurisdictional analyses using administrative health data and primary care electronic medical records in Canada</article-title>
          <source>Int J Popul Data Sci</source>
          <year>2018</year>
          <month>10</month>
          <day>05</day>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>437</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34095523"/>
          </comment>
          <pub-id pub-id-type="doi">10.23889/ijpds.v3i3.437</pub-id>
          <pub-id pub-id-type="medline">34095523</pub-id>
          <pub-id pub-id-type="pii">S2399490818004378</pub-id>
          <pub-id pub-id-type="pmcid">PMC8142948</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <article-title>Become an ICES Scientist</article-title>
          <source>Institute for Clinical Evaluative Sciences (ICES)</source>
          <access-date>2025-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ices.on.ca/join-our-research-community/become-an-ices-scientist/">https://www.ices.on.ca/join-our-research-community/become-an-ices-scientist/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
