Presentation-Ready Analytic Result Tables Using gtsummary

Table of Contents

⚠️ Make Sure You Understand the Code Before Using It ⚠️

⚠️ This code is intended to generate tables for the PDF file and is currently under construction. ⚠️

Library and Data Prep

The veteran dataset contains survival data from a two-treatment, randomized trial for lung cancer. The variables in veteran are:

  • trt: 1=standard 2=test
  • celltype: squamous, small cell, adeno, large
  • time: survival time in days
  • status: censoring status 0=censored, 1=death
  • karno: Karnofsky performance score (100=good)
  • diagtime: months from diagnosis to randomization
  • age: in years
  • prior: prior therapy 0=no, 10=yes

Task: Examine the associations of treatment, cell type, and prior therapy with overall survival.

options(tidyverse.quiet = TRUE)
library(tidyverse)
library(gtsummary)
library(survival)

veteran <- survival::veteran

data_veteran = veteran %>% 
  mutate(
    trt_factor = factor(trt, levels = c(1,2), labels = c("Control", "Treatment")),
    prior_factor = factor(prior, levels = c(0,10), labels = c("no", "yes")),
    time = time/30.437 # One month = 30.437 days
  )

The mtcars dataset is ideal for testing binary outcomes. The variables in mtcars are:

  • vs: engine shape (0 = V-shaped, 1 = straight).
  • wt: car weight (1000 lbs)
  • disp: displacement (cu.in.)

Task: Predict the probability of a car having a straight engine based on its weight (wt) and displacement (disp).

data_mtcars = mtcars

Survival

Univariable Model

os_univariable_table = tbl_uvregression(
    data_veteran %>% dplyr::select(time, status, trt_factor, celltype, prior_factor), # Retain only the relevant variables.
    method = coxph,
    y = Surv(time, status),
    exponentiate = TRUE,
    # Labeling
    label = list(trt_factor = "Treatment", 
                 celltype = "Cell Type",
                 prior_factor = "Prior Therapy"),
    pvalue_fun = function(x) style_pvalue(x, digits = 3) # Formatting p-value
    ) %>% 
  add_global_p(include=c("celltype"),keep=TRUE) %>% # Add an overall p-value when certain variable contains multiple levels
  modify_column_merge( # aesthetic adjustment from this line to the end.
    pattern = "{estimate} [{conf.low}, {conf.high}]",
    rows = !reference_row %in% TRUE & !is.na(conf.low)
  ) %>% 
  modify_table_styling(
    columns = c(estimate),
    rows = reference_row %in% TRUE,
    missing_symbol = "REF"
  ) %>% 
  modify_header(estimate = "**HR [95% CI]**") %>% 
  modify_column_hide("stat_n")
  # modify_caption(caption = "OS") %>% 
  # modify_spanning_header(c(stat_n, estimate, conf.low, p.value) ~ "**Univariable**")

### To check the header names such as estimate, conf.low, conf.high
# show_header_names()

# Hard coding, manually remove level-specific p-values of multi-level variables.
os_univariable_table$table_body$p.value[c(6,7,8)] = NA

Multivariable Model

  • For multivariable analyses, first fit the model, then summarize its results using tbl_regression().
os_multivariable_table <- coxph(
   Surv(time, status) ~ trt_factor+celltype+prior_factor, data = data_veteran
   ) %>%
  tbl_regression(
      exponentiate=TRUE,
      # Two tables must have the same labels to be merged
      label = list(trt_factor = "Treatment",
                   celltype = "Cell Type",
                   prior_factor = "Prior Therapy"),
      pvalue_fun = function(x) style_pvalue(x, digits = 3)
   ) %>% 
  add_global_p(include=c("celltype"),keep=TRUE) %>% 
  modify_column_merge(
    pattern = "{estimate} [{conf.low}, {conf.high}]",
    rows = !reference_row %in% TRUE & !is.na(conf.low)
  ) %>% 
  modify_table_styling(
    columns = c(estimate),
    rows = reference_row %in% TRUE,
    missing_symbol = "REF"
  ) %>% 
  modify_header(estimate = "**HR [95% CI]**")

os_multivariable_table$table_body$p.value[c(6,7,8)] = NA

Merge Two Tables Side by Side

os_table_combined = tbl_merge(tbls = list(os_univariable_table, os_multivariable_table),
                     tab_spanner = c("**Univariable Models**", "**Multivariable Model**"))

os_table_combined
Characteristic
Univariable Models
Multivariable Model
HR [95% CI]p-valueHR [95% CI]p-value
Treatment



    ControlREF
REF
    Treatment1.02 [0.71, 1.45]0.9221.22 [0.83, 1.79]0.321
Cell Type
<0.001
<0.001
    squamousREF
REF
    smallcell2.72 [1.66, 4.47]
3.00 [1.76, 5.14]
    adeno3.15 [1.77, 5.59]
3.24 [1.80, 5.84]
    large1.26 [0.73, 2.17]
1.35 [0.77, 2.36]
Prior Therapy



    noREF
REF
    yes0.87 [0.59, 1.28]0.4761.03 [0.69, 1.54]0.885
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio

GLM - Binary Outcome

Univariable Model

  • Binary outcome variable can be either numeric 0 and 1 or as a factor with two levels
or_univariable_table = tbl_uvregression(
    data_mtcars %>% dplyr::select(vs, wt, disp), 
    method = glm,
    y = vs,
    method.args = list(family = binomial),
    label = list(wt = "Car Weight (1000 lbs)", disp = "Displacement (cu.in.)" 
                 # labeling
                 ),
    exponentiate = TRUE,
    pvalue_fun = function(x) style_pvalue(x, digits = 3)
    ) %>% 
  modify_column_merge( # aesthetic adjustment from this line to the end.
    pattern = "{estimate} [{conf.low}, {conf.high}]",
    rows = !reference_row %in% TRUE & !is.na(conf.low)
  ) %>% 
  modify_table_styling(
    columns = c(estimate),
    rows = reference_row %in% TRUE,
    missing_symbol = "REF"
  ) %>% 
  modify_header(estimate = "**OR [95% CI]**") %>% 
  modify_column_hide("stat_n")

Multivariable Model

or_multivariable_table <- glm(
   vs ~ wt+disp, # <- you can add more variables in the formula
   data = data_mtcars, family = binomial
   ) %>% # use the summary(glm(outcome ~ var1 + var2, data = data, family = binomial)) to confirm the results
  tbl_regression(
      exponentiate=TRUE,
      label = list(wt = "Car Weight (1000 lbs)", disp = "Displacement (cu.in.)"),
      pvalue_fun = function(x) style_pvalue(x, digits = 3)
   ) %>% 
  modify_column_merge(
    pattern = "{estimate} [{conf.low}, {conf.high}]",
    rows = !reference_row %in% TRUE & !is.na(conf.low)
  ) %>% 
  modify_table_styling(
    columns = c(estimate),
    rows = reference_row %in% TRUE,
    missing_symbol = "REF"
  ) %>% 
  modify_header(estimate = "**OR [95% CI]**")

Merge Two Tables

or_table <- tbl_merge(tbls = list(or_univariable_table, or_multivariable_table),
                     tab_spanner = c("**Univariable Models**", "**Multivariable Model**")) %>% 
  modify_caption(caption = "Estimate the odds of a car having a straight engine based on its weight and displacement.") %>% 
  as_gt() %>% # as_gt() if you are knitting to PDF, otherwise comment out from this line to the last.
  # gt::cols_width(
  #   label	~ px(180),
  #   everything() ~ px(80)
  # ) %>% 
  gt::tab_spanner( # Adding a header above
    columns = c(estimate_1, conf.low_1, p.value_1, estimate_2, conf.low_2, p.value_2),
    label = "having a straight engine",
    gather = FALSE
  ) %>% 
  gt::tab_style( # Make the new header bold.
    style = gt::cell_text(weight = "bold"),
    locations = list(
      gt::cells_column_spanners(matches("having a straight engine"))
      )
    ) %>% 
  gt::tab_options(latex.tbl.pos = "ht",table.width = gt::pct(90)) # reduce the table width
or_table
(\#tab:unnamed-chunk-9)Estimate the odds of a car having a straight engine based on its weight and displacement.
having a straight engine
Characteristic
Univariable Models
Multivariable Model
OR [95% CI]p-valueOR [95% CI]p-value
Car Weight (1000 lbs)0.15 [0.03, 0.49]0.0095.09 [0.32, 147]0.275
Displacement (cu.in.)0.98 [0.96, 0.99]0.0020.97 [0.93, 0.99]0.025
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

⚠️ ANOVA, t-test, attach one table to another - TBA ⚠️