Presentation-Ready Analytic Result Tables Using gtsummary
Table of Contents
⚠️ Make Sure You Understand the Code Before Using It ⚠️
⚠️ This code is intended to generate tables for the PDF file and is currently under construction. ⚠️
Library and Data Prep
The veteran dataset contains survival data from a two-treatment, randomized trial for lung cancer. The variables in veteran are:
trt: 1=standard 2=testcelltype: squamous, small cell, adeno, largetime: survival time in daysstatus: censoring status 0=censored, 1=deathkarno: Karnofsky performance score (100=good)diagtime: months from diagnosis to randomizationage: in yearsprior: prior therapy 0=no, 10=yes
Task: Examine the associations of treatment, cell type, and prior therapy with overall survival.
options(tidyverse.quiet = TRUE)
library(tidyverse)
library(gtsummary)
library(survival)
veteran <- survival::veteran
data_veteran = veteran %>%
mutate(
trt_factor = factor(trt, levels = c(1,2), labels = c("Control", "Treatment")),
prior_factor = factor(prior, levels = c(0,10), labels = c("no", "yes")),
time = time/30.437 # One month = 30.437 days
)
The mtcars dataset is ideal for testing binary outcomes. The variables in mtcars are:
vs: engine shape (0 = V-shaped, 1 = straight).wt: car weight (1000 lbs)disp: displacement (cu.in.)
Task: Predict the probability of a car having a straight engine based on its weight (wt) and displacement (disp).
data_mtcars = mtcars
Survival
Univariable Model
os_univariable_table = tbl_uvregression(
data_veteran %>% dplyr::select(time, status, trt_factor, celltype, prior_factor), # Retain only the relevant variables.
method = coxph,
y = Surv(time, status),
exponentiate = TRUE,
# Labeling
label = list(trt_factor = "Treatment",
celltype = "Cell Type",
prior_factor = "Prior Therapy"),
pvalue_fun = function(x) style_pvalue(x, digits = 3) # Formatting p-value
) %>%
add_global_p(include=c("celltype"),keep=TRUE) %>% # Add an overall p-value when certain variable contains multiple levels
modify_column_merge( # aesthetic adjustment from this line to the end.
pattern = "{estimate} [{conf.low}, {conf.high}]",
rows = !reference_row %in% TRUE & !is.na(conf.low)
) %>%
modify_table_styling(
columns = c(estimate),
rows = reference_row %in% TRUE,
missing_symbol = "REF"
) %>%
modify_header(estimate = "**HR [95% CI]**") %>%
modify_column_hide("stat_n")
# modify_caption(caption = "OS") %>%
# modify_spanning_header(c(stat_n, estimate, conf.low, p.value) ~ "**Univariable**")
### To check the header names such as estimate, conf.low, conf.high
# show_header_names()
# Hard coding, manually remove level-specific p-values of multi-level variables.
os_univariable_table$table_body$p.value[c(6,7,8)] = NA
Multivariable Model
- For multivariable analyses, first fit the model, then summarize its results using tbl_regression().
os_multivariable_table <- coxph(
Surv(time, status) ~ trt_factor+celltype+prior_factor, data = data_veteran
) %>%
tbl_regression(
exponentiate=TRUE,
# Two tables must have the same labels to be merged
label = list(trt_factor = "Treatment",
celltype = "Cell Type",
prior_factor = "Prior Therapy"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) %>%
add_global_p(include=c("celltype"),keep=TRUE) %>%
modify_column_merge(
pattern = "{estimate} [{conf.low}, {conf.high}]",
rows = !reference_row %in% TRUE & !is.na(conf.low)
) %>%
modify_table_styling(
columns = c(estimate),
rows = reference_row %in% TRUE,
missing_symbol = "REF"
) %>%
modify_header(estimate = "**HR [95% CI]**")
os_multivariable_table$table_body$p.value[c(6,7,8)] = NA
Merge Two Tables Side by Side
os_table_combined = tbl_merge(tbls = list(os_univariable_table, os_multivariable_table),
tab_spanner = c("**Univariable Models**", "**Multivariable Model**"))
os_table_combined
| Characteristic | Univariable Models | Multivariable Model | ||
|---|---|---|---|---|
| HR [95% CI] | p-value | HR [95% CI] | p-value | |
| Treatment | ||||
| Control | REF | REF | ||
| Treatment | 1.02 [0.71, 1.45] | 0.922 | 1.22 [0.83, 1.79] | 0.321 |
| Cell Type | <0.001 | <0.001 | ||
| squamous | REF | REF | ||
| smallcell | 2.72 [1.66, 4.47] | 3.00 [1.76, 5.14] | ||
| adeno | 3.15 [1.77, 5.59] | 3.24 [1.80, 5.84] | ||
| large | 1.26 [0.73, 2.17] | 1.35 [0.77, 2.36] | ||
| Prior Therapy | ||||
| no | REF | REF | ||
| yes | 0.87 [0.59, 1.28] | 0.476 | 1.03 [0.69, 1.54] | 0.885 |
| Abbreviations: CI = Confidence Interval, HR = Hazard Ratio | ||||
GLM - Binary Outcome
Univariable Model
- Binary outcome variable can be either numeric 0 and 1 or as a factor with two levels
or_univariable_table = tbl_uvregression(
data_mtcars %>% dplyr::select(vs, wt, disp),
method = glm,
y = vs,
method.args = list(family = binomial),
label = list(wt = "Car Weight (1000 lbs)", disp = "Displacement (cu.in.)"
# labeling
),
exponentiate = TRUE,
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) %>%
modify_column_merge( # aesthetic adjustment from this line to the end.
pattern = "{estimate} [{conf.low}, {conf.high}]",
rows = !reference_row %in% TRUE & !is.na(conf.low)
) %>%
modify_table_styling(
columns = c(estimate),
rows = reference_row %in% TRUE,
missing_symbol = "REF"
) %>%
modify_header(estimate = "**OR [95% CI]**") %>%
modify_column_hide("stat_n")
Multivariable Model
or_multivariable_table <- glm(
vs ~ wt+disp, # <- you can add more variables in the formula
data = data_mtcars, family = binomial
) %>% # use the summary(glm(outcome ~ var1 + var2, data = data, family = binomial)) to confirm the results
tbl_regression(
exponentiate=TRUE,
label = list(wt = "Car Weight (1000 lbs)", disp = "Displacement (cu.in.)"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) %>%
modify_column_merge(
pattern = "{estimate} [{conf.low}, {conf.high}]",
rows = !reference_row %in% TRUE & !is.na(conf.low)
) %>%
modify_table_styling(
columns = c(estimate),
rows = reference_row %in% TRUE,
missing_symbol = "REF"
) %>%
modify_header(estimate = "**OR [95% CI]**")
Merge Two Tables
or_table <- tbl_merge(tbls = list(or_univariable_table, or_multivariable_table),
tab_spanner = c("**Univariable Models**", "**Multivariable Model**")) %>%
modify_caption(caption = "Estimate the odds of a car having a straight engine based on its weight and displacement.") %>%
as_gt() %>% # as_gt() if you are knitting to PDF, otherwise comment out from this line to the last.
# gt::cols_width(
# label ~ px(180),
# everything() ~ px(80)
# ) %>%
gt::tab_spanner( # Adding a header above
columns = c(estimate_1, conf.low_1, p.value_1, estimate_2, conf.low_2, p.value_2),
label = "having a straight engine",
gather = FALSE
) %>%
gt::tab_style( # Make the new header bold.
style = gt::cell_text(weight = "bold"),
locations = list(
gt::cells_column_spanners(matches("having a straight engine"))
)
) %>%
gt::tab_options(latex.tbl.pos = "ht",table.width = gt::pct(90)) # reduce the table width
or_table
having a straight engine | ||||
|---|---|---|---|---|
| Characteristic | Univariable Models | Multivariable Model | ||
| OR [95% CI] | p-value | OR [95% CI] | p-value | |
| Car Weight (1000 lbs) | 0.15 [0.03, 0.49] | 0.009 | 5.09 [0.32, 147] | 0.275 |
| Displacement (cu.in.) | 0.98 [0.96, 0.99] | 0.002 | 0.97 [0.93, 0.99] | 0.025 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | ||||
⚠️ ANOVA, t-test, attach one table to another - TBA ⚠️