Skip to contents

Use gtsummary to quickly compute descriptive statistics for a data frame. To reduce the amount of computation, it is usually necessary to first remove categorical variables with very many distinct values, such as patient ID.

Usage

calcu(
  data,
  sp_conts = NULL,
  cate_stat = "{n} ({p}%)",
  cont_stat = "{mean} ({sd})"
)

Arguments

data

A data frame to summarise.

sp_conts

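Default is NULL. Because gtsummary guesses each variable's type automatically, a continuous variable that takes only a few distinct values may be treated as categorical; use this parameter to list the names of such variables so that they are summarised as continuous variables.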

cate_stat

Default is "{n} ({p}%)". See gtsummary::tbl_summary() for more details.

cont_stat

Default is "{mean} ({sd})". See gtsummary::tbl_summary() for more details.

Value

The table_body element of the gtsummary summary table (gtsummary$table_body).

Examples

if (FALSE) {
  # Not run: this example relies on the `data_med` data set and on the helper
  # functions tr() and mmc(), which are defined outside this example.
  data(data_med)

  # Reshape the lab records to one row per patient and one column per lab
  # test, keeping only the last record (ordered by test_date) for every
  # patient / lab test pair.
  # NOTE(review): tr() is presumably a type-conversion helper ("dat" = date,
  # "num" = numeric) -- confirm against its documentation.
  lab_wider <- data_med$lab %>%
    tr(c("test_date"), "dat") %>%
    group_by(patient_id, lab_name) %>%
    arrange(test_date) %>%
    slice_tail(n = 1) %>%
    ungroup() %>%
    select(patient_id, lab_name, lab_va) %>%
    tr(c("lab_va"), "num") %>%
    spread(lab_name, lab_va) %>%
    distinct()

  # Break points used to split each lab measurement into intervals.
  HbA1c <- c(0, 6.5, 7.0, 8.0, 9.0, Inf)
  TC <- c(0, 5.2, 6.2, Inf)
  LDL <- c(0, 3.4, 4.1, Inf)
  HDL <- c(0, 1.0, Inf)
  TG <- c(0, 1.7, 2.3, Inf)
  WBC <- c(0, 4, 10, Inf)

  # Keep only the columns related to the analysis (everything except the
  # first column, patient_id).
  lab_name_list <- names(lab_wider)[-1]

  # The break-point vectors must be listed in the same order as the column
  # names in lab_name_list; adjust this list if the column order differs.
  list_cut <- list(HbA1c, HDL, LDL, TC, TG, WBC)

  # Mutate multiple split-variable columns in one call.
  lab_wider_cut <- mmc(lab_wider, lab_name_list, list_cut, digits = 2)

  # Append one all-NA row so that the handling of missing data is
  # demonstrated as well; the new row gets a placeholder patient id.
  na_row <- data.frame(matrix(NA, nrow = 1, ncol = ncol(lab_wider_cut)))
  names(na_row) <- names(lab_wider_cut)
  lab_wider_cut <- lab_wider_cut %>%
    rbind(na_row) %>%
    mutate(patient_id = replace_na(patient_id, "test_id"))

  # Faster descriptive statistics: drop the patient id column and declare the
  # original lab measurements as continuous variables.
  data_lab_calcu <- calcu(lab_wider_cut[, -1], sp_conts = lab_name_list)
}