首先致敬前辈 科研行者
介绍一下最近的新宠「autoReg包」:它不仅可以快捷完成基线表的制作,还可以用一行代码直接输出回归分析(支持线性模型、广义线性模型和比例风险模型)的表格。我们还是以上次的示例数据来做演示。
安装并加载需要用的R包
# Install the tooling needed to fetch packages from GitHub.
install.packages("devtools")
# An outdated devtools installation can make the GitHub install fail, so
# install remotes and use it to refresh devtools' dependencies first.
install.packages("remotes")
library(remotes)
update(package_deps("devtools")) # bring devtools' dependencies up to date
devtools::install_github("cardiomoon/autoReg") # install autoReg from GitHub
install.packages("survival") # provides the built-in pbc example data
library(autoReg)
library(survival)
读取示例数据
# Load the pbc example dataset from the survival package, then preview its
# first six rows.
data("pbc", package = "survival")
head(pbc, n = 6L)
id time status trt age sex ascites hepato spiders edema bili chol albumin copper alk.phos ast trig platelet protime stage
1 1 400 2 1 58.76523 f 1 1 1 1.0 14.5 261 2.60 156 1718.0 137.95 172 190 12.2 4
2 2 4500 0 1 56.44627 f 0 1 1 0.0 1.1 302 4.14 54 7394.8 113.52 88 221 10.6 3
3 3 1012 2 1 70.07255 m 0 0 0 0.5 1.4 176 3.48 210 516.0 96.10 55 151 12.0 4
4 4 1925 2 1 54.74059 f 0 1 1 0.5 1.8 244 2.54 64 6121.8 60.63 92 183 10.3 4
5 5 1504 1 2 38.10541 f 0 1 1 0.0 3.4 279 3.53 143 671.0 113.15 72 136 10.9 3
6 6 2503 2 2 66.25873 f 0 1 0 0.0 0.8 248 3.98 50 944.0 93.00 63 NA 11.0 3
整理数据
# Prepare the pbc data for the demo tables.
# NOTE: the trt, edema and status labels below are illustrative stand-ins
# chosen for this tutorial; they are not the clinical coding documented for
# survival::pbc (there, trt 1/2 = D-penicillamine/placebo and
# status 0/1/2 = censored/transplant/dead).
pbc <- na.omit(pbc) # drop incomplete rows to keep the demo simple

# Treatment arm as a categorical variable.
pbc$trt <- factor(pbc$trt, levels = c(1, 2),
                  labels = c("Surgery", "Chemotherapy"))

# Sex as a categorical variable.
pbc$sex <- factor(pbc$sex, levels = c("f", "m"),
                  labels = c("Female", "Male"))

# Ascites as a categorical variable. In pbc, 0 means absent and 1 means
# present, so level 0 must be labelled "no ascites". The two labels used to be
# swapped; tables printed from that older coding show the ascites rows with
# the labels reversed (the counts themselves are unaffected).
pbc$ascites <- factor(pbc$ascites, levels = c(0, 1),
                      labels = c("no ascites", "with ascites"))

# Edema as a three-level categorical variable.
pbc$edema <- factor(pbc$edema, levels = c(0, 0.5, 1),
                    labels = c("light", "moderate", "severe"))

# Histologic stage as a categorical variable.
pbc$stage <- factor(pbc$stage, levels = c(1, 2, 3, 4),
                    labels = c("I", "II", "III", "IV"))

# Endpoint status as a categorical variable.
pbc$status <- factor(pbc$status, levels = c(0, 1, 2),
                     labels = c("alive", "death", "recurrance"))
基线统计表
# Baseline table stratified by trt. The `.` on the right-hand side stands for
# every other column; specific variables can be listed instead, e.g.
#   gaze(trt + sex ~ ascites + edema + stage, data = pbc)
baseline_table1 <- gaze(trt ~ ., data = pbc)
print(baseline_table1)
——————————————————————————————————————————————————————————————————————————
Dependent:trt levels Surgery Chemotherapy p
(N) (N=136) (N=140)
——————————————————————————————————————————————————————————————————————————
id Mean ± SD 162.4 ± 90.3 154.9 ± 93.1 .495
time Mean ± SD 1957.4 ± 1088.4 2000.3 ± 1138.7 .749
status Mean ± SD 0.9 ± 1.0 0.8 ± 1.0 .553
age Mean ± SD 51.2 ± 11.0 48.5 ± 9.9 .033
sex Female 116 (85.3%) 126 (90%) .314
Male 20 (14.7%) 14 (10%)
ascites with ascites 125 (91.9%) 132 (94.3%) .588
no ascites 11 (8.1%) 8 (5.7%)
hepato Mean ± SD 0.5 ± 0.5 0.6 ± 0.5 .151
spiders Mean ± SD 0.3 ± 0.5 0.3 ± 0.5 .878
edema light 112 (82.4%) 122 (87.1%) .490
moderate 15 (11%) 10 (7.1%)
severe 9 (6.6%) 8 (5.7%)
bili Mean ± SD 3.0 ± 3.7 3.7 ± 5.3 .178
chol Mean ± SD 366.1 ± 212.1 376.3 ± 255.5 .719
albumin Mean ± SD 3.5 ± 0.4 3.5 ± 0.4 .375
copper Mean ± SD 103.3 ± 94.7 98.3 ± 81.8 .633
alk.phos Mean ± SD 2016.7 ± 2132.4 1977.1 ± 2106.4 .877
ast Mean ± SD 121.8 ± 52.6 126.4 ± 60.6 .501
trig Mean ± SD 123.8 ± 71.6 126.1 ± 58.8 .769
platelet Mean ± SD 258.1 ± 97.8 265.4 ± 88.5 .515
protime Mean ± SD 10.7 ± 0.9 10.8 ± 1.1 .239
stage I 9 (6.6%) 3 (2.1%) .262
II 31 (22.8%) 28 (20%)
III 51 (37.5%) 60 (42.9%)
IV 45 (33.1%) 49 (35%)
——————————————————————————————————————————————————————————————————————————
# Baseline table stratified by trt and, within each arm, further split by sex.
baseline_table2 <- gaze(trt + sex ~ ., data = pbc)
print(baseline_table2)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————
trt (N) Surgery (N=136) Chemotherapy (N=140)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————
Dependent:sex levels Female Male p Female Male p
(N) (N=116) (N=20) (N=126) (N=14)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————
id Mean ± SD 163.6 ± 90.0 155.7 ± 93.9 .719 157.9 ± 94.4 128.2 ± 78.2 .260
time Mean ± SD 2008.6 ± 1080.8 1660.2 ± 1112.7 .187 1975.5 ± 1102.1 2223.4 ± 1457.8 .442
status Mean ± SD 0.8 ± 1.0 1.6 ± 0.8 .001 0.8 ± 0.9 1.0 ± 1.0 .500
age Mean ± SD 50.3 ± 10.6 56.1 ± 12.7 .029 47.5 ± 9.3 57.2 ± 10.4 <.001
ascites with ascites 106 (91.4%) 19 (95%) .917 119 (94.4%) 13 (92.9%) 1.000
no ascites 10 (8.6%) 1 (5%) 7 (5.6%) 1 (7.1%)
hepato Mean ± SD 0.4 ± 0.5 0.6 ± 0.5 .212 0.6 ± 0.5 0.6 ± 0.5 .910
spiders Mean ± SD 0.3 ± 0.5 0.1 ± 0.4 .127 0.3 ± 0.5 0.1 ± 0.3 .008
edema light 96 (82.8%) 16 (80%) .801 110 (87.3%) 12 (85.7%) .971
moderate 12 (10.3%) 3 (15%) 9 (7.1%) 1 (7.1%)
severe 8 (6.9%) 1 (5%) 7 (5.6%) 1 (7.1%)
bili Mean ± SD 2.9 ± 3.9 3.1 ± 2.1 .785 3.8 ± 5.6 2.9 ± 2.5 .269
chol Mean ± SD 358.3 ± 212.9 411.5 ± 206.8 .302 384.6 ± 265.7 301.0 ± 111.3 .035
albumin Mean ± SD 3.5 ± 0.4 3.6 ± 0.4 .201 3.5 ± 0.4 3.6 ± 0.4 .572
copper Mean ± SD 90.1 ± 86.6 180.0 ± 105.4 <.001 94.6 ± 80.4 131.3 ± 89.5 .111
alk.phos Mean ± SD 1965.2 ± 2106.2 2316.0 ± 2312.8 .499 1996.0 ± 2061.8 1806.3 ± 2555.3 .751
ast Mean ± SD 120.0 ± 53.4 132.3 ± 47.1 .334 127.6 ± 62.2 115.5 ± 44.5 .482
trig Mean ± SD 119.8 ± 73.1 147.2 ± 57.7 .114 127.4 ± 60.5 114.9 ± 39.6 .452
platelet Mean ± SD 262.4 ± 97.7 232.7 ± 97.4 .210 267.5 ± 90.1 246.1 ± 72.8 .393
protime Mean ± SD 10.6 ± 0.8 10.9 ± 1.0 .276 10.8 ± 1.1 11.2 ± 1.0 .131
stage I 8 (6.9%) 1 (5%) .908 2 (1.6%) 1 (7.1%) .470
II 27 (23.3%) 4 (20%) 26 (20.6%) 2 (14.3%)
III 44 (37.9%) 7 (35%) 55 (43.7%) 5 (35.7%)
IV 37 (31.9%) 8 (40%) 43 (34.1%) 6 (42.9%)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————
回归分析
# Multivariable logistic regression of status on treatment, demographics and
# laboratory values. The console prompts ("> ") are dropped so the snippet can
# be run as-is, like the other snippets in this file.
# status is a three-level factor here; with a factor response, binomial glm()
# treats the first level ("alive") as failure and every other level as success.
fit <- glm(
  status ~ trt + sex + ascites + edema + stage + bili + chol + albumin + ast,
  data = pbc, family = "binomial"
)
fit # print the fitted model instead of fitting it a second time
Call: glm(formula = status ~ trt + sex + ascites + edema + stage +
bili + chol + albumin + ast, family = "binomial", data = pbc)
Coefficients:
(Intercept) trtChemotherapy sexMale ascitesno ascites edemamoderate edemasevere
-2.216433 -0.305154 1.210329 1.088021 0.686944 1.119620
stageII stageIII stageIV bili chol albumin
1.614696 2.125737 2.498416 0.278962 0.000418 -0.406884
ast
0.003200
Degrees of Freedom: 275 Total (i.e. Null); 263 Residual
Null Deviance: 381.4
Residual Deviance: 282.5 AIC: 308.5
回归分析统计表
autoReg(fit) # show only the multivariable regression results
————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Dependent: status alive (N=147) death (N=18) recurrance (N=111) OR (multivariable)
————————————————————————————————————————————————————————————————————————————————————————————————————————————————
trt Surgery 70 (47.6%) 9 (50%) 57 (51.4%)
Chemotherapy 77 (52.4%) 9 (50%) 54 (48.6%) 0.74 (0.41-1.31, p=.301)
sex Female 137 (93.2%) 15 (83.3%) 90 (81.1%)
Male 10 (6.8%) 3 (16.7%) 21 (18.9%) 3.35 (1.39-8.09, p=.007)
ascites with ascites 146 (99.3%) 18 (100%) 93 (83.8%)
no ascites 1 (0.7%) 0 (0%) 18 (16.2%) 2.97 (0.20-44.35, p=.430)
edema light 139 (94.6%) 16 (88.9%) 79 (71.2%)
moderate 7 (4.8%) 2 (11.1%) 16 (14.4%) 1.99 (0.68-5.77, p=.207)
severe 1 (0.7%) 0 (0%) 16 (14.4%) 3.06 (0.22-43.27, p=.407)
stage I 11 (7.5%) 0 (0%) 1 (0.9%)
II 42 (28.6%) 3 (16.7%) 14 (12.6%) 5.03 (0.44-57.65, p=.195)
III 62 (42.2%) 8 (44.4%) 41 (36.9%) 8.38 (0.76-92.62, p=.083)
IV 32 (21.8%) 7 (38.9%) 55 (49.5%) 12.16 (1.08-137.02, p=.043)
bili Mean ± SD 1.6 ± 1.8 3.2 ± 2.0 5.7 ± 6.2 1.32 (1.12-1.57, p=.001)
chol Mean ± SD 326.9 ± 168.1 439.5 ± 335.5 418.9 ± 277.9 1.00 (1.00-1.00, p=.634)
albumin Mean ± SD 3.6 ± 0.3 3.6 ± 0.4 3.4 ± 0.5 0.67 (0.29-1.52, p=.334)
ast Mean ± SD 110.2 ± 54.4 130.2 ± 38.0 141.5 ± 57.7 1.00 (1.00-1.01, p=.287)
————————————————————————————————————————————————————————————————————————————————————————————————————————————————
# Logistic regression with the treatment arm as the outcome. The console
# prompts ("> ") are dropped so the snippet can be run as-is.
fit2 <- glm(
  trt ~ sex + ascites + edema + stage + bili + chol + albumin + ast,
  data = pbc, family = "binomial"
)
# uni = TRUE adds the univariable results. Predictors are screened by their
# univariable p-value before entering the multivariable model (cut-off set by
# `threshold`; default believed to be 0.2 per the autoReg docs - TODO confirm).
autoReg(fit2, uni = TRUE)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Dependent: trt Surgery (N=136) Chemotherapy (N=140) OR (univariable) OR (multivariable)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
sex Female 116 (85.3%) 126 (90%)
Male 20 (14.7%) 14 (10%) 0.64 (0.31-1.33, p=.237)
ascites with ascites 125 (91.9%) 132 (94.3%)
no ascites 11 (8.1%) 8 (5.7%) 0.69 (0.27-1.77, p=.438)
edema light 112 (82.4%) 122 (87.1%)
moderate 15 (11%) 10 (7.1%) 0.61 (0.26-1.42, p=.252)
severe 9 (6.6%) 8 (5.7%) 0.82 (0.30-2.19, p=.686)
stage I 9 (6.6%) 3 (2.1%)
II 31 (22.8%) 28 (20%) 2.71 (0.67-11.02, p=.164) 2.65 (0.65-10.77, p=.174)
III 51 (37.5%) 60 (42.9%) 3.53 (0.91-13.74, p=.069) 3.34 (0.86-13.05, p=.083)
IV 45 (33.1%) 49 (35%) 3.27 (0.83-12.83, p=.090) 2.92 (0.73-11.64, p=.128)
bili Mean ± SD 3.0 ± 3.7 3.7 ± 5.3 1.04 (0.98-1.09, p=.184) 1.03 (0.98-1.09, p=.266)
chol Mean ± SD 366.1 ± 212.1 376.3 ± 255.5 1.00 (1.00-1.00, p=.719)
albumin Mean ± SD 3.5 ± 0.4 3.5 ± 0.4 1.31 (0.73-2.35, p=.373)
ast Mean ± SD 121.8 ± 52.6 126.4 ± 60.6 1.00 (1.00-1.01, p=.500)
———————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
# Setting threshold = 1 forces every predictor into the multivariable model.
# myft() then renders the result as a publication-quality table.
# Written as a nested call so the line does not depend on %>% being provided
# by an attached package (no magrittr/dplyr is loaded at this point).
myft(autoReg(fit2, uni = TRUE, threshold = 1))
另外,设置参数imputed=TRUE可以应用于多重插补数据的模型比较。这个我们后面再学习。
表格与森林图导出
# rrtable supplies the PowerPoint/Word exporters used below.
install.packages("rrtable")
library(rrtable)
# Build the regression table with every predictor in the multivariable model.
# Written as a nested call so the line does not depend on %>% being provided
# by an attached package.
result <- myft(autoReg(fit2, uni = TRUE, threshold = 1))
table2pptx(result) # export to PowerPoint as editable data
table2docx(result) # export to Word as editable data
# Forest plot of the multivariable regression.
modelPlot(fit2)
wechat 搜:科研行者
# Forest plot that also includes the univariable estimates, with threshold = 1
# so all predictors enter the multivariable model.
# NOTE(review): show.ref = FALSE presumably hides reference-level rows - confirm.
modelPlot(fit2, uni = TRUE, threshold = 1, show.ref = FALSE)
# Export the plot to PowerPoint for further editing.
p1 <- modelPlot(fit2)
rrtable::plot2pptx(print(p1))