目的:如何生成如下图所示的人口统计学的表格
要点:
1、连续型变量(基线体重、基线身高等)需要展示例数、均值、中位值、最小值、最大值;离散型变量(性别、民族等)需要展示例数和百分比。这些统计量如何计算?
2、统计量需要保留几位小数?
3、如何令前3个剂量组、后5个剂量组分别属于共同的列名SC(肌肉注射)和IV(静脉滴注)?
4、如何在不同变量之间添加空行(如体重和性别之间用空行分割)?
步骤:
1、整合数据集,得到需要的变量:各受试者的组别、体重、性别;
/*1-rawdata*/
data dm1;
merge dm rand wt(where=(visit='D-1'));
by subjid;
keep subjid sex weight grpn;
run;
/*2-generate total column*/
data dm2;
set dm1;
ouput;
grpn=9;
output;
run;
/*3-counts number of every group*/
proc sql noprint;
select count(*) into:n1-:n9
group by grpn;
quit;
/*4-builds relation between grpn and &nx.*/
proc format;
value total;
1 = &n1.
2 = &n2.
3 = &n3.
4 = &n4.
5 = &n5.
6 = &n6.
7 = &n7.
8 = &n8.
9 = &n9.;
run;
data dm3;
set dm2;
var1='weight';/*the purpose for merging statistics with maxlen*/
total=input(put(grpn,&total.),best.);
if find(weight,'.') then len1=length(scan(weight,2,'.'));
else len1=0;
/*5-generates different output format according to types of statistics:maxumum decimal places 4*/
proc sql noprint;
create table maxlen1 as
select "weight" as var1, max(len1) as maxl,
/*for medium, mean*/
case when (calculated maxl le 3) then
strip(put(8 + 0.1*(calculated maxl + 1),best.))
else strip(put(8 + 0.1*4,best.)) as maxl1,
/*for std*/
case when (calculated maxl le 2) then
strip(put(8 + 0.1*(calculated maxl + 2),best.))
else strip(put(8 + 0.1*4,best.)) as maxl2,
/*for n, min, max*/
strip(put(8 + 0.1*(calculated maxl),best.)) as maxl0
quit;
2、衍生变量:对连续型和离散型变量分别应用不同的proc步得到相应的统计量;
/*1-for continuous variables*/
proc sort data=dm3;
by var1, grpn;
run;
proc summary data=dm3;
by var1, grpn;
var weight;
output out=weight/n = _n mean = _mean median = _median min = _min max = _max;
run;
/*for discrete variables*/
proc freq data=dm3;
by grpn;
tables sex/out=sex;
run;
3、美化数据集:用cat和put/putn函数将Mock中需要的统计量展示方式进行组合;生成排序变量,保证最终的变量按照Mock中想要的顺序展示;
/*1-for continuous variables*/
data weight1;
merge weight maxlen1;
by var1;
/*combine statistics according to _n*/
n = strip(put(_n, best.));
if _n >1 then do;
msd = cat(strip(putn(_mean,maxl1)), ' (',
strip(putn(_std,maxl2)), ')');
med = cat(strip(putn(_median,maxl1)),' (',
strip(putn(_min,maxl0)), ', ',
strip(putn(_max,maxl0)),')');end;
else do;
msd = strip(putn(_mean,maxl1));
med = strip(putn(_median,maxl1));end;
run;
proc transpose data=weight1 out=weight2 prefix=group;
id grpn;
var n msd med;
run;
data weight3;
set weight2;
variable = '体重(kg)';
if _name_ = 'N' then stat = 'n';
else if _name_ = 'MSD' then stat = '均值(标准差)';
else if _name_ = 'MED' then stat = '中位值(最小值,最大值)';
run;
/*2-for discrete variables*/
proc sql noprint;
create table sex1 as
select '性别' as variable format = $100., grpn, sex as stat,
cat(strip(put(count,best.)), ' (',
strip(put(round(count/total*100,0.1),8.1.)), '%') as pct length = 100
from sex
order by variable, stat, grpn;
quit;
/*3-set datasets*/
data z;
retain ord variable stat group1-group9 blk;
set weight3 sex1;
blk='';/*for breaking SC and IV*/
if variable='体重(kg)' then ord = 1;
else ord = 2;
keep ord variable stat group1-group9 blk;
4、用proc report将美化后的数据集输出成rtf格式。
/*1-initialize rtf settings*/
options validvarname = upcase nodate nonumber nomprint orientation = landscape papersize = letter;
options topmargin = 2.54cm bottommargin = 2.54cm leftmargin = 2.54cm rightmargin = 2.54cm;
title;
ods escapechar="@";
ods listing close;
ods rtf file="C:\cdisc\tfl\dm.rtf" style=tab;
/*2-generate title*/
%let title=%str(表1 受试者人口学特征描述性统计结果);
ods rtf text="@R/RTF'\ql\fs21\outlinelevel1\b\pnhang\fi-1200\li1200' &title.";
/*3-genarate complete table*/
proc report data = z
style(report) = {frame = hsides rules = groups cellpadding = 0 outputwidth = 100%}
style(header) = {backgroundcolor = white fontweight = medium};
column ord variable stat ("@R'\brdrb\brdrs' SC" group1-group3) blk ("@R'\brdrb\brdrs' IV" group4-group8) group9;
define ord/order order=interval noprint;
define variable/order order=data style(column)={just=left cellwidth=7%} style(header)={just=left} "变量";
define stat /order order=data style(column)={just=center cellwidth=12%} style(header)={just=center} "统计量";
define group1/style(column)={just=center cellwidth=8%} style(header)={just=center} "3 mg";
define group2/style(column)={just=center cellwidth=8%} style(header)={just=center} "10 mg";
define group3/style(column)={just=center cellwidth=8%} style(header)={just=center} "20 mg";
define blk /style(column)={cellwidth=0.5%} style(header)={cellwidth=0.5%} ""
define group4/style(column)={just=center cellwidth=8%} style(header)={just=center} "30 mg";
define group5/style(column)={just=center cellwidth=8%} style(header)={just=center} "40 mg";
define group6/style(column)={just=center cellwidth=8%} style(header)={just=center} "50 mg";
define group7/style(column)={just=center cellwidth=8%} style(header)={just=center} "60 mg";
define group8/style(column)={just=center cellwidth=8%} style(header)={just=center} "100 mg";
define group9/style(column)={just=center cellwidth=8%} style(header)={just=center} "合计";
compute after variable;
varlen = ifn(ord=1,100,0);
line blk $varying. varlen;
endcomp;
run;
ods listing;
ods rtf colse;
拓展:
1、putn、putc、put、inputn、inputc、input的区别。
函数 | 定义 |
---|---|
putc | enables you to specify a character format at run time |
putn | enables you to specify a numeric format at run time |
put | returns a value using a specified format |
inputc | enables you to specify a character informat at run time |
inputn | enables you to specify a numeric informat at run time |
input | returns the value that is produced when SAS converts an expression by using the specified informat |
从SAS Help中可以看到inputc函数和input函数进行对比,input函数在编译阶段就执行,而inputc函数在运行阶段执行,所以input函数比inputc函数更快执行(其实适用于input/put函数和与之对应的表格中的四个函数中,因为在这四个函数的定义中都可以看到他们都是在运行阶段执行的)
因此,put函数在编译阶段就需要指定一个format,而putc或putn函数在运行阶段才要format。那么,这个format可以是运行阶段生成的变量。如果用于宏函数,就只能用putn或putc。
下面是SAS Help中的例子。
/*putn*/
proc format;
value writfmt 1 = 'date9.'
2 = 'mmddyy10.';
run;
data dates;
input number key;
datefmt = put(key,writfmt.);
date = putn(number,datefmt);
datalines;
15756 1
14552 2
;
run;
/*putc*/
proc format;
value typefmt 1='$groupx'
2='$groupy';
value $groupx 'positive'='agree'
'negtive' ='disagree';
value $groupy 'positive'='pass'
'negtive' ='fail';
run;
data answers;
length word $ 8;
input type response $;
respfmt=put(type,typefmt.);
word=putc(response,respfmt);
datalines;
1 positive
1 negtive
2 negtive
2 positive
;
run;
/*inputn*/
proc format;
value readfmt 1 = 'date9.'
2 = 'mmddyy10.';
run;
data dates;
input number key;
datefmt = put(key,readfmt .);
date = inputn(number,datefmt);
datalines;
15756 1
14552 2
;
run;
/*inputc*/
proc format;
value typefmt 1='$groupx'
2='$groupy';
invalue $groupx 'positive'='agree'
'negtive' ='disagree';
invalue $groupy 'positive'='pass'
'negtive' ='fail';
run;
data answers;
length word $ 8;
input type response $;
respfmt=put(type,typefmt.);
word=inputc(response,respfmt);
datalines;
1 positive
1 negtive
2 negtive
2 positive
;
run;
2、$varying格式及在proc report中插入新行的应用
/*adds new line afer every groups*/
define var /order|group;
compute after var;
line "";
endcomp;
/*adds new line after specified groups*/
define var /order|group;
compute after var;
blk="";
if var in("1","3") then varlen=100;
else varlen=0;
line blk $varying. varlen;
endcomp;
3、RTF中常用的格式设置
\ql 左对齐(默认)
\fsN 以0.5磅为单位的字体大小(font size in half-points)\fs21就是10.5pt,也就是小五号字
\outlinelevelN 段落的大纲级别。N取值范围0-8.
\b 加粗
\b0 不加粗
\pnhang 段落悬挂缩进
\fiN 首行缩进N个twips,twips与磅之间的换算公式为:1 twip=1/20 pt。要求缩进2个字符,2个字符一般是5号字,也就是缩进10.5pt,那么就应该是\fi-210
\liN 左边缩进N个twips
/*要想悬挂缩进,一般是\pnhang\fiN\liN三个选项连用,pnhang表示需要进行段落悬挂缩进,li表示左边缩进N个,fi-N表示首行往前N个字符。这样从第2行开始的就是悬挂缩进,而保持第一行不缩进*/
\brdrb 下框线
\brdrs 单条线
\ul 下划线
/**/