SAS：从零开始用proc report出人口统计学表

目的：如何生成如下图所示的人口统计学的表格
在这里插入图片描述

要点：
1、连续型变量（基线体重、基线身高等）需要展示例数、均值、中位值、最小值、最大值；离散型变量（性别、民族等）需要展示例数和百分比。这些统计量如何计算？
2、统计量需要保留几位小数？
3、如何令前3个剂量组、后5个剂量组分别属于共同的列名SC（肌肉注射）和IV（静脉滴注）？
4、如何在不同变量之间添加空行（如体重和性别之间用空行分割）？

步骤：
1、整合数据集，得到需要的变量：各受试者的组别、体重、性别；

/*1-rawdata*/
data dm1;
	merge dm rand wt(where=(visit='D-1'));
	by subjid;
	keep subjid sex weight grpn;
run;
/*2-generate total column*/
data dm2;
	set dm1;
	ouput;
	grpn=9;
	output;
run;
/*3-counts number of every group*/
proc sql noprint;
	select count(*) into:n1-:n9 
	group by grpn;
quit;
/*4-builds relation between grpn and &nx.*/
proc format;
	value total;
	1 = &n1.
	2 = &n2.
	3 = &n3.
	4 = &n4.
	5 = &n5.
	6 = &n6.
	7 = &n7.
	8 = &n8.
	9 = &n9.;
run;
data dm3;
	set dm2;
	var1='weight';/*the purpose for merging statistics with maxlen*/
	total=input(put(grpn,&total.),best.);
	if find(weight,'.') then len1=length(scan(weight,2,'.'));
	else len1=0;
/*5-generates different output format according to types of statistics:maxumum decimal places 4*/
proc sql noprint;
	create table maxlen1 as
	select "weight" as var1, max(len1) as maxl,
		   /*for medium, mean*/
		   case when (calculated maxl le 3) then
		   			 strip(put(8 + 0.1*(calculated maxl + 1),best.))
		        else strip(put(8 + 0.1*4,best.)) as maxl1,
		   /*for std*/
		   case when (calculated maxl le 2) then
		   			 strip(put(8 + 0.1*(calculated maxl + 2),best.))
		        else strip(put(8 + 0.1*4,best.)) as maxl2,
		  /*for n, min, max*/
		  strip(put(8 + 0.1*(calculated maxl),best.)) as maxl0
quit;

2、衍生变量：对连续型和离散型变量分别应用不同的proc步得到相应的统计量；

/*1-for continuous variables*/
proc sort data=dm3;
	by var1, grpn;
run;
proc summary data=dm3;
	by var1, grpn;
	var weight;
	output out=weight/n = _n mean = _mean median = _median min = _min max = _max;
run;
/*for discrete variables*/
proc freq data=dm3;
	by grpn;
	tables sex/out=sex;
run;

3、美化数据集：用cat和put/putn函数将Mock中需要的统计量展示方式进行组合；生成排序变量，保证最终的变量按照Mock中想要的顺序展示；

/*1-for continuous variables*/
data weight1;
	merge weight maxlen1;
	by var1;
	/*combine statistics according to _n*/
	n = strip(put(_n, best.));
	if _n >1 then do;
		msd = cat(strip(putn(_mean,maxl1)), ' (',
			      strip(putn(_std,maxl2)), ')');
	    med = cat(strip(putn(_median,maxl1)),' (',
	    		  strip(putn(_min,maxl0)), ', ',
	    		  strip(putn(_max,maxl0)),')');end;
	else do;
		msd = strip(putn(_mean,maxl1));
		med = strip(putn(_median,maxl1));end;
run;
proc transpose data=weight1 out=weight2 prefix=group;
	id grpn;
	var n msd med;
run;
data weight3;
	set weight2;
	variable = '体重（kg）';
	if _name_ = 'N' then stat = 'n';
	else if _name_ = 'MSD' then stat = '均值（标准差）';
	else if _name_ = 'MED' then stat = '中位值（最小值，最大值）';
run;
/*2-for discrete variables*/
proc sql noprint;
	create table sex1 as
	select '性别' as variable format = $100., grpn, sex as stat,
			cat(strip(put(count,best.)), ' (',
			strip(put(round(count/total*100,0.1),8.1.)), '%') as pct length = 100	
	from sex
	order by variable, stat, grpn;
quit;
/*3-set datasets*/
data z;
 	retain ord variable stat group1-group9 blk;
	set weight3 sex1;
	blk='';/*for breaking SC and IV*/
	if variable='体重（kg）' then ord = 1;
	else ord = 2;
	keep ord variable stat group1-group9 blk;

4、用proc report将美化后的数据集输出成rtf格式。

/*1-initialize rtf settings*/
options validvarname = upcase nodate nonumber nomprint orientation = landscape papersize = letter;
options topmargin = 2.54cm bottommargin = 2.54cm leftmargin = 2.54cm rightmargin = 2.54cm;
title;
ods escapechar="@";
ods listing close;
ods rtf file="C:\cdisc\tfl\dm.rtf" style=tab;
/*2-generate title*/
%let title=%str(表1 受试者人口学特征描述性统计结果);
ods rtf text="@R/RTF'\ql\fs21\outlinelevel1\b\pnhang\fi-1200\li1200' &title.";
/*3-genarate complete table*/
proc report data = z 
	style(report) = {frame = hsides rules = groups cellpadding = 0 outputwidth = 100%}
	style(header) = {backgroundcolor = white fontweight = medium};
	column ord variable stat ("@R'\brdrb\brdrs' SC" group1-group3) blk ("@R'\brdrb\brdrs' IV" group4-group8) group9;
	define ord/order order=interval noprint;
	define variable/order order=data style(column)={just=left cellwidth=7%} style(header)={just=left} "变量";
	define stat    /order order=data style(column)={just=center cellwidth=12%} style(header)={just=center} "统计量";
	define group1/style(column)={just=center cellwidth=8%} style(header)={just=center} "3 mg";
	define group2/style(column)={just=center cellwidth=8%} style(header)={just=center} "10 mg";
	define group3/style(column)={just=center cellwidth=8%} style(header)={just=center} "20 mg";
	define blk   /style(column)={cellwidth=0.5%} style(header)={cellwidth=0.5%} ""
	define group4/style(column)={just=center cellwidth=8%} style(header)={just=center} "30 mg";
	define group5/style(column)={just=center cellwidth=8%} style(header)={just=center} "40 mg";
	define group6/style(column)={just=center cellwidth=8%} style(header)={just=center} "50 mg";
	define group7/style(column)={just=center cellwidth=8%} style(header)={just=center} "60 mg";
	define group8/style(column)={just=center cellwidth=8%} style(header)={just=center} "100 mg";
	define group9/style(column)={just=center cellwidth=8%} style(header)={just=center} "合计";
	
	compute after variable;
		varlen = ifn(ord=1,100,0);
		line blk $varying. varlen;
	endcomp;
run;

ods listing;
ods rtf colse;

拓展：
1、putn、putc、put、inputn、inputc、input的区别。

函数	定义
putc	enables you to specify a character format at run time
putn	enables you to specify a numeric format at run time
put	returns a value using a specified format
inputc	enables you to specify a character informat at run time
inputn	enables you to specify a numeric informat at run time
input	returns the value that is produced when SAS converts an expression by using the specified informat

从SAS Help中可以看到inputc函数和input函数进行对比，input函数在编译阶段就执行，而inputc函数在运行阶段执行，所以input函数比inputc函数更快执行（其实适用于input/put函数和与之对应的表格中的四个函数中，因为在这四个函数的定义中都可以看到他们都是在运行阶段执行的）在这里插入图片描述
因此，put函数在编译阶段就需要指定一个format，而putc或putn函数在运行阶段才要format。那么，这个format可以是运行阶段生成的变量。如果用于宏函数，就只能用putn或putc。
下面是SAS Help中的例子。

/*putn*/
proc format;
	value writfmt 1 = 'date9.'
				  2 = 'mmddyy10.';
run;

data dates;
	input number key;
	datefmt = put(key,writfmt.);
	date = putn(number,datefmt);
	datalines;
	15756 1
	14552 2
	;
run;

在这里插入图片描述

/*putc*/
proc format;
	value typefmt 1='$groupx'
				  2='$groupy';
	value $groupx 'positive'='agree'
		  		  'negtive' ='disagree';
	value $groupy 'positive'='pass'
		  		  'negtive' ='fail';
run;

data answers;
	length word $ 8;
	input type response $;
	respfmt=put(type,typefmt.);
	word=putc(response,respfmt);
	datalines;
	1 positive
	1 negtive
	2 negtive
	2 positive
	;
run;

在这里插入图片描述

/*inputn*/
proc format;
	value readfmt 1 = 'date9.'
				  2 = 'mmddyy10.';
run;

data dates;
	input number key;
	datefmt = put(key,readfmt .);
	date = inputn(number,datefmt);
	datalines;
	15756 1
	14552 2
	;
run;

/*inputc*/
proc format;
	value typefmt 1='$groupx'
				  2='$groupy';
	invalue $groupx 'positive'='agree'
		  		    'negtive' ='disagree';
    invalue $groupy 'positive'='pass'
		  		    'negtive' ='fail';
run;

data answers;
	length word $ 8;
	input type response $;
	respfmt=put(type,typefmt.);
	word=inputc(response,respfmt);
	datalines;
	1 positive
	1 negtive
	2 negtive
	2 positive
	;
run;

在这里插入图片描述

2、$varying格式及在proc report中插入新行的应用
在这里插入图片描述

/*adds new line afer every groups*/
define var /order|group;
compute after var;
	line "";
endcomp;

/*adds new line after specified groups*/
define var /order|group;
compute after var;
	blk="";
	if var in("1","3") then varlen=100;
	else varlen=0;
	line blk $varying. varlen;
endcomp;

3、RTF中常用的格式设置


\ql 左对齐（默认）
\fsN 以0.5磅为单位的字体大小（font size in half-points）\fs21就是10.5pt，也就是小五号字
\outlinelevelN 段落的大纲级别。N取值范围0-8.
\b 加粗
\b0 不加粗
\pnhang 段落悬挂缩进
\fiN 首行缩进N个twips，twips与磅之间的换算公式为：1 twip=1/20 pt。要求缩进2个字符，2个字符一般是5号字，也就是缩进10.5pt，那么就应该是\fi-210
\liN 左边缩进N个twips
/*要想悬挂缩进，一般是\pnhang\fiN\liN三个选项连用，pnhang表示需要进行段落悬挂缩进，li表示左边缩进N个，fi-N表示首行往前N个字符。这样从第2行开始的就是悬挂缩进，而保持第一行不缩进*/
\brdrb 下框线
\brdrs 单条线
\ul 下划线
/**/