************************************************************* * SAS MACRO/IML KAPPA PROCEDURE * * * * Description: * * Kappa statistics is a measure of agreement with desirable properties. * * This SAS macro/IML procedure can calculate weighted/unweighted * * kappa for incomplete data (i.e., non-square table or irregular square * * table with some diagonal value corresponding to different levels of * * answer from two raters) and treats the regular square table as a * * special case. This procedure provides three different weighting * * schemes, test of null hypothesis of Kappa=0 and four levels of * * confidence interval. The format of the input data can be either case * * wide or frequency table. * * * * Usage: * * Use %include statement to bring the entire code to your program, then * * type: * * kappa(x,y,level,f,w,ci,dataset); * * Where * * x is the variable for rater one and y for rater two. * * f is the frequency of your data. For usual case wide data, enter * * value 1. Because this procedure can handle incomplete data, thus * * for case wide data, the value of x and y are required to be integers * * which correspond to the level of rating in the complete data situation. * * For freq table data, f is the variable containing the value of the * * frequency for each cell. * * w is the weights for Kappa. For unweighted Kappa, just simply * * enter uwt. For weighted Kappa (defined by Cicchetti and Allison, * * 1971) enter wt. For customer defined weights, just enter the weights * * on one line with space in between. For example,a 3x3 tables could * * could have customer defined weights like: * * 1 0.5 0.2 0.5 1 0.5 0.2 0.5 1 * * dataset is the name of the data on which you will run the analysis. * * Default is the last active data set just as other SAS procedure does. * * Example: data1 is a usual case format data and has vars r1 and * * r2 containing results from two raters with possible levels * * of 4. To get the unweighted kappa just simply type in * * your program: * * %kappa(r1,r2,4,1,uw,95,data1) * * * * Copyright @ * * * * Reference: * * (1) Joseph L. Fleiss, 1981. Statistical method for * * rates and proportions, John Wiley, New York * * * * (2) Liu, H. H., Hays, R. Measurement of Interrater Agreement: * * A SAS/IML Macro Kappa Procedure for Handling Incomplete * * data, Proceedings of the Twenty-fourth Annual SAS Users * * Group International (SUGI), 1999, 1620-1625. * * * *************************************************************; %macro kappa(x,y,level,f,w,ci,dataset); *******get working dataset DATAK ready**********; %if %length(&dataset)>0 %then %do; data datak; set &dataset; %end; %else %if %length(%dataset)=0 %then %do; data datak; set _last_; %end; *****************************************************; * for case wide data, directly start freq *; * for freq input data, get integer values of X & Y *; * first, then start freq X & Y *; *****************************************************; %if &f=1 %then %do; proc freq data=datak; tables &x*&y/out=rdata; %end; %else %do; proc sort data=datak; by &x &y; data datak; set datak; by &x &y; retain xn 0; if first.&x then xn=xn+1; proc sort data=datak; by &y &x; data datak; set datak; by &y &x; retain yn 0; if first.&y then yn=yn+1; &x=xn; &y=yn; drop xn yn; proc freq data=datak; tables &x*&y/out=rdata; weight &f; %end; proc sort data=rdata; by &x &y; /* %*****creating macro level variable; proc sort data=rdata; by &x; data datal; set rdata end=last_obs; by &x; retain le 0; if first.&x then le=le+1; if last_obs then do; call symput('level',le); end; ************************; proc print data=datal; title 'data1 for le of level'; %put 'level=' &level; */ data rdataa; do i=1 to &level; &x=i; do j=1 to &level; &y=j; count=0; percent=0; output; end; end; data rdataa; set rdataa; keep &x &y count percent; proc sort data=rdataa; by &x &y; data rdata; merge rdataa(in=inra) rdata(in=inr); by &x &y; **********************; %********creating sample size n**********; data rdata; set rdata; percent=percent/100; proc summary data=rdata; /* get the sample size sum_n */ var count; output out=rdatac sum=sum_n; proc sort data=rdata; by &x; proc summary data=rdata; var percent; by &x; output out=rdatax sum=sum_x; proc sort data=rdata; by &y; proc summary; var percent; by &y; output out=rdatay sum=sum_y; %********creating weight vector WTV***********; %if %length(&w)=3 %then %do; proc iml; im=I(&level); do m=1 to &level; col=im[,m]; coll=coll//col; title 'creating weight for unweighted kappa'; end; create dataw from coll(|colname='wcn'|); append from coll; quit; %end; %else %if %length(&w)=2 %then %do; data dataw; do m=1 to &level; do n=1 to &level; wcn=1-abs(m-n)/(&level-1); output; end; end; %end; %else %do; data dataw; %let wl=%length(&w); length wcva wc $&wl; wcva=symget('w'); l=0; do until (wc=wcva); wc=scan(wcva,1,' '); wcn=wc+0; output; l=(length(wc)+2); if wc=wcva then goto done; wcva=left(wcva); wcva=substr(wcva,l); end; done: %end; ***** make data dataw1 connecting weights with percentages, x and y; proc sort data=rdata; by &x; /* sort rdata in the order as dataw */ data dataw1; merge rdata dataw; keep &x &y wcn; ******creating idx; data datawxy; set dataw1; proc sort data=datawxy; by &x &y; data datawxy; set datawxy; by &x &y; retain idx 0; if first.&x then idx=idx+1; data datawyx; set dataw1; proc sort data=datawyx; by &y &x; data datawyx; set datawyx; by &y &x; retain idy 0; if first.&y then idy=idy+1; proc sort data=rdata; by &x; proc iml; use rdata; read all var{percent} into ptv; /*ptv is the percentage of pij */ close rdata; use rdatax; read all var{sum_x} into xv; /* xv the row marginal propabilities */ close rdatax; use rdatay; read all var{sum_y} into yv; /* yv is column marginal probabilities */ close rdatay; pt_mg_v=xv@yv; /* pt_mg_v is the product of marginal probabilities */ use dataw; /* make the vector of weights */ read all var{wcn} into wtv; close dataw; use rdatac; /* make the vector n containing the total sample size */ read all var{sum_n} into n; close rdatac; pow=t(wtv)*ptv; pew=t(wtv)*pt_mg_v; ********calculate Kappa statistics; kappa=(pow-pew)/(1-pew); *****calculate standard error of the Kappa statistics; use datawxy; Do i=1 to &level; read all var{wcn} into wcnx where (idx=i); pi=t(wcnx)*yv; wi=repeat(pi,&level); wil=wil//wi; end; close datawxy; use datawyx; do j=1 to &level; read all var{wcn} into wcny where (idy=j); pj=t(wcny)*xv; wj=wj//pj; end; close datawyx; wjl=repeat(wj,&level); wtvn=(wtv-wil-wjl)##2; se=(1/((1-pew)*sqrt(n)))*sqrt(t(pt_mg_v)*wtvn-pew**2); ***** calculate z_statistics for the null hypothsis Kappa=0; z=kappa/se; create datakp from kappa(|colname='kappa'|); append from kappa; create datase from se(|colname='se'|); append from se; create dataz from z(|colname='z_value'|); append from z; quit; ****** calculate p_value and confidence interval *****; data final; merge datakp datase dataz; if kappa=1 then do; se=0; p_value=0; end; else do; se=se; p_value=(1-probnorm(z_value)); end; %if %eval(&ci)=85 %then %do; lower_85=kappa-1.44*se; upper_85=kappa+1.44*se; %end; %else %if %eval(&ci)=90 %then %do; lower_90=kappa-1.65*se; upper_90=kappa+1.65*se; %end; %else %if %eval(&ci)=95 %then %do; lower_95=kappa-1.96*se; upper_95=kappa+1.96*se; %end; %else %if %eval(&ci)=99 %then %do; lower_99=kappa-2.58*se; upper_99=kappa+2.58*se; %end; proc print noobs; var kappa se p_value lower_95 upper_95; title "KAPPA Statistics with &ci% Confidence Interval"; quit; %mend kappa;