Hays Files

*************************************************************
* SAS MACRO/IML KAPPA PROCEDURE *
* *
* Description: *
* Kappa statistics is a measure of agreement with desirable properties. *
* This SAS macro/IML procedure can calculate weighted/unweighted *
* kappa for incomplete data (i.e., non-square table or irregular square *
* table with some diagonal value corresponding to different levels of *
* answer from two raters) and treats the regular square table as a *
* special case. This procedure provides three different weighting *
* schemes, test of null hypothesis of Kappa=0 and four levels of *
* confidence interval. The format of the input data can be either case *
* wide or frequency table. *
* *
* Usage: *
* Use %include statement to bring the entire code to your program, then *
* type:                                *
* kappa(x,y,level,f,w,ci,dataset); *
* Where *
* x is the variable for rater one and y for rater two. *
* f is the frequency of your data. For usual case wide data, enter *
* value 1. Because this procedure can handle incomplete data, thus *
* for case wide data, the value of x and y are required to be integers *
* which correspond to the level of rating in the complete data situation. *
* For freq table data, f is the variable containing the value of the *
* frequency for each cell. *
* w is the weights for Kappa. For unweighted Kappa, just simply *
* enter uwt. For weighted Kappa (defined by Cicchetti and Allison, *
* 1971) enter wt. For customer defined weights, just enter the weights *
* on one line with space in between. For example,a 3x3 tables could *
* could have customer defined weights like: *
* 1 0.5 0.2 0.5 1 0.5 0.2 0.5 1 *
* dataset is the name of the data on which you will run the analysis. *
* Default is the last active data set just as other SAS procedure does. *
* Example: data1 is a usual case format data and has vars r1 and *
* r2 containing results from two raters with possible levels *
* of 4. To get the unweighted kappa just simply type in *
* your program: *
* %kappa(r1,r2,4,1,uw,95,data1) *
* *
* Copyright @ *
* *
* Reference: *
* (1) Joseph L. Fleiss, 1981. Statistical method for *
* rates and proportions, John Wiley, New York *
*           *
* (2) Liu, H. H., Hays, R. Measurement of Interrater Agreement: *
* A SAS/IML Macro Kappa Procedure for Handling Incomplete *
* data, Proceedings of the Twenty-fourth Annual SAS Users *
* Group International (SUGI), 1999, 1620-1625.           *
*                          *
*************************************************************;

%macro kappa(x,y,level,f,w,ci,dataset);

*******get working dataset DATAK ready**********;

%if %length(&dataset)>0 %then %do;
data datak; set &dataset;
%end;
%else %if %length(%dataset)=0 %then %do;
data datak; set _last_;
%end;

*****************************************************;
* for case wide data, directly start freq *;
* for freq input data, get integer values of X & Y *;
* first, then start freq X & Y *;
*****************************************************;

%if &f=1 %then %do;
proc freq data=datak;
tables &x*&y/out=rdata;
%end;

%else %do;
proc sort data=datak; by &x &y;
data datak; set datak; by &x &y;
retain xn 0;
if first.&x then xn=xn+1;

proc sort data=datak; by &y &x;
data datak; set datak; by &y &x;
retain yn 0;
if first.&y then yn=yn+1;
&x=xn;
&y=yn;
drop xn yn;

proc freq data=datak;
tables &x*&y/out=rdata;
weight &f;
%end;
proc sort data=rdata; by &x &y;

/*
%*****creating macro level variable;
proc sort data=rdata; by &x;
data datal; set rdata end=last_obs; by &x;
retain le 0;
if first.&x then le=le+1;
if last_obs then do;
call symput('level',le);
end;
************************;
proc print data=datal;
title 'data1 for le of level';
%put 'level=' &level;
*/

data rdataa;
do i=1 to &level;
&x=i;
do j=1 to &level;
&y=j; count=0; percent=0;
output;
end;
end;
data rdataa; set rdataa;
keep &x &y count percent;
proc sort data=rdataa; by &x &y;

data rdata; merge rdataa(in=inra) rdata(in=inr); by &x &y;

**********************;

%********creating sample size n**********;
data rdata; set rdata; percent=percent/100;
proc summary data=rdata; /* get the sample size sum_n */
var count;
output out=rdatac sum=sum_n;

proc sort data=rdata; by &x;
proc summary data=rdata;
var percent;
by &x;
output out=rdatax sum=sum_x;

proc sort data=rdata; by &y;
proc summary;
var percent;
by &y;
output out=rdatay sum=sum_y;

%********creating weight vector WTV***********;

%if %length(&w)=3 %then %do;
proc iml;
im=I(&level);
do m=1 to &level;
col=im[,m];
coll=coll//col;
title 'creating weight for unweighted kappa';
end;
create dataw from coll(|colname='wcn'|);
append from coll;
quit;
%end;

%else %if %length(&w)=2 %then %do;

data dataw;
do m=1 to &level;
do n=1 to &level;
wcn=1-abs(m-n)/(&level-1);
output;
end;
end;
%end;

%else %do;
data dataw;
%let wl=%length(&w);
length wcva wc $&wl;
wcva=symget('w');
l=0;
do until (wc=wcva);
wc=scan(wcva,1,' ');
wcn=wc+0;
output;
l=(length(wc)+2);
if wc=wcva then goto done;
wcva=left(wcva);
wcva=substr(wcva,l);
end;
done:
%end;

***** make data dataw1 connecting weights with percentages, x and y;

proc sort data=rdata; by &x; /* sort rdata in the order as dataw */
data dataw1;
merge rdata dataw;
keep &x &y wcn;

******creating idx;
data datawxy; set dataw1;
proc sort data=datawxy; by &x &y;

data datawxy; set datawxy; by &x &y;
retain idx 0;
if first.&x then idx=idx+1;

data datawyx; set dataw1;
proc sort data=datawyx; by &y &x;

data datawyx; set datawyx; by &y &x;
retain idy 0;
if first.&y then idy=idy+1;

proc sort data=rdata; by &x;

proc iml;
use rdata;
read all var{percent} into ptv; /*ptv is the percentage of pij */
close rdata;

use rdatax;
read all var{sum_x} into xv; /* xv the row marginal propabilities */
close rdatax;

use rdatay;
read all var{sum_y} into yv; /* yv is column marginal probabilities */
close rdatay;

pt_mg_v=xv@yv; /* pt_mg_v is the product of marginal probabilities */

use dataw; /* make the vector of weights */
read all var{wcn} into wtv;
close dataw;

use rdatac; /* make the vector n containing the total sample size */
read all var{sum_n} into n;
close rdatac;

pow=t(wtv)*ptv;
pew=t(wtv)*pt_mg_v;

********calculate Kappa statistics;
kappa=(pow-pew)/(1-pew);

*****calculate standard error of the Kappa statistics;
use datawxy;
Do i=1 to &level;
read all var{wcn} into wcnx where (idx=i);
pi=t(wcnx)*yv;
wi=repeat(pi,&level);
wil=wil//wi;
end;
close datawxy;

use datawyx;
do j=1 to &level;
read all var{wcn} into wcny where (idy=j);
pj=t(wcny)*xv;
wj=wj//pj;
end;
close datawyx;
wjl=repeat(wj,&level);

wtvn=(wtv-wil-wjl)##2;
se=(1/((1-pew)*sqrt(n)))*sqrt(t(pt_mg_v)*wtvn-pew**2);

***** calculate z_statistics for the null hypothsis Kappa=0;

z=kappa/se;

quit;

****** calculate p_value and confidence interval *****;

data final;
merge datakp datase dataz;
if kappa=1 then do; se=0; p_value=0;
end;
else do; se=se;
p_value=(1-probnorm(z_value));
end;
%if %eval(&ci)=85 %then %do;
lower_85=kappa-1.44*se;
upper_85=kappa+1.44*se;
%end;
%else %if %eval(&ci)=90 %then %do;
lower_90=kappa-1.65*se;
upper_90=kappa+1.65*se;
%end;
%else %if %eval(&ci)=95 %then %do;
lower_95=kappa-1.96*se;
upper_95=kappa+1.96*se;
%end;
%else %if %eval(&ci)=99 %then %do;
lower_99=kappa-2.58*se;
upper_99=kappa+2.58*se;
%end;
proc print noobs;
var kappa se p_value lower_95 upper_95;
title "KAPPA Statistics with &ci% Confidence Interval";
quit;
%mend kappa;