*************************************************************
*              SAS MACRO/IML KAPPA PROCEDURE                                  *
*                                                                                                                      *
*  Description:                                                                                                *
*    Kappa statistics is a measure of agreement with desirable properties.    *
*    This SAS macro/IML procedure can calculate weighted/unweighted    *
*    kappa for incomplete data (i.e.,  non-square table or irregular square    *
*    table with some diagonal value corresponding to different levels of      *
*    answer  from two raters) and treats the regular square table as a            *
*    special case. This procedure provides three different weighting              *
*    schemes,  test of null hypothesis of Kappa=0 and four levels of              *
*    confidence interval. The format of the input data can be either case      *
*    wide or frequency table.                                                                          *
*                                                                                                                      *
*  Usage:                                                                                                          *
*    Use %include statement to bring the entire code to your program, then  *
*    type:                                    *
*                                  kappa(x,y,level,f,w,ci,dataset);                                    *
*    Where                                                                                                        *
*      x is the variable for rater one and  y for rater two.                                  *
*      f is the frequency of your data. For usual case wide data, enter              *
*      value 1. Because this procedure can handle incomplete data, thus          *
*      for case wide data, the value of x and y are required to be integers        *
*      which correspond to the level of rating in the complete data situation.  *
*      For freq table data, f is the variable containing the value of the              *
*      frequency for each cell.                                                                            *
*      w is the weights for Kappa. For unweighted Kappa, just simply            *
*      enter uwt. For weighted Kappa (defined by Cicchetti and Allison,        *
*      1971) enter wt. For customer defined  weights, just enter the weights    *
*      on one line with space in between.  For example,a 3x3 tables could      *
*      could have customer defined weights like:                                              *
*                                  1 0.5 0.2 0.5 1 0.5 0.2 0.5 1                                        *
*      dataset is the name of the data on which you will run the analysis.        *
*      Default is the last active data set just as other SAS procedure does.        *
*      Example: data1 is a usual case format data and has vars r1 and              *
*                      r2 containing results from two raters with possible levels      *
*                      of 4. To get the unweighted kappa just simply type in            *
*                      your program:                                                                          *
*                              %kappa(r1,r2,4,1,uw,95,data1)                                        *
*                                                                                                                        *
*    Copyright @                                                                                              *
*                                                                                                                        *
*    Reference:                                                                                                  *
*    (1) Joseph L. Fleiss, 1981. Statistical method for                                      *
*          rates and proportions, John Wiley, New York                                    *
*                                                                                                  *
*    (2) Liu, H. H., Hays, R. Measurement of Interrater Agreement:              *
*          A SAS/IML  Macro Kappa Procedure for Handling Incomplete        *
*          data, Proceedings of the  Twenty-fourth  Annual SAS Users              *
*          Group International (SUGI), 1999, 1620-1625.               *
*                                                          *
*************************************************************;

%macro kappa(x,y,level,f,w,ci,dataset);

*******get working dataset DATAK ready**********;

      %if %length(&dataset)>0 %then %do;
data datak; set &dataset;
                                    %end;
%else %if %length(%dataset)=0 %then %do;
data datak; set _last_;
                                    %end;

*****************************************************;
* for case wide data, directly start freq                                            *;
* for freq input data, get integer values of X & Y                          *;
* first, then start freq X & Y                                                            *;
*****************************************************;

      %if &f=1 %then %do;
proc freq data=datak;
tables &x*&y/out=rdata;
                    %end;

%else                %do;
proc sort data=datak; by &x &y;
data datak; set datak; by &x &y;
retain xn 0;
if first.&x then xn=xn+1;

proc sort data=datak; by &y &x;
data datak; set datak; by &y &x;
retain yn 0;
if first.&y then yn=yn+1;
&x=xn;
&y=yn;
drop xn yn;

proc freq data=datak;
tables &x*&y/out=rdata;
weight &f;
                    %end;
proc sort data=rdata; by &x &y;


      /*
%*****creating macro level variable;
proc sort data=rdata; by &x;
data datal; set rdata end=last_obs; by &x;
retain le 0;
  if first.&x then le=le+1;
if last_obs then do;
call symput('level',le);
                  end;
************************;
proc print data=datal;
title 'data1 for le of level';
%put 'level=' &level;
      */

data rdataa;
  do i=1 to &level;
    &x=i;
  do j=1 to &level;
    &y=j; count=0; percent=0;
    output;
  end;
  end;
data rdataa; set rdataa;
keep &x &y count percent;
proc sort data=rdataa; by &x &y;

data rdata; merge rdataa(in=inra) rdata(in=inr); by &x &y;

**********************;

%********creating sample size n**********;
data rdata; set rdata; percent=percent/100;
proc summary data=rdata; /* get the sample size sum_n */
var count;
output out=rdatac sum=sum_n;

proc sort data=rdata; by &x;
proc summary data=rdata;
var percent;
by &x;
output out=rdatax sum=sum_x;

proc sort data=rdata; by &y;
proc summary;
var percent;
by &y;
output out=rdatay sum=sum_y;

%********creating weight vector WTV***********;

      %if %length(&w)=3 %then %do;
proc iml;
  im=I(&level);
  do m=1 to &level;
    col=im[,m];
  coll=coll//col;
title 'creating weight for unweighted kappa';
  end;
create dataw from coll(|colname='wcn'|);
append from coll;
quit;
                                %end;

%else %if  %length(&w)=2  %then  %do;

data dataw;
    do  m=1 to &level;
      do  n=1  to &level;
      wcn=1-abs(m-n)/(&level-1);
      output;
      end;
    end;
                                                          %end;

                              %else %do;
data dataw;
  %let wl=%length(&w);
  length wcva wc $&wl;
  wcva=symget('w');
l=0;
          do until (wc=wcva);
  wc=scan(wcva,1,' ');
  wcn=wc+0;
output;
l=(length(wc)+2);
if wc=wcva then goto done;
wcva=left(wcva);
wcva=substr(wcva,l);
          end;
done:
                                %end;

***** make data dataw1 connecting weights with percentages, x and y;

proc sort data=rdata; by &x; /* sort rdata in the order as dataw */
data dataw1;
merge rdata dataw;
keep &x &y wcn;

******creating idx;
data datawxy; set dataw1;
proc sort data=datawxy; by &x &y;

data datawxy; set datawxy; by &x &y;
retain idx 0;
  if first.&x then idx=idx+1;

data datawyx; set dataw1;
proc sort data=datawyx; by &y &x;

data datawyx; set datawyx; by &y &x;
retain idy 0;
  if first.&y then idy=idy+1;

proc sort data=rdata; by &x;

proc iml;
use rdata;
read all var{percent} into ptv;  /*ptv is the percentage of pij */
close rdata;

use rdatax;
read all var{sum_x} into xv;  /* xv the row marginal propabilities */
close rdatax;

use rdatay;
read all var{sum_y} into yv; /* yv is column marginal probabilities */
close rdatay;

pt_mg_v=xv@yv;  /* pt_mg_v is the product of marginal probabilities */


use dataw;  /* make the vector of weights */
read all var{wcn} into wtv;
close dataw;


use rdatac; /* make the vector n containing the total sample size */
read all var{sum_n} into n;
close rdatac;

pow=t(wtv)*ptv;
pew=t(wtv)*pt_mg_v;

********calculate Kappa statistics;
kappa=(pow-pew)/(1-pew);

*****calculate standard error of the Kappa statistics;
use datawxy;
    Do i=1 to &level;
read all var{wcn} into wcnx where (idx=i);
  pi=t(wcnx)*yv;
  wi=repeat(pi,&level);
  wil=wil//wi;
    end;
close datawxy;

use datawyx;
  do j=1 to &level;
read all var{wcn} into wcny where (idy=j);
pj=t(wcny)*xv;
wj=wj//pj;
  end;
close datawyx;
wjl=repeat(wj,&level);

wtvn=(wtv-wil-wjl)##2;
se=(1/((1-pew)*sqrt(n)))*sqrt(t(pt_mg_v)*wtvn-pew**2);

***** calculate z_statistics for the null hypothsis Kappa=0;

z=kappa/se;

create datakp from kappa(|colname='kappa'|);
append from kappa;
create datase from se(|colname='se'|);
append from se;
create dataz from z(|colname='z_value'|);
append from z;

quit;

****** calculate p_value and confidence interval *****;

data final;
merge datakp datase dataz;
      if kappa=1 then do; se=0; p_value=0;
                      end;
else                do; se=se;
                  p_value=(1-probnorm(z_value));
                      end;
      %if %eval(&ci)=85 %then %do;
lower_85=kappa-1.44*se;
upper_85=kappa+1.44*se;
                              %end;
%else %if %eval(&ci)=90 %then %do;
lower_90=kappa-1.65*se;
upper_90=kappa+1.65*se;
                              %end;
%else %if %eval(&ci)=95 %then %do;
lower_95=kappa-1.96*se;
upper_95=kappa+1.96*se;
                              %end;
%else %if %eval(&ci)=99 %then %do;
lower_99=kappa-2.58*se;
upper_99=kappa+2.58*se;
                              %end;
proc print noobs;
var kappa se p_value lower_95 upper_95;
title "KAPPA Statistics with &ci% Confidence Interval";
quit;
%mend kappa;