%macro kappa(x,y,level,f,w,ci,dataset);
*******get working dataset DATAK ready**********;
%if %length(&dataset)>0 %then %do;
data datak; set &dataset;
%end;
%else %if %length(%dataset)=0 %then %do;
data datak; set _last_;
%end;
*****************************************************;
* for case wide data, directly start freq *;
* for freq input data, get integer values of X & Y *;
* first, then start freq X & Y *;
*****************************************************;
%if &f=1 %then %do;
proc freq data=datak;
tables &x*&y/out=rdata;
%end;
%else %do;
proc sort data=datak; by &x &y;
data datak; set datak; by &x &y;
retain xn 0;
if first.&x then xn=xn+1;
proc sort data=datak; by &y &x;
data datak; set datak; by &y &x;
retain yn 0;
if first.&y then yn=yn+1;
&x=xn;
&y=yn;
drop xn yn;
proc freq data=datak;
tables &x*&y/out=rdata;
weight &f;
%end;
proc sort data=rdata; by &x &y;
/*
%*****creating macro level variable;
proc sort data=rdata; by &x;
data datal; set rdata end=last_obs; by &x;
retain le 0;
if first.&x then le=le+1;
if last_obs then do;
call symput('level',le);
end;
************************;
proc print data=datal;
title 'data1 for le of level';
%put 'level=' &level;
*/
data rdataa;
do i=1 to &level;
&x=i;
do j=1 to &level;
&y=j; count=0; percent=0;
output;
end;
end;
data rdataa; set rdataa;
keep &x &y count percent;
proc sort data=rdataa; by &x &y;
data rdata; merge rdataa(in=inra) rdata(in=inr); by &x &y;
**********************;
%********creating sample size n**********;
data rdata; set rdata; percent=percent/100;
proc summary data=rdata; /* get the sample size sum_n */
var count;
output out=rdatac sum=sum_n;
proc sort data=rdata; by &x;
proc summary data=rdata;
var percent;
by &x;
output out=rdatax sum=sum_x;
proc sort data=rdata; by &y;
proc summary;
var percent;
by &y;
output out=rdatay sum=sum_y;
%********creating weight vector WTV***********;
%if %length(&w)=3 %then %do;
proc iml;
im=I(&level);
do m=1 to &level;
col=im[,m];
coll=coll//col;
title 'creating weight for unweighted kappa';
end;
create dataw from coll(|colname='wcn'|);
append from coll;
quit;
%end;
%else %if %length(&w)=2 %then %do;
data dataw;
do m=1 to &level;
do n=1 to &level;
wcn=1-abs(m-n)/(&level-1);
output;
end;
end;
%end;
%else %do;
data dataw;
%let wl=%length(&w);
length wcva wc $&wl;
wcva=symget('w');
l=0;
do until (wc=wcva);
wc=scan(wcva,1,' ');
wcn=wc+0;
output;
l=(length(wc)+2);
if wc=wcva then goto done;
wcva=left(wcva);
wcva=substr(wcva,l);
end;
done:
%end;
***** make data dataw1 connecting weights with percentages, x and y;
proc sort data=rdata; by &x; /* sort rdata in the order as dataw */
data dataw1;
merge rdata dataw;
keep &x &y wcn;
******creating idx;
data datawxy; set dataw1;
proc sort data=datawxy; by &x &y;
data datawxy; set datawxy; by &x &y;
retain idx 0;
if first.&x then idx=idx+1;
data datawyx; set dataw1;
proc sort data=datawyx; by &y &x;
data datawyx; set datawyx; by &y &x;
retain idy 0;
if first.&y then idy=idy+1;
proc sort data=rdata; by &x;
proc iml;
use rdata;
read all var{percent} into ptv; /*ptv is the percentage of pij */
close rdata;
use rdatax;
read all var{sum_x} into xv; /* xv the row marginal propabilities */
close rdatax;
use rdatay;
read all var{sum_y} into yv; /* yv is column marginal probabilities */
close rdatay;
pt_mg_v=xv@yv; /* pt_mg_v is the product of marginal probabilities */
use dataw; /* make the vector of weights */
read all var{wcn} into wtv;
close dataw;
use rdatac; /* make the vector n containing the total sample size */
read all var{sum_n} into n;
close rdatac;
pow=t(wtv)*ptv;
pew=t(wtv)*pt_mg_v;
********calculate Kappa statistics;
kappa=(pow-pew)/(1-pew);
*****calculate standard error of the Kappa statistics;
use datawxy;
Do i=1 to &level;
read all var{wcn} into wcnx where (idx=i);
pi=t(wcnx)*yv;
wi=repeat(pi,&level);
wil=wil//wi;
end;
close datawxy;
use datawyx;
do j=1 to &level;
read all var{wcn} into wcny where (idy=j);
pj=t(wcny)*xv;
wj=wj//pj;
end;
close datawyx;
wjl=repeat(wj,&level);
wtvn=(wtv-wil-wjl)##2;
se=(1/((1-pew)*sqrt(n)))*sqrt(t(pt_mg_v)*wtvn-pew**2);
***** calculate z_statistics for the null hypothsis Kappa=0;
z=kappa/se;
create datakp from kappa(|colname='kappa'|);
append from kappa;
create datase from se(|colname='se'|);
append from se;
create dataz from z(|colname='z_value'|);
append from z;
quit;
****** calculate p_value and confidence interval *****;
data final;
merge datakp datase dataz;
if kappa=1 then do; se=0; p_value=0;
end;
else do; se=se;
p_value=(1-probnorm(z_value));
end;
%if %eval(&ci)=85 %then %do;
lower_85=kappa-1.44*se;
upper_85=kappa+1.44*se;
%end;
%else %if %eval(&ci)=90 %then %do;
lower_90=kappa-1.65*se;
upper_90=kappa+1.65*se;
%end;
%else %if %eval(&ci)=95 %then %do;
lower_95=kappa-1.96*se;
upper_95=kappa+1.96*se;
%end;
%else %if %eval(&ci)=99 %then %do;
lower_99=kappa-2.58*se;
upper_99=kappa+2.58*se;
%end;
proc print noobs;
var kappa se p_value lower_95 upper_95;
title "KAPPA Statistics with &ci% Confidence Interval";
quit;
%mend kappa;