%macro su2( data = ,/* 入力データセット名 */ outside = ,/* 外的基準変数名(数値変数)*/ item = ,/* アイテム変数名(空白区切)*/ weight = ,/* ウエイト変数(あれば) */ dim = 1 ,/* 次元の数 */ out = _null_ ,/* サンプル数量の出力dataset */ newdata = ,/* 判別モデルを適用するデータ*/ outcate = _cate_ ,/* カテゴリ数量の出力dataset */ outitem = _null_ ,/* アイテム数量の出力dataset */ maxdec = ,/* 出力時の小数点以下桁数 */ vardef = n ,/* 分散計算時の除数 */ title = 2 ,/* %su2が使う title 行の番号 */ noprint = ,/* noprint で全ての印刷を抑止*/ workds = delete /* delete で一時データ削除 */ ) ; /*----------------------------------------------------------*/ /* SAS社提供「数量化2類マクロ」簡易版 */ /* */ /* %su2 [ Version 1.1 ] */ /*----------------------------------------------------------*/ /* <SAS社版からの主な修正点> */ /* su12doc.txt を参照. */ /* */ /* <制約> */ /* 1. 外的基準変数は数値変数とすること(proc glmmod の制約) */ /* 2. 入力データセットで使えない変数名 can: dim: col: _: */ /* */ /* <いろいろな指定例> */ /* %su2( data=x, outside=sick, item= a b c ) */ /* %su2( data=a, outside=y, item= x1-x3, dim = 3 ) */ /* %su2() この場合は簡単な指定方法の説明だけをLOGに表示する */ /* */ /* <動作確認> */ /* Ver. 1.0 : Windows 3.1, SAS 6.10 [ Base, STAT] */ /* Ver. 1.1 : Windows 95 , SAS 6.12 [ Base, STAT] */ /*----------------------------------------------------------*/ /* < Ver. 1.1 の変更点 > 1999.11.1 */ /* */ /* 1.見かけの判別率を出力するようにした. */ /* 2.newdata = データセット名,オプションを追加した. */ /* この指定があると,data = で指定したデータセットで */ /* 作成した判別モデル(判別関数)を newdata = のデータ */ /* に適用して判別結果を報告する. */ /* */ /*----------------------------------------------------------*/ /* by 鈴木督久(日経リサーチ) */ /*----------------------------------------------------------*/ /*---------- 必須パラメータが無い場合は使用法を出力して終了 */ %if &data = %str() or &outside = %str() or %quote(&item) = %str() %then %do ; %put ; %put USAGE: %nrstr(%su2)( data=, outside=, item= [, dim=, out=, outitem=, outcate=,; %put %str( weight=, workds=, maxdec=, title=, noprint= ] )); %put ; %put %str( data = データセット名 ); %put %str( outside = 外的基準の変数名 ); %put %str( item = アイテムの変数名リスト); %put ; %goto macroend ; %end ; /*----------------------------- weight 関連のマクロ変数定義 */ %if &weight = %str() %then %do ; %let weistmt = %str() ; %let vardef = &vardef ; %let sumgt = n ; %let catdef = n ; %end ; %else %do ; %if %upcase(&vardef) = N %then %let vardef = wgt ; %else %if %upcase(&vardef) = DF %then %let vardef = wdf ; %let weistmt = %str( weight &weight ; ) ; %let vardef = &vardef ; %let sumgt = sumwgt ; %let catdef = wgt ; %end ; /*---------------------- 印刷出力時の小数点以下の桁数の指定 */ %if &maxdec = %str() %then %let fw = %str() ; %else %do ; %let f = %eval( &maxdec + 3 ) ; %let fw = &f..&maxdec ; %let maxdec = %str( maxdec = &maxdec ) ; %end; /*-------------------------------- アイテム用 format の生成 */ proc contents data = &data out = _fmt_( keep = name label ) noprint ; run ; data _fmt_ ; rename name = start ; fmtname = 'item_f' ; type = 'c' ; set _fmt_ ; if label = '' then label = name ; run ; proc format cntlin = _fmt_ ; run ; /*------------------------ ダミー変数(デザイン)行列を作成 */ proc glmmod data = &data ( keep = &outside &item &weight ) outdesign = _design_ /* デザイン行列 */ outparm = _colpar_ /* カラムと変数の情報 */ noprint ; class &item ; model &outside = &item / noint ; &weistmt run ; /*-------------------------------------- 正準判別分析の実行 */ %if %upcase(&noprint) = NOPRINT %then %let canprint = noprint ; %else %let canprint = short ; proc candisc data = _design_ outstat = _stat_ out = _out_ ( keep = can1 - can&dim &outside &weight ) ncan = &dim &canprint ; class &outside ; var col: ; &weistmt run ; /*--------------------------- 正準係数をカテゴリ数量に変換 */ proc means data = _out_ noprint vardef = &catdef ; var can1 - can&dim ; output out = _std_ std = std1 - std&dim ; &weistmt run ; proc transpose data = _stat_ out = _score_ ; where _type_ = 'RAWSCORE' ; run ; data _score_ ; keep can1 - can&dim ; if _n_ = 1 then set _std_ ; set _score_ ( where = ( _name_ ne "%upcase(&outside)" ) ) ; %do i = 1 %to &dim ; can&i = can&i / std&i ; %end ; run ; proc summary data = _design_ ; var col: ; output out = _freq_( drop = _type_ _freq_ ) sum = ; &weistmt run ; proc transpose data = _freq_ out = _freq_ prefix = freq ; run ; data _score_ ; merge _score_ _freq_ ; run ; data _colpar_ ; length item $8 category $16 ; keep item category ; set _colpar_( drop = _colnum_ ) ; item = left( effname ) ; category = kstrcat( of &item ) ; run ; data _score_ ; merge _colpar_ _score_ ; run ; proc sort data = _score_ ; by item ; run ; proc means data = _score_ noprint ; by item ; var can1 - can&dim ; weight freq1 ; output out = _wmean_ mean = wmean1 - wmean&dim ; run ; data &outcate ; keep item category freq1 _name_ _type_ dim1 - dim&dim ; length _type_ $5 _name_ $6 ; _type_ = 'SCORE' ; format item $item_f. ; merge _score_ _wmean_( keep = wmean1 - wmean&dim item ) ; by item ; %do i = 1 %to &dim ; dim&i = can&i - wmean&i ; %end ; run ; /*----------------- カテゴリ数量からサンプル数量を線形合成 */ proc transpose data = &outcate ( keep = _type_ _name_ dim1 - dim&dim ) out = _coeff_ ; by _type_ ; var dim1 - dim&dim ; id _name_ ; run ; proc score data = _design_ score = _coeff_ out = _design_ ; var col: ; run ; %local missing ; %if %upcase(&out) ne _NULL_ & &out ne %str() %then %do ; data &out ; merge &data _design_( keep = dim1 - dim&dim in = in ) ; if not in then do ; call symput( 'missing', 'y' ) ; stop ; end ; run ; %end ; %if &missing = y %then %do ; proc datasets library = work nolist ; delete &out ; run ; quit ; %put WARNING: Due to missing values, &out deleted . ; %end ; /*---------------------------- アイテムのサンプル数量を出力 */ data _item_ ; _type_= 'SCORE' ; rename item = _name_ ; drop dim1 - dim&dim i j ; if 0 then set _design_( drop = &outside dim1 - dim&dim ) ; array col ( * ) _numeric_ ; %do i = 1 %to &dim ; dim = "&i" ; do j = 1 to last ; set &outcate( keep = item dim1 - dim&dim ) nobs = last ; by item ; if first.item then do i = 1 to dim( col ) ; col( i ) = . ; end; col( j ) = dim&i ; if last.item then output; end ; %end ; stop ; run; proc score data = _design_( drop = dim1 - dim&dim ) score = _item_ out = &outitem( keep = &outside &item &weight dim ) ; var col: ; by dim ; run ; proc summary data = _design_(keep = &outside dim1 - dim&dim ) nway; class &outside ; var dim1 - dim&dim ; output out = _gmean_( keep = &outside dim1 - dim&dim ) mean = ; run ; data _mean_ ; keep dim grpmean &outside ; array gmean ( &dim ) dim1 - dim&dim ; set _gmean_ ; do i = 1 to &dim ; dim = put( i, 1. ) ; grpmean = gmean( i ) ; output ; end ; run ; proc sort data = _mean_ ; by dim &outside ; run ; %if %upcase(&outitem) ne _NULL_ %then /* 群平均値を追加 */ %do ; data &outitem ; merge &outitem _mean_ ; by dim &outside ; run ; %end ; %if &newdata ne %str() %then %do ; /*----------------------新しいデータに判別モデルを適用 */ proc glmmod data = &newdata ( keep = &outside &item ) outdesign = _new_ noprint ; class &item ; model &outside = &item / noint ; run ; proc score data = _new_ score = _coeff_ out = _newdisc ; var col: ; run ; data _seed ; set _gmean_ ; cluster = _n_ ; call symput( 'GN', put( cluster, 2. ) ) ; run; proc fastclus data = _newdisc ( keep = &outside dim1 - dim&dim ) out = _clus seed = _seed cluster = _result noprint maxc = &gn replace = none maxiter = 0 ; var dim1 - dim&dim ; run; %end ; %if %upcase(&noprint) ne NOPRINT %then %do ; /*------------------------------ カテゴリ数量を印刷 */ title&title 'CATEGORY SCORE' ; proc print data = &outcate ; format dim: &fw ; by item ; id item ; var category freq1 dim: ; run ; proc chart data = &outcate ; format dim: &fw ; %do i = 1 %to &dim ; hbar category / discrete nozeros type = mean sumvar = dim&i group = item ; %end ; run; title&title ; /*-------------------------- アイテム・レンジを印刷 */ proc summary data = &outcate nway ; class item ; var dim: ; output out = _range_ ( drop = _type_ _freq_ ) range = range1 - range&dim ; run ; title&title 'ITEM RANGE' ; proc chart data = _range_ ; format range: &fw ; %do i = 1 %to &dim ; hbar item / discrete nozeros descending type = mean sumvar = range&i ; %end ; run; title&title ; /*------------------ サンプル数量の要約統計量を印刷 */ title&title "SAMPLE SCORE STATISTICS ( VARDEF = &vardef )" ; proc means data = _design_ vardef = &vardef &sumgt mean var std &maxdec ; class &outside ; var dim: ; &weistmt run ; title&title ; /*------------------ 見かけの正判別率を印刷 */ title&title "APPARENT DISCREMINANT RATES " ; data seed ; set _gmean_ ; cluster = _n_ ; call symput( 'G', put( cluster, 2. ) ) ; run; proc fastclus data = _design_ ( keep = &outside dim1 - dim&dim ) out = clus seed = seed cluster = result noprint maxc = &g replace = none maxiter = 0 ; var dim1 - dim&dim ; run; proc freq data = clus ; tables &outside * result / nocol norow nopercent out = tab ; run; data diag ; set tab ; if &outside = result ; run ; proc print data = diag ; sum count percent ; run ; title&title ; %if &newdata ne %str() %then %do ; title&title "APPARENT DISCREMINANT RATES OF NEW DATASET" ; proc freq data = _clus ; tables &outside * _result / nocol norow nopercent out = _tab ; run; data _diag ; set _tab ; if &outside = _result ; run ; proc print data = _diag ; sum count percent ; run ; title&title ; %end ; data _null_ ; file print ; put '---------------------------------------------------' ; put ' %su2 (1995, 1999) by Tokuhisa SUZUKI. Verson 1.1' ; put '---------------------------------------------------' ; run ; %end ; /*---------------------------------- 一時データセットの削除 */ %if %upcase(&workds) = DELETE %then %do ; proc datasets library = work nolist ; delete _design_ /* デザイン行列 */ _colpar_ /* デザイン行列の変数情報 */ _fmt_ /* アイテムformatの制御データ */ _stat_ /* 正準判別分析の各種統計量 */ _out_ /* 正準変量の出力ファイル */ _score_ /* 正準係数だけのファイル */ _std_ /* 正準係数の標準偏差 */ _freq_ /* カテゴリの度数集計ファイル */ _wmean_ /* 正準係数の重み平均ファイル */ _item_ /* アイテム数量作成用の係数 */ _coeff_ /* 線形結合用のカテゴリ数量 */ _mean_ /* サンプル数量の群平均値 */ _gmean_ /* サンプル数量の群平均値 */ _range_ /* アイテム・レンジの計算結果 */ / memtype = data ; run ; quit ; %end ; %macroend: %put ; %put NOTE: %nrstr(%su2) Version 1.1 by Tokuhisa Suzuki (1995, 1999). ; %put ; %mend su2 ;