kernel

Nov 24, 2018
 
First, load the necessary packages

import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D
import keras.backend as K
import scipy, imageio
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline
Then show the original picture of my Jeep

# First, read the image in as a matrix.
# We can use pyplot's imshow() method to display the picture.
# This is the Wrangler JK Rubicon Unlimited I used to own.
#
img_data = imageio.imread('./pics/wranglerJK.jpg')
print(img_data.shape)

img = Image.fromarray(img_data, 'RGB')
plt.imshow(img)




Now, build our 2-D convolution function, which takes a custom filter matrix and computes the filtered output image matrix.
# Custom kernel initializer. Keras passes shape = (kernel_h, kernel_w,
# in_channels, filters); tile the fixed 2-D filter_mat (the module-level
# matrix defined before MyFilter() is called) into every channel pair.
def my_init(shape, dtype=None):
    new_mat = np.zeros(shape)
    for i in range(shape[2]):
        for j in range(shape[3]):
            new_mat[:, :, i, j] = filter_mat
    return np.array(new_mat, dtype=dtype)


def MyFilter(filter_mat):
    if len(filter_mat.shape) != 2:
        print('Invalid filter matrix. It must be 2-D')
        return []
    else:
        kernel_size = filter_mat.shape
        row, col, depth = img_data.shape
        input_shape = img_data.shape
        filter_size = row * col * depth
        print(filter_size)

        # A single Conv2D layer whose weights are fixed by my_init
        model = Sequential()
        model.add(Conv2D(depth,
                         kernel_size=kernel_size,
                         input_shape=input_shape,
                         padding='same',
                         activation='linear',
                         data_format='channels_last',
                         kernel_initializer=my_init,
                         name='Conv'))
        model.add(Dense(1, activation='linear'))
        model.compile(optimizer='sgd', loss='mse')
        model.summary()

        # Pull the output of the Conv layer for the input image
        inX = model.input
        outputs = [layer.output for layer in model.layers if layer.name == 'Conv']
        functions = [K.function([inX], [out]) for out in outputs]

        layer_outs = [func([img_data.reshape(1, row, col, depth)]) for func in functions]
        activationLayer = layer_outs[0][0]

        # Rescale the activations to [0, 1] so imshow() can display them
        temp = activationLayer - np.min(activationLayer)
        normalized_activationLayer = temp / np.max(temp)
        return normalized_activationLayer.reshape(row, col, depth)
Now, plug in our own fixed filter matrix and use pyplot.imshow() to display the filtered picture. This time we throw in an edge detector.
filter_mat = np.array([-1, -2, -3, 0, 0, 0, 1, 2, 3]).reshape(3, 3)

outLayer = MyFilter(filter_mat)
plt.imshow(outLayer)
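As a quick sanity check (my addition, using the scipy already imported above): Conv2D performs cross-correlation, and my_init tiles the same 3x3 filter across all channel pairs, so every output channel should equal the sum over the RGB channels of a zero-padded 2-D cross-correlation with filter_mat. Something like the following sketch should agree with outLayer after the same [0, 1] rescaling:

from scipy.ndimage import correlate

# Reference filtering: sum over the RGB channels of a zero-padded cross-correlation
ref = sum(correlate(img_data[:, :, c].astype(float), filter_mat,
                    mode='constant', cval=0.0)
          for c in range(img_data.shape[2]))

# Rescale to [0, 1] the same way MyFilter() does and compare one output channel
ref_norm = (ref - ref.min()) / (ref - ref.min()).max()
print(np.allclose(ref_norm, outLayer[:, :, 0], atol=1e-4))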
Below is the filtered picture.


 
 Posted by at 2:39 AM

SAS implementation of Kernel PCA

 kernel, PCA, predictive modeling, PROC CORR, PROC PRINCOMP, PROC SCORE
Feb 06, 2010
 


The kernel method is a very useful technique in data mining that is applicable to any algorithm relying on inner products [1]. The key is to apply an appropriate kernel function to the inner products of the original data space.

Here I show SAS/STAT + BASE example code for a kernel PCA implementation. The example used is from Wikipedia @ here. Extension to other algorithms suitable for the kernel method, such as CCA, ICA, and LDA, is straightforward.
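To make the idea concrete before diving into SAS, here is a small numpy sketch (my addition, not part of the original SAS demo) showing that the polynomial kernel (x'y+1)^2 used below is just an ordinary inner product in a lifted feature space, which is why any inner-product-based algorithm can be "kernelized" without ever forming that space explicitly:

import numpy as np

def poly_kernel(x, y):
    # (x'y + 1)^2, the kernel used in the Wikipedia example and in the SAS code below
    return (np.dot(x, y) + 1.0) ** 2

def phi(x):
    # explicit feature map for 2-D inputs with phi(x).phi(y) == poly_kernel(x, y)
    x1, x2 = x
    return np.array([x1**2, x2**2,
                     np.sqrt(2) * x1 * x2,
                     np.sqrt(2) * x1, np.sqrt(2) * x2,
                     1.0])

x = np.array([0.3, -1.2])
y = np.array([2.0, 0.5])
print(poly_kernel(x, y))       # evaluated in the original 2-D space
print(phi(x) @ phi(y))         # same value via the explicit 6-D feature map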

Reference:
[1]. Zhu, Mu, "Kernels and Ensembles: Perspectives on Statistical Learning", The American Statistician. May 1, 2008, 62(2): 97-109

Figure: Result



SAS demo code:


/* Demonstrate kernel PCA. Kernel method can be applied to any algorithm that 
   relies on inner product */
data original;
     do ID=-314 to 314;
        x1=sin(ID/100)*1+rannor(8976565)*0.1; 
        x2=cos(ID/100)*1+rannor(92654782)*0.1; 
        Class=1; output;
        x1=sin(ID/100)*2+rannor(8976565)*0.1;
        x2=cos(ID/100)*2+rannor(92654782)*0.1; 
        Class=2; output;
        x1=sin(ID/100)*3+rannor(8976565)*0.1; 
        x2=cos(ID/100)*3+rannor(92654782)*0.1; 
        Class=3; output;
     end;
run;

/*------------ Linear PCA -----------*/
proc princomp data=original  standard noint cov 
              outstat=lin_stat(where=(_TYPE_ = 'USCORE'))
              noprint;
        var x1 x2;
run;

data lin_stat_score;
     set lin_stat;
     _TYPE_= 'PARMS';
run;

proc score data=original   score=lin_stat_score  type=parms   out=lin_pca;
     var x1 x2;
run;

/*--------- Kernel PCA 1.0 ------------*/
/* Inner product based kernel PCA */
proc transpose data=original(drop=ID  Class) out=original_t; run;

proc corr data=original_t 
          outp=inner(where=(_TYPE_='SSCP' & substr(_NAME_, 1, 3)='COL') 
                     drop=intercept)  
          sscp noprint;
     var col:;
run;

/* apply kernel function to the inner product matrix. Here we use (X'Y+1)^2, the 
   one used in the wikipedia.org example. */
data inner;
     set inner;
     array _C{*} _numeric_;
     _TYPE_='COV';
     do j=1 to dim(_C); _C[j]=(_C[j]+1)**2; end;
     drop j;
run;

proc princomp data=inner    noint  cov  standard
              outstat=k_stat(where=(_TYPE_ in ('EIGENVAL', 'USCORE')) )
              noprint;
     var col:;
run;

data S(drop=_NAME_)  score;
     set k_stat;
     if _TYPE_='EIGENVAL' then output S; else output score;
     drop _TYPE_;
run;

data _null_;
     set S;
     array _S{*} _numeric_;
     do j=1 to dim(_S);
        if _S[j]<0.5*constant('MACEPS') then do;
          call symput('maxsingval', j-1); stop;
        end;
     end;
run;
%put &maxsingval;

data score; 
     set score; if _n_>&maxsingval then delete; retain _TYPE_ 'PARMS';  
run;

proc score data=inner   score=score  type=parms  out=k_U(keep=Prin:);
        var col:;
run;

data k_U;
     set original(keep=ID Class  x1 x2)  ;
     set k_U;
run;

/*
proc gplot data=k_U;
     plot Prin1*Prin2=Class;
run;quit;
*/

/*--------- Kernel PCA 2.0------------*/
/* Frobenius norm based kernel PCA */
proc transpose data=original(drop=ID  Class) out=original_t; run;

proc corr data=original_t  
          outp=inner2(where=(_TYPE_='SSCP' & substr(_NAME_, 1, 3)='COL')  
                      drop=intercept)  
          sscp noprint;
     var col:;
run;

proc means data=original_t noprint ;
         var col:;
         output out=_uss2(drop=_TYPE_    _FREQ_)    uss= /autoname;
run;

proc transpose data=_uss2  out=_uss2t; run;

proc sql noprint;
          select nobs into :NCOLS from sashelp.vtable
          where libname='WORK'
              and memtype='DATA'
              and memname='_USS2T'
              ;
quit;
%let ncols=%sysfunc(compress(&NCOLS));
%put &ncols;

/* apply kernel function to the inner product matrix. Here we use the Gaussian kernel function. */
%let sigma=1;
data inner2;
          array _X{&ncols} COL1-COL&ncols;
          array _USS0{&ncols}  _temporary_;
          if _n_=1 then do;
             do j=1 to &ncols;
                  set  _uss2t(keep=COL1)  point=j;
                  _USS0[j]=COL1;
             end;
          end;
          set inner2;
          _TYPE_='COV';
          do j=1 to &ncols;
              _X[j]=_USS0[_n_] + _USS0[j] - 2*_X[j];
              _X[j]=exp(-0.5*_X[j]/&sigma);
          end;
          keep _TYPE_  _NAME_ COL1-COL&ncols;
run;


proc princomp data=inner2    noint  cov  standard
              outstat=k_stat2(where=(_TYPE_ in ('EIGENVAL', 'USCORE')) )
              noprint;
     var col:;
run;

data S2(drop=_NAME_)  score2;
     set k_stat2;
     if _TYPE_='EIGENVAL' then output S2; else output score2;
     drop _TYPE_;
run;

data _null_;
     set S2;
     array _S{*} _numeric_;
     do j=1 to dim(_S);
        if _S[j]<0.5*constant('MACEPS') then do;
          call symput('maxsingval', j-1); stop;
        end;
     end;
run;
%put &maxsingval;

data score2; 
     set score2; if _n_>&maxsingval then delete; retain _TYPE_ 'PARMS';  
run;

proc score data=inner2   score=score2  type=parms  out=k_U2(keep=Prin:);
        var col:;
run;

data k_U2;
     set original(keep=ID Class  x1 x2)  ;
     set k_U2;
run;

/*@ Draw figure using R @*/

%macro RScript(Rscript);
data _null_;
     file "&Rscript";
     infile cards ;
     input;
     put _infile_;
%mend;

 
%macro CallR(Rscript, Rlog);
systask command "D:\Progra~1\Analyt~1\R\bin\R.exe CMD BATCH --vanilla --quiet 
                 &Rscript  &Rlog"   taskname=rjob1  wait  status=rjobstatus1;
%mend;

options nosource;
proc export data=original   outfile="c:\o.csv"  dbms=csv; run;
proc export data=lin_PCA    outfile="c:\a.csv"  dbms=csv; run;
proc export data=k_U       outfile="c:\b.csv"  dbms=csv; run;
proc export data=k_U2     outfile="c:\c.csv"   dbms=csv; run;
options source;

%RScript(c:\rscript.r)
cards;
odsn <- read.csv('c:/o.csv', header=T)
adsn <- read.csv('c:/a.csv', header=T)
bdsn <- read.csv('c:/b.csv', header=T)
cdsn <- read.csv('c:/c.csv', header=T)
png(file='c:/figure.png')
par(mfcol=c(2,2), mar=c(4,4,3,2))
plot(x1~x2, data=odsn, col=Class, main='Original')
plot(Prin1~Prin2, data=adsn, col=Class, main='Linear PCA')
plot(Prin1~Prin2, data=bdsn, col=Class, main='Polynomial Kernel')
plot(Prin1~Prin2, data=cdsn, col=Class, main='Gaussian Kernel')
dev.off()
;
run;

%CallR(c:\rscript.r, c:\rlog1.txt);
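For readers who want to sanity-check the result outside of SAS, the sketch below (my addition, assuming scikit-learn and matplotlib are available) rebuilds the same three noisy rings and projects them with the two kernels via sklearn.decomposition.KernelPCA. Note that scikit-learn centers the kernel matrix, which the SAS code above does not, so the scores will not match numerically, but the class separation in the kernel components should look similar to the figure.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA

rng = np.random.default_rng(8976565)
t = np.arange(-314, 315) / 100.0
X, Class = [], []
for r in (1, 2, 3):                       # three rings of radius 1, 2, 3
    x1 = np.sin(t) * r + rng.normal(0, 0.1, t.size)
    x2 = np.cos(t) * r + rng.normal(0, 0.1, t.size)
    X.append(np.column_stack([x1, x2]))
    Class.append(np.full(t.size, r))
X, Class = np.vstack(X), np.concatenate(Class)

# (x'y + 1)^2  <->  kernel='poly', degree=2, gamma=1, coef0=1
poly = KernelPCA(n_components=2, kernel='poly', degree=2, gamma=1, coef0=1)
# exp(-0.5*||x-y||^2/sigma) with sigma=1  <->  kernel='rbf', gamma=0.5
rbf = KernelPCA(n_components=2, kernel='rbf', gamma=0.5)

fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for ax, Z, title in zip(axes,
                        [X, poly.fit_transform(X), rbf.fit_transform(X)],
                        ['Original', 'Polynomial Kernel', 'Gaussian Kernel']):
    ax.scatter(Z[:, 0], Z[:, 1], c=Class, s=5)
    ax.set_title(title)
plt.show()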

Further Reading:
Chapters 1 & 2 of
Principal Manifolds for Data Visualization and Dimension Reduction by A. Gorban, B. Kegl, D. Wunsch, A. Zinovyev (Eds.) Lecture Notes in Computational Science and Engineering, Springer 2007

 Posted by at 11:42 AM