Wednesday, 5 December 2018

testDNN analysis 8

% trainDBN: training the Deep Belief Nets (DBN) model by back projection algorithm
%
% [dbn rmse] = trainDBN( dbn, IN, OUT, opts)
%
%
%Output parameters:
% dbn: the trained Deep Belief Nets (DBN) model
% rmse: the rmse between the teaching data and the estimates
%
%
%Input parameters:
% dbn: the initial Deep Belief Nets (DBN) model
% IN: visible (input) variables, where # of row is number of data and # of col is # of visible (input) nodes % 1000 x 32
% OUT: teaching hidden (output) variables, where # of row is number of data and # of col is # of hidden (output) nodes % 1000 x 4
% opts (optional): options
%
% options (defualt value):
%  opts.LayerNum: # of tarining RBMs counted from output layer (all layer)
%  opts.MaxIter: Maxium iteration number (100)
%  opts.InitialMomentum: Initial momentum until InitialMomentumIter (0.5)
%  opts.InitialMomentumIter: Iteration number for initial momentum (5)
%  opts.FinalMomentum: Final momentum after InitialMomentumIter (0.9)
%  opts.WeightCost: Weight cost (0.0002)
%  opts.DropOutRate: List of Dropout rates for each layer (0)
%  opts.StepRatio: Learning step size (0.01)
%  opts.BatchSize: # of mini-batch data (# of all data)
%  opts.Object: specify the object function ('Square')
%              'Square'
%              'CrossEntorpy'
%  opts.Verbose: verbose or not (false)
%
%
%Example:
% datanum = 1024;
% outputnum = 16;
% hiddennum = 8;
% inputnum = 4;
%
% inputdata = rand(datanum, inputnum);
% outputdata = rand(datanum, outputnum);
%
% dbn = randDBN([inputnum, hiddennum, outputnum]);
% dbn = pretrainDBN( dbn, inputdata );
% dbn = SetLinearMapping( dbn, inputdata, outputdata );
% dbn = trainDBN( dbn, inputdata, outputdata );
%
% estimate = v2h( dbn, inputdata );
%
%
%Reference:
%for details of the dropout
% Hinton et al, Improving neural networks by preventing co-adaptation of feature detectors, 2012.
%
%
%Version: 20131204

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Deep Neural Network:                                     %
%                                                          %
% Copyright (C) 2013 Masayuki Tanaka. All rights reserved. %
%                    mtanaka@ctrl.titech.ac.jp             %
%                                                          %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [dbn rmse] = trainDBN( dbn, IN, OUT, opts)

% Important parameters
InitialMomentum = 0.5;     % momentum for first five iterations
FinalMomentum = 0.9;       % momentum for remaining iterations
WeightCost = 0.0002;       % costs of weight update
InitialMomentumIter = 5;

MaxIter = 100;
StepRatio = 0.01;
BatchSize = 0;
Verbose = false;

Layer = 0;
strbm = 1;

nrbm = numel( dbn.rbm ); % 3
DropOutRate = zeros(nrbm,1); % 3 x 1

OBJECTSQUARE = 1;
OBJECTCROSSENTROPY = 2;
Object = OBJECTSQUARE; % 1

if( exist('opts' ) )
 if( isfield(opts,'MaxIter') )
  MaxIter = opts.MaxIter;
 end
 if( isfield(opts,'InitialMomentum') )
  InitialMomentum = opts.InitialMomentum;
 end
 if( isfield(opts,'InitialMomentumIter') )
  InitialMomentumIter = opts.InitialMomentumIter;
 end
 if( isfield(opts,'FinalMomentum') )
  FinalMomentum = opts.FinalMomentum;
 end
 if( isfield(opts,'WeightCost') )
  WeightCost = opts.WeightCost;
 end
 if( isfield(opts,'DropOutRate') )
  DropOutRate = opts.DropOutRate;
  if( numel(DropOutRate) == 1 )
      DropOutRate = ones(nrbm,1) * DropOutRate;
  end
 end
 if( isfield(opts,'StepRatio') )
  StepRatio = opts.StepRatio;
 end
 if( isfield(opts,'BatchSize') )
  BatchSize = opts.BatchSize;
 end
 if( isfield(opts,'Verbose') )
  Verbose = opts.Verbose;
 end
 if( isfield(opts,'Layer') )
  Layer = opts.Layer;
 end
 if( isfield(opts,'Object') )
  if( strcmpi( opts.object, 'Square' ) )
   Object = OBJECTSQUARE;
  elseif( strcmpi( opts.object, 'CrossEntropy' ) )
   Object = OBJECTCROSSENTROPY;
  end
 end
end

num = size(IN,1); % 1000
if( BatchSize <= 0 ) % 250
  BatchSize = num;
end

if( Layer > 0 ) % 0  <- strbm="1</p">    strbm = nrbm - Layer + 1;
end

deltaDbn = dbn;
for n=strbm:nrbm % 1:3
    deltaDbn.rbm{n}.W = zeros(size(dbn.rbm{n}.W)); % 32 x 16, 16 x 8, 8 x 4
    deltaDbn.rbm{n}.b = zeros(size(dbn.rbm{n}.b)); % 1x16, 1 x 8, 1 x 4
end

if( Layer > 0 ) % 0
    strbm = nrbm - Layer + 1;
end

if( sum(DropOutRate > 0) ) % 1 x 1
    OnInd = GetOnInd( dbn, DropOutRate, strbm ); % 3 x 1 {16 x 32, 8 x 16, 4 x 8}
    for n=max([2,strbm]):nrbm % 2:3
        dbn.rbm{n}.W = dbn.rbm{n}.W / numel(OnInd{n-1}) * size(dbn.rbm{n-1}.W,2);
% 16 x 8                    / 512(=16*32) * 16(32 x 16)
% 8 x 4 = (dbn.rbm{n}.W(8 x 4) / 128(=8*16)) * 8(16 x 8)
    end
end

for iter=1:MaxIter % 1:20
     
    % Set momentum
if( iter <= InitialMomentumIter )
momentum = InitialMomentum; % 0.5
else
momentum = FinalMomentum; % 0.9
    end
   
ind = randperm(num); % 1 x 1000 num=1000 random numbers with values upto 1000
for batch=1:BatchSize:num % 1:250:1000   1 251 501 751
bind = ind(batch:min([batch + BatchSize - 1, num])); % 1 x 250 <-- -="" ind="" min="">ind(1:250)   batch==1
% extract num of Batchsize items or if less upto the upper limit num
        % bind(1:10)= 166 919 119 895 754 24 500 318 249 659
        trainDBN = dbn;
        if( DropOutRate > 0 )
    % 1 x 1 ->  trainDBN.rbm{i}(16 x 32, 8 x 16, 4 x 8)
            [trainDBN OnInd] = GetDroppedDBN( trainDBN, DropOutRate, strbm );
% trainDBN.rbm{i}(512 x 128(n=1), 128 x 32(n=2), 32 x 4(n=3))

            Hall = v2hall( trainDBN, IN(bind,OnInd{1}) );
% IN(1000 x 32) bind(1 x 250) OnInd(16 x 32)
% size(IN(bind,OnInd{1}))-> 250 x 512
% Hall(1->250 x 128, 2->250 x 32, 3->250 x 4)
%  size(IN(bind,OnInd{1}))->(250 x 512)  * trainDBN(512 x 128) * = Hall->250 x 128, 250 x 32, 250 x 4(n=3)
else
             Hall = v2hall( trainDBN, IN(bind,:) ); % IN(1000 x 32) bind(1 x 250)
        end
   
        for n=nrbm:-1:strbm % 3 : -1 : 1
            derSgm = Hall{n} .* ( 1 - Hall{n} ); %derivative sigmoid 250 x 4(n=3), 250 x 32(n=2), 250 x 1288(n=1)
            if( n+1 > nrbm )
                der = ( Hall{nrbm} - OUT(bind,:) );  %der(250 x 4) Hall{nrbm}(250 x 4) 250 x 4 = OUT(1000 x 4) (bind(1 x 250),:)
                if( Object == OBJECTSQUARE )
                    der = derSgm .* der; % 250 x 4(n=3)
                end
            else
                der = derSgm .* ( der * trainDBN.rbm{n+1}.W' );  % (n=1)(250 x 128 * 32 x 128'), 250 x 32( 250 x 4 * 4 x 32 )(n=2)
            end
           
            if( n-1 > 0 )
                in = Hall{n-1}; % (n=3)250 x 32, (n=2)250 x 128, (n=1)250 x 512, 
            else
                if( DropOutRate > 0 )
                    in = IN(bind,OnInd{1}); %  IN(1000 x 32) bind(1 x 250) OnInd{1}(16 x 32) = 250 x 512(n=3,n=2,n=1)
                else
                    in = IN(bind,:); % 250 x 32(n=3), 250 x 128(n=2), 250 x 512(n=1)
                end
            end
           
            in = cat(2, ones(numel(bind),1), in); %(n=3)250 x 33,(n=2)250 x 129, ones(numel(bind),1)(250 x 1)(250 x 512) = 250 x 513
            deltaWb = in' * der / numel(bind);
% (n=3)33 x 4=250 x 33' * 250 x 4 ,
% 129 x 32 = 250 x 129' * 250 x 32,
% 513 x 128 =250 x 513' * 250 x 128  / 250
            deltab = deltaWb(1,:); % 1 x 4(n=3), 1 x 32, 1 x 128
            deltaW = deltaWb(2:end,:); % 32 x 4(n=3), 128 x 32(n=2), 512 x 128(n=1)
           
            if( strcmpi( dbn.rbm{n}.type, 'GBRBM' ) ) % BBRBM   Gaussian-Binary Restricted Bolztmann
deltaW = bsxfun( @rdivide, deltaW, trainDBN.rbm{n}.sig' );
            end
           
            deltaDbn.rbm{n}.W = momentum * deltaDbn.rbm{n}.W; % 0.5 * 32 x 16(n=1), 16 x 8(n=2), 8 x 4(n=3)
            deltaDbn.rbm{n}.b = momentum * deltaDbn.rbm{n}.b; % 0.5 * 1x16(n=1), 1 x 8(n=2), 1 x 4(n=3)
             
            if( DropOutRate > 0 ) %(0.5, 0.5, 0.5)'
                if( n == nrbm )
                    deltaDbn.rbm{n}.W(OnInd{n},:) = deltaDbn.rbm{n}.W(OnInd{n},:) - StepRatio * deltaW;
% 32 x 4                        32 x 4                           32 x 4
                    deltaDbn.rbm{n}.b = deltaDbn.rbm{n}.b - StepRatio * deltab; % 1 x 4
                else
                    deltaDbn.rbm{n}.W(OnInd{n},OnInd{n+1}) = deltaDbn.rbm{n}.W(OnInd{n},OnInd{n+1}) - StepRatio * deltaW;
% (n=2)128 x 32=(8 x 16,4 x 8) (n=1)512 x 128=(32x16(16x32,8x16))      (n=2)128 x 32 (n=1)512 x 128
                    deltaDbn.rbm{n}.b(1,OnInd{n+1}) = deltaDbn.rbm{n}.b(1,OnInd{n+1}) - StepRatio * deltab;
% 1 x 32(n=2) (n=1)1 x 128(onInd{n+1}=8 x 16)
                end
            else
                deltaDbn.rbm{n}.W = deltaDbn.rbm{n}.W - StepRatio * deltaW;
                deltaDbn.rbm{n}.b = deltaDbn.rbm{n}.b - StepRatio * deltab;
            end
keyboard()
        end
           
        for n=strbm:nrbm           
            dbn.rbm{n}.W = dbn.rbm{n}.W + deltaDbn.rbm{n}.W;
            dbn.rbm{n}.b = dbn.rbm{n}.b + deltaDbn.rbm{n}.b; 
        end
    end
   
    if( Verbose )
        tdbn = dbn;
        if( sum(DropOutRate > 0) )
            OnInd = GetOnInd( tdbn, DropOutRate, strbm );
            for n=max([2,strbm]):nrbm
                tdbn.rbm{n}.W = tdbn.rbm{n}.W * numel(OnInd{n-1}) / size(tdbn.rbm{n-1}.W,2);
            end
        end
        out = v2h( tdbn, IN );
        err = power( OUT - out, 2 );
        rmse = sqrt( sum(err(:)) / numel(err) );
        fprintf( '%3d : %9.4f\n', iter, rmse );
    end
end

if( sum(DropOutRate > 0) )
    OnInd = GetOnInd( dbn, DropOutRate, strbm );
    for n=max([2,strbm]):nrbm
        dbn.rbm{n}.W = dbn.rbm{n}.W * numel(OnInd{n-1}) / size(dbn.rbm{n-1}.W,2);
    end
end