%
% [dbn rmse] = trainDBN( dbn, IN, OUT, opts)
%
%
%Output parameters:
% dbn: the trained Deep Belief Nets (DBN) model
% rmse: the rmse between the teaching data and the estimates
%
%
%Input parameters:
% dbn: the initial Deep Belief Nets (DBN) model
% IN: visible (input) variables, where # of row is number of data and # of col is # of visible (input) nodes % 1000 x 32
% OUT: teaching hidden (output) variables, where # of row is number of data and # of col is # of hidden (output) nodes % 1000 x 4
% opts (optional): options
%
% options (defualt value):
% opts.LayerNum: # of tarining RBMs counted from output layer (all layer)
% opts.MaxIter: Maxium iteration number (100)
% opts.InitialMomentum: Initial momentum until InitialMomentumIter (0.5)
% opts.InitialMomentumIter: Iteration number for initial momentum (5)
% opts.FinalMomentum: Final momentum after InitialMomentumIter (0.9)
% opts.WeightCost: Weight cost (0.0002)
% opts.DropOutRate: List of Dropout rates for each layer (0)
% opts.StepRatio: Learning step size (0.01)
% opts.BatchSize: # of mini-batch data (# of all data)
% opts.Object: specify the object function ('Square')
% 'Square'
% 'CrossEntorpy'
% opts.Verbose: verbose or not (false)
%
%
%Example:
% datanum = 1024;
% outputnum = 16;
% hiddennum = 8;
% inputnum = 4;
%
% inputdata = rand(datanum, inputnum);
% outputdata = rand(datanum, outputnum);
%
% dbn = randDBN([inputnum, hiddennum, outputnum]);
% dbn = pretrainDBN( dbn, inputdata );
% dbn = SetLinearMapping( dbn, inputdata, outputdata );
% dbn = trainDBN( dbn, inputdata, outputdata );
%
% estimate = v2h( dbn, inputdata );
%
%
%Reference:
%for details of the dropout
% Hinton et al, Improving neural networks by preventing co-adaptation of feature detectors, 2012.
%
%
%Version: 20131204
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Deep Neural Network: %
% %
% Copyright (C) 2013 Masayuki Tanaka. All rights reserved. %
% mtanaka@ctrl.titech.ac.jp %
% %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [dbn rmse] = trainDBN( dbn, IN, OUT, opts)
% Important parameters
InitialMomentum = 0.5; % momentum for first five iterations
FinalMomentum = 0.9; % momentum for remaining iterations
WeightCost = 0.0002; % costs of weight update
InitialMomentumIter = 5;
MaxIter = 100;
StepRatio = 0.01;
BatchSize = 0;
Verbose = false;
Layer = 0;
strbm = 1;
nrbm = numel( dbn.rbm ); % 3
DropOutRate = zeros(nrbm,1); % 3 x 1
OBJECTSQUARE = 1;
OBJECTCROSSENTROPY = 2;
Object = OBJECTSQUARE; % 1
if( exist('opts' ) )
if( isfield(opts,'MaxIter') )
MaxIter = opts.MaxIter;
end
if( isfield(opts,'InitialMomentum') )
InitialMomentum = opts.InitialMomentum;
end
if( isfield(opts,'InitialMomentumIter') )
InitialMomentumIter = opts.InitialMomentumIter;
end
if( isfield(opts,'FinalMomentum') )
FinalMomentum = opts.FinalMomentum;
end
if( isfield(opts,'WeightCost') )
WeightCost = opts.WeightCost;
end
if( isfield(opts,'DropOutRate') )
DropOutRate = opts.DropOutRate;
if( numel(DropOutRate) == 1 )
DropOutRate = ones(nrbm,1) * DropOutRate;
end
end
if( isfield(opts,'StepRatio') )
StepRatio = opts.StepRatio;
end
if( isfield(opts,'BatchSize') )
BatchSize = opts.BatchSize;
end
if( isfield(opts,'Verbose') )
Verbose = opts.Verbose;
end
if( isfield(opts,'Layer') )
Layer = opts.Layer;
end
if( isfield(opts,'Object') )
if( strcmpi( opts.object, 'Square' ) )
Object = OBJECTSQUARE;
elseif( strcmpi( opts.object, 'CrossEntropy' ) )
Object = OBJECTCROSSENTROPY;
end
end
end
num = size(IN,1); % 1000
if( BatchSize <= 0 ) % 250
BatchSize = num;
end
if( Layer > 0 ) % 0 <- strbm="1</p"> strbm = nrbm - Layer + 1;
end
deltaDbn = dbn;
for n=strbm:nrbm % 1:3
deltaDbn.rbm{n}.W = zeros(size(dbn.rbm{n}.W)); % 32 x 16, 16 x 8, 8 x 4
deltaDbn.rbm{n}.b = zeros(size(dbn.rbm{n}.b)); % 1x16, 1 x 8, 1 x 4
end
if( Layer > 0 ) % 0
strbm = nrbm - Layer + 1;
end
if( sum(DropOutRate > 0) ) % 1 x 1
OnInd = GetOnInd( dbn, DropOutRate, strbm ); % 3 x 1 {16 x 32, 8 x 16, 4 x 8}
for n=max([2,strbm]):nrbm % 2:3
dbn.rbm{n}.W = dbn.rbm{n}.W / numel(OnInd{n-1}) * size(dbn.rbm{n-1}.W,2);
% 16 x 8 / 512(=16*32) * 16(32 x 16)
% 8 x 4 = (dbn.rbm{n}.W(8 x 4) / 128(=8*16)) * 8(16 x 8)
end
end
for iter=1:MaxIter % 1:20
% Set momentum
if( iter <= InitialMomentumIter )
momentum = InitialMomentum; % 0.5
else
momentum = FinalMomentum; % 0.9
end
ind = randperm(num); % 1 x 1000 num=1000 random numbers with values upto 1000
for batch=1:BatchSize:num % 1:250:1000 1 251 501 751
bind = ind(batch:min([batch + BatchSize - 1, num])); % 1 x 250 <-- -="" ind="" min="">ind(1:250) batch==1-->
% extract num of Batchsize items or if less upto the upper limit num
% bind(1:10)= 166 919 119 895 754 24 500 318 249 659
trainDBN = dbn;
if( DropOutRate > 0 )
% 1 x 1 -> trainDBN.rbm{i}(16 x 32, 8 x 16, 4 x 8)
[trainDBN OnInd] = GetDroppedDBN( trainDBN, DropOutRate, strbm );
% trainDBN.rbm{i}(512 x 128(n=1), 128 x 32(n=2), 32 x 4(n=3))
Hall = v2hall( trainDBN, IN(bind,OnInd{1}) );
% IN(1000 x 32) bind(1 x 250) OnInd(16 x 32)
% size(IN(bind,OnInd{1}))-> 250 x 512
% Hall(1->250 x 128, 2->250 x 32, 3->250 x 4)
% size(IN(bind,OnInd{1}))->(250 x 512) * trainDBN(512 x 128) * = Hall->250 x 128, 250 x 32, 250 x 4(n=3)
else
Hall = v2hall( trainDBN, IN(bind,:) ); % IN(1000 x 32) bind(1 x 250)
end
for n=nrbm:-1:strbm % 3 : -1 : 1
derSgm = Hall{n} .* ( 1 - Hall{n} ); %derivative sigmoid 250 x 4(n=3), 250 x 32(n=2), 250 x 1288(n=1)
if( n+1 > nrbm )
der = ( Hall{nrbm} - OUT(bind,:) ); %der(250 x 4) Hall{nrbm}(250 x 4) 250 x 4 = OUT(1000 x 4) (bind(1 x 250),:)
if( Object == OBJECTSQUARE )
der = derSgm .* der; % 250 x 4(n=3)
end
else
der = derSgm .* ( der * trainDBN.rbm{n+1}.W' ); % (n=1)(250 x 128 * 32 x 128'), 250 x 32( 250 x 4 * 4 x 32 )(n=2)
end
if( n-1 > 0 )
in = Hall{n-1}; % (n=3)250 x 32, (n=2)250 x 128, (n=1)250 x 512,
else
if( DropOutRate > 0 )
in = IN(bind,OnInd{1}); % IN(1000 x 32) bind(1 x 250) OnInd{1}(16 x 32) = 250 x 512(n=3,n=2,n=1)
else
in = IN(bind,:); % 250 x 32(n=3), 250 x 128(n=2), 250 x 512(n=1)
end
end
in = cat(2, ones(numel(bind),1), in); %(n=3)250 x 33,(n=2)250 x 129, ones(numel(bind),1)(250 x 1)(250 x 512) = 250 x 513
deltaWb = in' * der / numel(bind);
% (n=3)33 x 4=250 x 33' * 250 x 4 ,
% 129 x 32 = 250 x 129' * 250 x 32,
% 513 x 128 =250 x 513' * 250 x 128 / 250
deltab = deltaWb(1,:); % 1 x 4(n=3), 1 x 32, 1 x 128
deltaW = deltaWb(2:end,:); % 32 x 4(n=3), 128 x 32(n=2), 512 x 128(n=1)
if( strcmpi( dbn.rbm{n}.type, 'GBRBM' ) ) % BBRBM Gaussian-Binary Restricted Bolztmann
deltaW = bsxfun( @rdivide, deltaW, trainDBN.rbm{n}.sig' );
end
deltaDbn.rbm{n}.W = momentum * deltaDbn.rbm{n}.W; % 0.5 * 32 x 16(n=1), 16 x 8(n=2), 8 x 4(n=3)
deltaDbn.rbm{n}.b = momentum * deltaDbn.rbm{n}.b; % 0.5 * 1x16(n=1), 1 x 8(n=2), 1 x 4(n=3)
if( DropOutRate > 0 ) %(0.5, 0.5, 0.5)'
if( n == nrbm )
deltaDbn.rbm{n}.W(OnInd{n},:) = deltaDbn.rbm{n}.W(OnInd{n},:) - StepRatio * deltaW;
% 32 x 4 32 x 4 32 x 4
deltaDbn.rbm{n}.b = deltaDbn.rbm{n}.b - StepRatio * deltab; % 1 x 4
else
deltaDbn.rbm{n}.W(OnInd{n},OnInd{n+1}) = deltaDbn.rbm{n}.W(OnInd{n},OnInd{n+1}) - StepRatio * deltaW;
% (n=2)128 x 32=(8 x 16,4 x 8) (n=1)512 x 128=(32x16(16x32,8x16)) (n=2)128 x 32 (n=1)512 x 128
deltaDbn.rbm{n}.b(1,OnInd{n+1}) = deltaDbn.rbm{n}.b(1,OnInd{n+1}) - StepRatio * deltab;
% 1 x 32(n=2) (n=1)1 x 128(onInd{n+1}=8 x 16)
end
else
deltaDbn.rbm{n}.W = deltaDbn.rbm{n}.W - StepRatio * deltaW;
deltaDbn.rbm{n}.b = deltaDbn.rbm{n}.b - StepRatio * deltab;
end
keyboard()
end
for n=strbm:nrbm
dbn.rbm{n}.W = dbn.rbm{n}.W + deltaDbn.rbm{n}.W;
dbn.rbm{n}.b = dbn.rbm{n}.b + deltaDbn.rbm{n}.b;
end
end
if( Verbose )
tdbn = dbn;
if( sum(DropOutRate > 0) )
OnInd = GetOnInd( tdbn, DropOutRate, strbm );
for n=max([2,strbm]):nrbm
tdbn.rbm{n}.W = tdbn.rbm{n}.W * numel(OnInd{n-1}) / size(tdbn.rbm{n-1}.W,2);
end
end
out = v2h( tdbn, IN );
err = power( OUT - out, 2 );
rmse = sqrt( sum(err(:)) / numel(err) );
fprintf( '%3d : %9.4f\n', iter, rmse );
end
end
if( sum(DropOutRate > 0) )
OnInd = GetOnInd( dbn, DropOutRate, strbm );
for n=max([2,strbm]):nrbm
dbn.rbm{n}.W = dbn.rbm{n}.W * numel(OnInd{n-1}) / size(dbn.rbm{n-1}.W,2);
end
end