Sunday 28 October 2018

mnistdeepauto analysis 8

%CG_MNIST.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

function [f, df] = CG_MNIST(VV,Dim,XX);

l1 = Dim(1); % 784
l2 = Dim(2); % 1000
l3 = Dim(3); % 500
l4= Dim(4); % 250
l5= Dim(5); % 30
l6= Dim(6); % 250
l7= Dim(7); % 500
l8= Dim(8); % 1000
l9= Dim(9); % 784
N = size(XX,1); % 1000

% Do decomversion.
 w1 = reshape(VV(1:(l1+1)*l2),l1+1,l2); % 785x1000
 xxx = (l1+1)*l2; % 785000
 w2 = reshape(VV(xxx+1:xxx+(l2+1)*l3),l2+1,l3); % 1001x500
 xxx = xxx+(l2+1)*l3; % 1285500
 w3 = reshape(VV(xxx+1:xxx+(l3+1)*l4),l3+1,l4); % 501x250
 xxx = xxx+(l3+1)*l4;
 w4 = reshape(VV(xxx+1:xxx+(l4+1)*l5),l4+1,l5); % 251x30
 xxx = xxx+(l4+1)*l5;
 w5 = reshape(VV(xxx+1:xxx+(l5+1)*l6),l5+1,l6); % 31x250
 xxx = xxx+(l5+1)*l6;
 w6 = reshape(VV(xxx+1:xxx+(l6+1)*l7),l6+1,l7); % 251x500
 xxx = xxx+(l6+1)*l7;
 w7 = reshape(VV(xxx+1:xxx+(l7+1)*l8),l7+1,l8); % 501x1000
 xxx = xxx+(l7+1)*l8;
 w8 = reshape(VV(xxx+1:xxx+(l8+1)*l9),l8+1,l9); % 1001x784


  XX = [XX ones(N,1)]; % 1000x785
  w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs  ones(N,1)]; % 1000x785 * 785x1000 = 1000x100->1000x1001
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 1000x1001 * 1001x500 = 1000x500->1000x501
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs  ones(N,1)]; % 1000x501 * 501x250 => 1000x251
  w4probs = w3probs*w4; w4probs = [w4probs  ones(N,1)]; % 1000x251 * 251x30 = 1000x31
  w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs  ones(N,1)]; % 1000x31 * 31x250 => 1000x251
  w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs  ones(N,1)]; % 1000x251 * 251x500 => 1000x501
  w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs  ones(N,1)]; % 1000x501 * 501x1000 => 1000x1001
  XXout = 1./(1 + exp(-w7probs*w8)); % 1000x1001 * 1001x784 = 1000x784

f = -1/N*sum(sum( XX(:,1:end-1).*log(XXout) + (1-XX(:,1:end-1)).*log(1-XXout))); % 1000x785-1 .* 1000x784 --> 1x1
IO = 1/N*(XXout-XX(:,1:end-1)); % 1000x784
Ix8=IO; % 1000x784
dw8 =  w7probs'*Ix8; % 1001x1000 * 1000x784 = 1001x784

Ix7 = (Ix8*w8').*w7probs.*(1-w7probs); % 1000x784 * 784x1001 .* 1000x1001 .* 1-1000x1001 = 1000x1001
Ix7 = Ix7(:,1:end-1); % 1000x1000 
dw7 =  w6probs'*Ix7; % 501x1000 * 1000x1000 = 501x1000

Ix6 = (Ix7*w7').*w6probs.*(1-w6probs); % 1000x1000 * 1000x501 = 1000x501
Ix6 = Ix6(:,1:end-1); % 1000x500
dw6 =  w5probs'*Ix6; % 251x500

Ix5 = (Ix6*w6').*w5probs.*(1-w5probs); % 1000x500 * 500x251 =1000x251
Ix5 = Ix5(:,1:end-1); % 1000x250
dw5 =  w4probs'*Ix5; % 31x250

Ix4 = (Ix5*w5'); % 1000x250 * 250x31 = 1000x31
Ix4 = Ix4(:,1:end-1); % 1000x30
dw4 =  w3probs'*Ix4; % 251x100 * 1000x30 = 251x30

Ix3 = (Ix4*w4').*w3probs.*(1-w3probs); % 1000x30 * 30x251 .* 1000x251 = 1000x251
Ix3 = Ix3(:,1:end-1); % 1000x250
dw3 =  w2probs'*Ix3; % 501x250

Ix2 = (Ix3*w3').*w2probs.*(1-w2probs); % 1000x250 * 250x501 .*1000x501 = 1000x501
Ix2 = Ix2(:,1:end-1); % 1000x500
dw2 =  w1probs'*Ix2; % 1001x500

Ix1 = (Ix2*w2').*w1probs.*(1-w1probs); %1000x500 * 500x1001 = 1000x1001
Ix1 = Ix1(:,1:end-1); % 1000x1000
dw1 =  XX'*Ix1; % 785x1000

df = [dw1(:)' dw2(:)' dw3(:)' dw4(:)' dw5(:)' dw6(:)'  dw7(:)'  dw8(:)'  ]'; % 2837314x1


mnistdeepauto analysis 7

% mnistdisp.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

function [err] = mnistdisp(digits); % 784x30
% display a group of MNIST images
col=28;
row=28;

[dd,N] = size(digits); % 784x30
imdisp=zeros(2*28,ceil(N/2)*28); % 56, 420 pixel picture size

for nn=1:N % 1:30
  ii=rem(nn,2); if(ii==0) ii=2; end %% ii is line number 1rem1->1 2rem2=0->2 3->1 4->2
  jj=ceil(nn/2); % jj is digit sequence 1/2=1 2/2=1 3/2=2 4/2=2

  img1 = reshape(digits(:,nn),row,col); %reshape((784x1),28,28) = 28x28 nn=1..30
  img2(((ii-1)*row+1):(ii*row),((jj-1)*col+1):(jj*col))=img1'; % img2(nn=1 ->      0+1: 1*row,     0+1:1*col) = img1'
   %  img2(nn=2 -> 1*row+1: 2*row, 1*col+1:2*col) = img1'
end

imagesc(img2,[0 1]); colormap gray; axis equal; axis off;
drawnow;
err=0;

mnistdeepauto analysis 6

%backprop.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program fine-tunes an autoencoder with backpropagation.
% Weights of the autoencoder are going to be saved in mnist_weights.mat
% and trainig and test reconstruction errors in mnist_error.mat
% You can also set maxepoch, default value is 200 as in our paper. 

maxepoch=1; %maxepoch=200;
fprintf(1,'\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
fprintf(1,'60 batches of 1000 cases each. \n');

load mnistvh
load mnisthp
load mnisthp2
load mnistpo

makebatches;
[numcases numdims numbatches]=size(batchdata); % 100x784x600
N=numcases;

%%%% PREINITIALIZE WEIGHTS OF THE AUTOENCODER %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
w1=[vishid; hidrecbiases]; % 784x1000 ; 1x1000 = 785x1000
w2=[hidpen; penrecbiases]; % 1000x500 ; 1x500 = 1001x500
w3=[hidpen2; penrecbiases2]; %500x250 ; 1x250 = 501x250
w4=[hidtop; toprecbiases]; % 250x30 ; 1x30 = 251x30
w5=[hidtop'; topgenbiases]; % 30x250 ; 1x250 = 31x250
w6=[hidpen2'; hidgenbiases2]; % 250x500 ; 1x500 = 251x500
w7=[hidpen'; hidgenbiases]; % 500x1000 ; 1x1000 = 501x1000
w8=[vishid'; visbiases]; % 1000x784 ; 1x784 = 1001x784

%%%%%%%%%% END OF PREINITIALIZATIO OF WEIGHTS  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

l1=size(w1,1)-1; % 784
l2=size(w2,1)-1; % 1000
l3=size(w3,1)-1; % 500
l4=size(w4,1)-1; % 250
l5=size(w5,1)-1; % 30
l6=size(w6,1)-1; % 250
l7=size(w7,1)-1; % 500
l8=size(w8,1)-1; % 1000
l9=l1;  % 784
test_err=[];
train_err=[];

for epoch = 1:maxepoch

 %%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 err=0;
 [numcases numdims numbatches]=size(batchdata); % 100x784x600
 N=numcases; % 100
 for batch = 1:numbatches
  data = [batchdata(:,:,batch)]; % 100x784
  data = [data ones(N,1)]; % 100x785 = 100x784 ++ 100x1
  w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs  ones(N,1)]; % 100x785 * 785x1000 ++ 100x1001
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 ++ 100x501
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs  ones(N,1)]; % 100x501 * 501x250 ++ 100x251
  w4probs = w3probs*w4; w4probs = [w4probs  ones(N,1)]; % 100x251 * 251x30 ++ 100x31
  w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs  ones(N,1)]; % 100x31 * 31x250 ++ 100x251
  w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs  ones(N,1)]; % 100x251 * 251x500 ++ 100x501
  w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs  ones(N,1)]; %  100x501 * 501x1000 ++ 100x1001
  dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
  err= err +  1/N*sum(sum( (data(:,1:end-1)-dataout).^2 )); % 100x784 - 100x784
 end
 train_err(epoch)=err/numbatches;

 %%%%%%%%%%%%%% END OF COMPUTING TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 %%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
 fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
 output=[];
 for ii=1:15
  output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
 end
   if epoch==1
   close all
   figure('Position',[100,600,1000,200]);
   else
   figure(1)
   end
   mnistdisp(output);
   drawnow;

 %%%%%%%%%%%%%%%%%%%% COMPUTE TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 [testnumcases testnumdims testnumbatches]=size(testbatchdata); % 100x784x100
 N=testnumcases; % 100
 err=0;
 for batch = 1:testnumbatches
  data = [testbatchdata(:,:,batch)];
  data = [data ones(N,1)]; % 100x785
  w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs  ones(N,1)]; %100x785 * 785x1000 -> 100x1001
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 -> 1001x501
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs  ones(N,1)]; % 100x501 * 501x250 -> 100x251
  w4probs = w3probs*w4; w4probs = [w4probs  ones(N,1)]; % 100x251 * 251x30 -> 100x31
  w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs  ones(N,1)]; % 100x31 * 31x250 -> 100x251
  w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs  ones(N,1)]; % 100x251 * 251x500 -> 100x501
  w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs  ones(N,1)]; % 100x501 * 501x1000 -> 100x1001
  dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
  err = err +  1/N*sum(sum( (data(:,1:end-1)-dataout).^2 ));
 end
 test_err(epoch)=err/testnumbatches;
 fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));

%%%%%%%%%%%%%% END OF COMPUTING TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  %%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
 fprintf(1,'Displaying in figure 2: Top row - real test data, Bottom row -- reconstructions \n');
 output=[];
 for ii=1:15
  output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
 end
   if epoch==1
   %close all
   figure('Position',[100,600,1000,200]);
   else
   figure(1)
   end
   mnistdisp(output);
   drawnow;
   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 tt=0;
 for batch = 1:numbatches/10
  fprintf(1,'epoch %d batch %d\n',epoch,batch);

  %%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  tt=tt+1;
  data=[];
  for kk=1:10
   data=[data
        batchdata(:,:,(tt-1)*10+kk)];
  end

%%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  max_iter=3;
  VV = [w1(:)' w2(:)' w3(:)' w4(:)' w5(:)' w6(:)' w7(:)' w8(:)']';
  Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9];

  [X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data);

  w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
  xxx = (l1+1)*l2;
  w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
  xxx = xxx+(l2+1)*l3;
  w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
  xxx = xxx+(l3+1)*l4;
  w4 = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);
  xxx = xxx+(l4+1)*l5;
  w5 = reshape(X(xxx+1:xxx+(l5+1)*l6),l5+1,l6);
  xxx = xxx+(l5+1)*l6;
  w6 = reshape(X(xxx+1:xxx+(l6+1)*l7),l6+1,l7);
  xxx = xxx+(l6+1)*l7;
  w7 = reshape(X(xxx+1:xxx+(l7+1)*l8),l7+1,l8);
  xxx = xxx+(l7+1)*l8;
  w8 = reshape(X(xxx+1:xxx+(l8+1)*l9),l8+1,l9);

  %%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 end

 save mnist_weights w1 w2 w3 w4 w5 w6 w7 w8
 save mnist_error test_err train_err;

end



mnistdeepauto analysis 5

% rbmhidlinear.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, stochastic real-valued feature detectors drawn from a unit
% variance Gaussian whose mean is determined by the input from
% the logistic visible units. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch  -- maximum number of epochs
% numhid    -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart   -- set to 1 if learning starts from beginning

epsilonw      = 0.001; % Learning rate for weights
epsilonvb     = 0.001; % Learning rate for biases of visible units
epsilonhb     = 0.001; % Learning rate for biases of hidden units
weightcost  = 0.0002; 
initialmomentum  = 0.5;
finalmomentum    = 0.9;


[numcases numdims numbatches]=size(batchdata); % 100x250x600

if restart ==1,
  restart=0;
  epoch=1;

% Initializing symmetric weights and biases.
  vishid     = 0.1*randn(numdims, numhid); % 250x30 = 250,30
  hidbiases  = zeros(1,numhid); % 1x30
  visbiases  = zeros(1,numdims); % 1x250


  poshidprobs = zeros(numcases,numhid); % 100x30
  neghidprobs = zeros(numcases,numhid); % 100x30
  posprods    = zeros(numdims,numhid); % 250x30
  negprods    = zeros(numdims,numhid); % 250x30
  vishidinc  = zeros(numdims,numhid); % 250x30
  hidbiasinc = zeros(1,numhid); % 1x30
  visbiasinc = zeros(1,numdims); % 1x250
  sigmainc = zeros(1,numhid); % 1x30
  batchposhidprobs=zeros(numcases,numhid,numbatches); % 100x30x600
end

for epoch = epoch:maxepoch,
 fprintf(1,'epoch %d\n',epoch);
 errsum=0;

 for batch = 1:numbatches, % 1:600
 fprintf(1,'epoch %d batch %d\n',epoch,batch);

%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  data = batchdata(:,:,batch); %100x250 x600
  poshidprobs =  (data*vishid) + repmat(hidbiases,numcases,1); % 100x250 * 250x30 + 1x30repmat 100-> 100x30 = 100x30
  batchposhidprobs(:,:,batch)=poshidprobs; %100x30
  posprods    = data' * poshidprobs; % 250x100 * 100x30 = 250x30
  poshidact   = sum(poshidprobs); % 1x30
  posvisact = sum(data); % 1x250
 
 %%%%%%%%% END OF POSITIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 poshidstates = poshidprobs+randn(numcases,numhid); % 100x30

%%%%%%%%% START NEGATIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
  neghidprobs = (negdata*vishid) + repmat(hidbiases,numcases,1);
  negprods  = negdata'*neghidprobs;
  neghidact = sum(neghidprobs);
  negvisact = sum(negdata);
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


  err= sum(sum( (data-negdata).^2 ));
  errsum = err + errsum;
   if epoch>5,
     momentum=finalmomentum;
   else
     momentum=initialmomentum;
   end;

%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    vishidinc = momentum*vishidinc + ... % 250x30
                epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 250x30
    visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x250
    hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x30
    vishid = vishid + vishidinc; % 250x30
    visbiases = visbiases + visbiasinc; % 1x250
    hidbiases = hidbiases + hidbiasinc; % 1x30

%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 end
fprintf(1, 'epoch %4i error %f \n', epoch, errsum);

end

mnistdeepauto analysis4

% rbm.m
% Version 1.000 
%
% Code provided by Geoff Hinton and Ruslan Salakhutdinov 
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, binary, stochastic feature detectors using symmetrically
% weighted connections. Learning is done with 1-step Contrastive Divergence.   
% The program assumes that the following variables are set externally:
% maxepoch  -- maximum number of epochs
% numhid    -- number of hidden units 
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart   -- set to 1 if learning starts from beginning 

epsilonw      = 0.1;   % Learning rate for weights 
epsilonvb     = 0.1;   % Learning rate for biases of visible units 
epsilonhb     = 0.1;   % Learning rate for biases of hidden units 
weightcost  = 0.0002;   
initialmomentum  = 0.5;
finalmomentum    = 0.9;

[numcases numdims numbatches]=size(batchdata); %%100x784x600

if restart ==1,
  restart=0;
  epoch=1;

% Initializing symmetric weights and biases. 
  vishid     = 0.1*randn(numdims, numhid); %784x1000
  hidbiases  = zeros(1,numhid); % 1x1000
  visbiases  = zeros(1,numdims); % 1x784

  poshidprobs = zeros(numcases,numhid); %100x1000
  neghidprobs = zeros(numcases,numhid); %100x1000
  posprods    = zeros(numdims,numhid); %784x1000
  negprods    = zeros(numdims,numhid); %784x1000
  vishidinc  = zeros(numdims,numhid); %784x1000
  hidbiasinc = zeros(1,numhid); %1x1000
  visbiasinc = zeros(1,numdims); %1x784
  batchposhidprobs=zeros(numcases,numhid,numbatches); %100x1000x600
end

for epoch = epoch:maxepoch, % 1 : 10
 fprintf(1,'epoch %d\n',epoch); 
 errsum=0;
 for batch = 1:numbatches,
  fprintf(1,'epoch %d batch %d\n',epoch,batch);  

%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  data = batchdata(:,:,batch); % 100x784
  poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));    %100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
  batchposhidprobs(:,:,batch)=poshidprobs; %100x1000<-batch b="" nbsp="">
  posprods    = data' * poshidprobs; %784x100 * 100x1000 = 784x1000
  poshidact   = sum(poshidprobs); % 1x1000
  posvisact = sum(data); % 1x784

%%%%%%%%% END OF POSITIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  poshidstates = poshidprobs > rand(numcases,numhid); %100x1000 > 100x1000 = 100 x 1000

%%%%%%%%% START NEGATIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % 100x1000 * 1000x784 - 1x784repmat->100x784 = 100 x 784
  neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));   % 100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
  negprods  = negdata'*neghidprobs; % 784x100 * 100x1000
  neghidact = sum(neghidprobs); % 1x1000
  negvisact = sum(negdata); % 1x784
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  err= sum(sum( (data-negdata).^2 )); %1x1
  errsum = err + errsum;

   if epoch>5,
     momentum=finalmomentum;
   else
     momentum=initialmomentum;
   end;

%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    vishidinc = momentum*vishidinc + ...    % 784x1000
                epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 784x 1000
    visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x784 + 1x784 - 1x784
    hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x1000 + 1x1000 - 1x1000

    vishid = vishid + vishidinc; % 784x1000 + 784x1000
    visbiases = visbiases + visbiasinc; % 1x784 + x784
    hidbiases = hidbiases + hidbiasinc; % 1x1000 + 1x1000 

%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

  end
  fprintf(1, 'epoch %4i error %6.1f  \n', epoch, errsum);

end;

Tuesday 9 October 2018

mnistdeepauto analysis 3

% makebatches.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

digitdata=[];   %  concats all digit images in digitdata array
targets=[]; % and adds 10 bits constant label info for each digit to the targets array.
load digit0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];  %targets(5923x10) digitdata(5923x784) D(5923x784)
load digit1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];  %targets(12665x10) digitdata(12665x784) D(6742x784)
load digit2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load digit3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load digit4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load digit5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load digit6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load digit7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load digit8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load digit9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
digitdata = digitdata/255;

totnum=size(digitdata,1); %60000 x 784
fprintf(1, 'Size of the training dataset= %5d \n', totnum);

rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum); %random order sequence holder of 60000 digits

numbatches=totnum/100;
numdims  =  size(digitdata,2);  % 784
batchsize = 100;
batchdata = zeros(batchsize, numdims, numbatches); %100x784x600
batchtargets = zeros(batchsize, 10, numbatches); %100x10x600

for b=1:numbatches %600
  batchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x784x600 batchdata is created according to random order indexes
  batchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x10x600 similar to batchdata
end;

clear digitdata targets;

digitdata=[];
targets=[];

load test0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load test3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load test4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load test5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load test6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load test7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load test8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load test9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
%}
digitdata = digitdata/255;  % 10000,784

totnum=size(digitdata,1); % 10000
%totnum=10000;
fprintf(1, 'Size of the test dataset= %5d \n', totnum);

rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum);

numbatches=totnum/100; % 100
%numbatches=100;
numdims  =  size(digitdata,2);  % 784
%mumdims=784;
batchsize = 100;
testbatchdata = zeros(batchsize, numdims, numbatches); % 100, 784, 100
testbatchtargets = zeros(batchsize, 10, numbatches); % 100, 10, 100

for b=1:numbatches
  testbatchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :);
  testbatchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :);
end;


clear digitdata targets;


%%% Reset random seeds
rand('state',sum(100*clock));
randn('state',sum(100*clock));



mnistdeepauto analysis 2

% converter.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program reads raw MNIST files available at
% http://yann.lecun.com/exdb/mnist/
% and converts them to files in matlab format
% Before using this program you first need to download files:
% train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
% t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
% and gunzip them. You need to allocate some space for this.

% This program was originally written by Yee Whye Teh

% Work with test files first
fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');

f = fopen('t10k-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');
%{
count=4
octave:6> a
a =

    50855936
   270991360
   469762048
   469762048
%} 
g = fopen('t10k-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');
%{
count=2
octave:16> g
g =  5
octave:17> l
l =

    17301504
   270991360
%}
 
fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');
n = 1000;

Df = cell(1,10);
for d=0:9,
  Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w');   %test1.ascii test2.ascii usw
end;
 
for i=1:10,   % read number i 's 1000 raw labels and 784 pixel 1000 raw images , total of 10000 numbers
  fprintf('.');
  rawimages = fread(f,28*28*n,'uchar');  %images
  rawlabels = fread(g,n,'uchar'); %labels
  rawimages = reshape(rawimages,28*28,n);  % 784 x 1000

  for j=1:n,  % write 1000 images and labels of the current number
    fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j)); %fprintf to the Df recorded file pointer according to the raw label value,  number j's images.
fprintf(Df{rawlabels(j)+1},'\n'); % each number read is written to the corresponding testX.ascii file. 1000 cases and 784 pixels each
  end;
end;

%fprintf(1,'\n');
for d=0:9,  %convert ascii to mat file and print its length
  fclose(Df{d+1});
  D = load(['test' num2str(d) '.ascii'],'-ascii'); %load each file dedicated to a number into D 1x768320
  fprintf('%5d Digits of class %d\n',size(D,1),d); %print to screen size 1st column of D
  save(['test' num2str(d) '.mat'],'D','-mat'); %save D to testX.mat
end;


% Work with trainig files second
f = fopen('train-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');

g = fopen('train-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');

fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');
n = 1000;

Df = cell(1,10);
for d=0:9,
  Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
end;

for i=1:60,
  fprintf('.');
  rawimages = fread(f,28*28*n,'uchar');
  rawlabels = fread(g,n,'uchar');
  rawimages = reshape(rawimages,28*28,n);

  for j=1:n,
    fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
    fprintf(Df{rawlabels(j)+1},'\n');
  end;
end;

fprintf(1,'\n');
for d=0:9,
  fclose(Df{d+1});
  D = load(['digit' num2str(d) '.ascii'],'-ascii');  %5949 x 784 last one
  fprintf('%5d Digits of class %d\n',size(D,1),d); % 5949 Digits of class 9
  save(['digit' num2str(d) '.mat'],'D','-mat');
end;

%dos('rm *.ascii');
dos('erase *.ascii');

mnistdeepauto analysis 1

%mnistdeepauto.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.


% This program pretrains a deep autoencoder for MNIST dataset
% You can set the maximum number of epochs for pretraining each layer
% and you can set the architecture of the multilayer net.

clear all
close all

maxepoch=1; %In the Science paper we use maxepoch=50, but it works just fine. maxepoch=10
numhid=1000; numpen=500; numpen2=250; numopen=30;

fprintf(1,'Converting Raw files into Matlab format \n');
converter;

fprintf(1,'Pretraining a deep autoencoder. \n');
fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);

makebatches;
[numcases numdims numbatches]=size(batchdata); %100x784x600

fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid); %784, 1000
restart=1;
rbm;
hidrecbiases=hidbiases; % 1x1000
save mnistvh vishid hidrecbiases visbiases;  %784x1000 1x1000 1x784

fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen); % 1000, 500
batchdata=batchposhidprobs; %100x1000x600
numhid=numpen; % 500
restart=1;
rbm;
hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases;
save mnisthp hidpen penrecbiases hidgenbiases; % 1000x500 1x500 1x1000

fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2); % 500, 250
batchdata=batchposhidprobs; %100x500x600
numhid=numpen2; % 250
restart=1;
rbm;
hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
save mnisthp2 hidpen2 penrecbiases2 hidgenbiases2; % 500x250 1x250 1x500

fprintf(1,'\nPretraining Layer 4 with RBM: %d-%d \n',numpen2,numopen); %250, 30
batchdata=batchposhidprobs;  %100x250x600
numhid=numopen; % 30
restart=1;
rbmhidlinear;
hidtop=vishid; toprecbiases=hidbiases; topgenbiases=visbiases;
save mnistpo hidtop toprecbiases topgenbiases; % 250x30 1x30 1x250

backprop;