Wednesday 7 November 2018

mnistclassify analysis 2

backpropclassify.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program fine-tunes a discriminative (classification) network with backpropagation.
% Weights of the network are going to be saved in mnistclassify_weights.mat
% and training and test errors in mnistclassify_error.mat
% You can also set maxepoch; the value used in our paper is 200 (this copy sets maxepoch=2 below).

% Number of fine-tuning epochs for this run (the commented-out alternative is 200).
maxepoch = 2; % maxepoch = 200;
fprintf(1, '\nTraining discriminative model on MNIST by minimizing cross entropy error. \n');
fprintf(1, '60 batches of 1000 cases each. \n');

% Bring the contents of the three MAT-files into the workspace.
% NOTE(review): presumably these provide the pretrained layer weights and biases
% (vishid, hidrecbiases, hidpen, penrecbiases, hidpen2, penrecbiases2) that are
% stacked into w1..w3 further down -- confirm against the files.
load('mnistvhclassify');
load('mnisthpclassify');
load('mnisthp2classify');

% Run the batching script; the rest of this file reads batchdata, batchtargets,
% testbatchdata and testbatchtargets, which are expected to exist afterwards.
makebatches;

% batchdata is indexed as (case, input dimension, batch).
[numcases, numdims, numbatches] = size(batchdata);
N = numcases; % cases per mini-batch

%%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Stack each pretrained weight matrix on top of its bias term so that a layer
% can be evaluated as [activations 1] * w.
% NOTE(review): assumes every bias variable is a single row whose width matches
% the corresponding weight matrix -- confirm against the loaded MAT-files.
w1 = vertcat(vishid,  hidrecbiases);   % (inputs + 1)      x (layer-1 units)
w2 = vertcat(hidpen,  penrecbiases);   % (layer-1 units+1) x (layer-2 units)
w3 = vertcat(hidpen2, penrecbiases2);  % (layer-2 units+1) x (layer-3 units)

% The softmax layer has no pretrained weights: start from small Gaussian noise,
% one row per column of w3 plus one bias row, and 10 output columns.
w_class = randn(size(w3,2)+1, 10) * 0.1;


%%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Layer widths: the row count of each matrix minus its bias row.
l1 = size(w1,1) - 1;       % inputs feeding w1
l2 = size(w2,1) - 1;       % inputs feeding w2 (= columns of w1)
l3 = size(w3,1) - 1;       % inputs feeding w3 (= columns of w2)
l4 = size(w_class,1) - 1;  % inputs feeding the softmax layer (= columns of w3)
l5 = 10;                   % number of softmax outputs

% Per-epoch misclassification counts; entries are appended inside the epoch loop.
test_err  = [];
train_err = [];


% Main fine-tuning loop. Every epoch:
%   1. measures misclassification count and cross-entropy on the training batches,
%   2. does the same on the test batches and prints a summary line,
%   3. merges the training mini-batches in groups of 10 and runs `minimize`
%      (3 line searches) on each group,
%   4. saves the current weights and the error curves.
% Shapes below are written symbolically: N is the number of rows of the current
% batch, and l1..l5 are the layer widths computed before the loop.
for epoch = 1:maxepoch

  %%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  err=0;      % not read anywhere in this script; kept so the workspace is unchanged
  err_cr=0;   % running sum of cross-entropy over all training batches
  counter=0;  % running count of correctly classified training cases
  [numcases, numdims, numbatches]=size(batchdata);
  N=numcases; % N is overwritten by the test pass and the training step, so reset it here
  for batch = 1:numbatches
    data = batchdata(:,:,batch);      % N x numdims
    target = batchtargets(:,:,batch); % N x 10
    data = [data ones(N,1)];          % append a constant-1 column for the bias row of w1

    % Forward pass: logistic units, with a bias column appended after every layer.
    w1probs = 1./(1 + exp(-data*w1));    w1probs = [w1probs ones(N,1)]; % N x (l2+1)
    w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % N x (l3+1)
    w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % N x (l4+1)

    % Softmax output: exponentiate, then normalise every row to sum to 1.
    targetout = exp(w3probs*w_class);                     % N x 10
    targetout = targetout./repmat(sum(targetout,2),1,10); % N x 10

    [I, J]=max(targetout,[],2);  % J: column index of the largest output per row
    [I1, J1]=max(target,[],2);   % J1: column index of the largest target per row
    counter=counter+nnz(J==J1);  % rows where prediction and target agree
    err_cr = err_cr - sum(sum(target.*log(targetout))); % cross-entropy of this batch
  end
  train_err(epoch)=(numcases*numbatches-counter); % number of misclassified training cases
  train_crerr(epoch)=err_cr/numbatches;           % cross-entropy averaged over batches

  %%%%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  %%%%%%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  err=0;      % not read anywhere in this script; kept so the workspace is unchanged
  err_cr=0;
  counter=0;
  [testnumcases, testnumdims, testnumbatches]=size(testbatchdata);
  N=testnumcases;
  for batch = 1:testnumbatches
    data = testbatchdata(:,:,batch);      % N x testnumdims
    target = testbatchtargets(:,:,batch); % N x 10
    data = [data ones(N,1)];

    % Same forward pass as for the training batches.
    w1probs = 1./(1 + exp(-data*w1));    w1probs = [w1probs ones(N,1)]; % N x (l2+1)
    w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % N x (l3+1)
    w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % N x (l4+1)
    targetout = exp(w3probs*w_class);                     % N x 10
    targetout = targetout./repmat(sum(targetout,2),1,10); % N x 10

    [I, J]=max(targetout,[],2);
    [I1, J1]=max(target,[],2);
    counter=counter+nnz(J==J1);
    err_cr = err_cr - sum(sum(target.*log(targetout)));
  end
  test_err(epoch)=(testnumcases*testnumbatches-counter); % number of misclassified test cases
  test_crerr(epoch)=err_cr/testnumbatches;               % cross-entropy averaged over batches
  fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d) \t \t \n',...
            epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);

  %%%%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  tt=0;
  for batch = 1:numbatches/10
    fprintf(1,'epoch %d batch %d\n',epoch,batch);

    %%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    tt=tt+1;
    data=[];
    targets=[];
    for kk=1:10
      % Stack the 10 consecutive mini-batches of group tt vertically.
      data=[data
            batchdata(:,:,(tt-1)*10+kk)];       % grows to (10*numcases) x numdims
      targets=[targets
               batchtargets(:,:,(tt-1)*10+kk)]; % grows to (10*numcases) x 10
    end

    %%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    max_iter=3;

    % This line was damaged by HTML extraction in the source; the condition and the
    % first statement of the branch have been restored.
    % NOTE(review): the damaged text suggests the original threshold was 6 -- confirm.
    if epoch<2  % First update top-level weights holding other weights fixed.
      N = size(data,1);      % rows in the merged batch
      XX = [data ones(N,1)]; % N x (l1+1)
      w1probs = 1./(1 + exp(-XX*w1));      w1probs = [w1probs ones(N,1)]; % N x (l2+1)
      w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % N x (l3+1)
      % No bias column is appended to w3probs in this branch.
      % NOTE(review): presumably CG_CLASSIFY_INIT adds it itself -- confirm.
      w3probs = 1./(1 + exp(-w2probs*w3)); %w3probs = [w3probs  ones(N,1)]; % N x l4

      VV = w_class(:);  % softmax weights flattened to a column, (l4+1)*l5 x 1
      Dim = [l4; l5];
      [X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets);
      w_class = reshape(X,l4+1,l5);

    else
      % Optimise every layer jointly: pack all weights into one column vector.
      VV = [w1(:); w2(:); w3(:); w_class(:)];
      Dim = [l1; l2; l3; l4; l5];
      [X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);

      % Unpack X in the order it was packed; xxx is the number of elements consumed so far.
      w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
      xxx = (l1+1)*l2;
      w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
      xxx = xxx+(l2+1)*l3;
      w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
      xxx = xxx+(l3+1)*l4;
      w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);

    end
    %%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  end

  % Save after every epoch so an interrupted run keeps its latest weights and error curves.
  save mnistclassify_weights w1 w2 w3 w_class
  save mnistclassify_error test_err test_crerr train_err train_crerr;

end