%CG_MNIST.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
function [f, df] = CG_MNIST(VV,Dim,XX);
l1 = Dim(1); % 784
l2 = Dim(2); % 1000
l3 = Dim(3); % 500
l4= Dim(4); % 250
l5= Dim(5); % 30
l6= Dim(6); % 250
l7= Dim(7); % 500
l8= Dim(8); % 1000
l9= Dim(9); % 784
N = size(XX,1); % 1000
% Do decomversion.
w1 = reshape(VV(1:(l1+1)*l2),l1+1,l2); % 785x1000
xxx = (l1+1)*l2; % 785000
w2 = reshape(VV(xxx+1:xxx+(l2+1)*l3),l2+1,l3); % 1001x500
xxx = xxx+(l2+1)*l3; % 1285500
w3 = reshape(VV(xxx+1:xxx+(l3+1)*l4),l3+1,l4); % 501x250
xxx = xxx+(l3+1)*l4;
w4 = reshape(VV(xxx+1:xxx+(l4+1)*l5),l4+1,l5); % 251x30
xxx = xxx+(l4+1)*l5;
w5 = reshape(VV(xxx+1:xxx+(l5+1)*l6),l5+1,l6); % 31x250
xxx = xxx+(l5+1)*l6;
w6 = reshape(VV(xxx+1:xxx+(l6+1)*l7),l6+1,l7); % 251x500
xxx = xxx+(l6+1)*l7;
w7 = reshape(VV(xxx+1:xxx+(l7+1)*l8),l7+1,l8); % 501x1000
xxx = xxx+(l7+1)*l8;
w8 = reshape(VV(xxx+1:xxx+(l8+1)*l9),l8+1,l9); % 1001x784
XX = [XX ones(N,1)]; % 1000x785
w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)]; % 1000x785 * 785x1000 = 1000x100->1000x1001
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 1000x1001 * 1001x500 = 1000x500->1000x501
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % 1000x501 * 501x250 => 1000x251
w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % 1000x251 * 251x30 = 1000x31
w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % 1000x31 * 31x250 => 1000x251
w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % 1000x251 * 251x500 => 1000x501
w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % 1000x501 * 501x1000 => 1000x1001
XXout = 1./(1 + exp(-w7probs*w8)); % 1000x1001 * 1001x784 = 1000x784
f = -1/N*sum(sum( XX(:,1:end-1).*log(XXout) + (1-XX(:,1:end-1)).*log(1-XXout))); % 1000x785-1 .* 1000x784 --> 1x1
IO = 1/N*(XXout-XX(:,1:end-1)); % 1000x784
Ix8=IO; % 1000x784
dw8 = w7probs'*Ix8; % 1001x1000 * 1000x784 = 1001x784
Ix7 = (Ix8*w8').*w7probs.*(1-w7probs); % 1000x784 * 784x1001 .* 1000x1001 .* 1-1000x1001 = 1000x1001
Ix7 = Ix7(:,1:end-1); % 1000x1000
dw7 = w6probs'*Ix7; % 501x1000 * 1000x1000 = 501x1000
Ix6 = (Ix7*w7').*w6probs.*(1-w6probs); % 1000x1000 * 1000x501 = 1000x501
Ix6 = Ix6(:,1:end-1); % 1000x500
dw6 = w5probs'*Ix6; % 251x500
Ix5 = (Ix6*w6').*w5probs.*(1-w5probs); % 1000x500 * 500x251 =1000x251
Ix5 = Ix5(:,1:end-1); % 1000x250
dw5 = w4probs'*Ix5; % 31x250
Ix4 = (Ix5*w5'); % 1000x250 * 250x31 = 1000x31
Ix4 = Ix4(:,1:end-1); % 1000x30
dw4 = w3probs'*Ix4; % 251x100 * 1000x30 = 251x30
Ix3 = (Ix4*w4').*w3probs.*(1-w3probs); % 1000x30 * 30x251 .* 1000x251 = 1000x251
Ix3 = Ix3(:,1:end-1); % 1000x250
dw3 = w2probs'*Ix3; % 501x250
Ix2 = (Ix3*w3').*w2probs.*(1-w2probs); % 1000x250 * 250x501 .*1000x501 = 1000x501
Ix2 = Ix2(:,1:end-1); % 1000x500
dw2 = w1probs'*Ix2; % 1001x500
Ix1 = (Ix2*w2').*w1probs.*(1-w1probs); %1000x500 * 500x1001 = 1000x1001
Ix1 = Ix1(:,1:end-1); % 1000x1000
dw1 = XX'*Ix1; % 785x1000
df = [dw1(:)' dw2(:)' dw3(:)' dw4(:)' dw5(:)' dw6(:)' dw7(:)' dw8(:)' ]'; % 2837314x1
Sunday, 28 October 2018
mnistdeepauto analysis 7
% mnistdisp.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
function [err] = mnistdisp(digits); % 784x30
% display a group of MNIST images
col=28;
row=28;
[dd,N] = size(digits); % 784x30
imdisp=zeros(2*28,ceil(N/2)*28); % 56, 420 pixel picture size
for nn=1:N % 1:30
ii=rem(nn,2); if(ii==0) ii=2; end %% ii is line number 1rem1->1 2rem2=0->2 3->1 4->2
jj=ceil(nn/2); % jj is digit sequence 1/2=1 2/2=1 3/2=2 4/2=2
img1 = reshape(digits(:,nn),row,col); %reshape((784x1),28,28) = 28x28 nn=1..30
img2(((ii-1)*row+1):(ii*row),((jj-1)*col+1):(jj*col))=img1'; % img2(nn=1 -> 0+1: 1*row, 0+1:1*col) = img1'
% img2(nn=2 -> 1*row+1: 2*row, 1*col+1:2*col) = img1'
end
imagesc(img2,[0 1]); colormap gray; axis equal; axis off;
drawnow;
err=0;
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
function [err] = mnistdisp(digits); % 784x30
% display a group of MNIST images
col=28;
row=28;
[dd,N] = size(digits); % 784x30
imdisp=zeros(2*28,ceil(N/2)*28); % 56, 420 pixel picture size
for nn=1:N % 1:30
ii=rem(nn,2); if(ii==0) ii=2; end %% ii is line number 1rem1->1 2rem2=0->2 3->1 4->2
jj=ceil(nn/2); % jj is digit sequence 1/2=1 2/2=1 3/2=2 4/2=2
img1 = reshape(digits(:,nn),row,col); %reshape((784x1),28,28) = 28x28 nn=1..30
img2(((ii-1)*row+1):(ii*row),((jj-1)*col+1):(jj*col))=img1'; % img2(nn=1 -> 0+1: 1*row, 0+1:1*col) = img1'
% img2(nn=2 -> 1*row+1: 2*row, 1*col+1:2*col) = img1'
end
imagesc(img2,[0 1]); colormap gray; axis equal; axis off;
drawnow;
err=0;
mnistdeepauto analysis 6
%backprop.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program fine-tunes an autoencoder with backpropagation.
% Weights of the autoencoder are going to be saved in mnist_weights.mat
% and trainig and test reconstruction errors in mnist_error.mat
% You can also set maxepoch, default value is 200 as in our paper.
maxepoch=1; %maxepoch=200;
fprintf(1,'\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
fprintf(1,'60 batches of 1000 cases each. \n');
load mnistvh
load mnisthp
load mnisthp2
load mnistpo
makebatches;
[numcases numdims numbatches]=size(batchdata); % 100x784x600
N=numcases;
%%%% PREINITIALIZE WEIGHTS OF THE AUTOENCODER %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
w1=[vishid; hidrecbiases]; % 784x1000 ; 1x1000 = 785x1000
w2=[hidpen; penrecbiases]; % 1000x500 ; 1x500 = 1001x500
w3=[hidpen2; penrecbiases2]; %500x250 ; 1x250 = 501x250
w4=[hidtop; toprecbiases]; % 250x30 ; 1x30 = 251x30
w5=[hidtop'; topgenbiases]; % 30x250 ; 1x250 = 31x250
w6=[hidpen2'; hidgenbiases2]; % 250x500 ; 1x500 = 251x500
w7=[hidpen'; hidgenbiases]; % 500x1000 ; 1x1000 = 501x1000
w8=[vishid'; visbiases]; % 1000x784 ; 1x784 = 1001x784
%%%%%%%%%% END OF PREINITIALIZATIO OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
l1=size(w1,1)-1; % 784
l2=size(w2,1)-1; % 1000
l3=size(w3,1)-1; % 500
l4=size(w4,1)-1; % 250
l5=size(w5,1)-1; % 30
l6=size(w6,1)-1; % 250
l7=size(w7,1)-1; % 500
l8=size(w8,1)-1; % 1000
l9=l1; % 784
test_err=[];
train_err=[];
for epoch = 1:maxepoch
%%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err=0;
[numcases numdims numbatches]=size(batchdata); % 100x784x600
N=numcases; % 100
for batch = 1:numbatches
data = [batchdata(:,:,batch)]; % 100x784
data = [data ones(N,1)]; % 100x785 = 100x784 ++ 100x1
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; % 100x785 * 785x1000 ++ 100x1001
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 ++ 100x501
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % 100x501 * 501x250 ++ 100x251
w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % 100x251 * 251x30 ++ 100x31
w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % 100x31 * 31x250 ++ 100x251
w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % 100x251 * 251x500 ++ 100x501
w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % 100x501 * 501x1000 ++ 100x1001
dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
err= err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 )); % 100x784 - 100x784
end
train_err(epoch)=err/numbatches;
%%%%%%%%%%%%%% END OF COMPUTING TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
output=[];
for ii=1:15
output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
end
if epoch==1
close all
figure('Position',[100,600,1000,200]);
else
figure(1)
end
mnistdisp(output);
drawnow;
%%%%%%%%%%%%%%%%%%%% COMPUTE TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[testnumcases testnumdims testnumbatches]=size(testbatchdata); % 100x784x100
N=testnumcases; % 100
err=0;
for batch = 1:testnumbatches
data = [testbatchdata(:,:,batch)];
data = [data ones(N,1)]; % 100x785
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; %100x785 * 785x1000 -> 100x1001
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 -> 1001x501
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % 100x501 * 501x250 -> 100x251
w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % 100x251 * 251x30 -> 100x31
w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % 100x31 * 31x250 -> 100x251
w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % 100x251 * 251x500 -> 100x501
w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % 100x501 * 501x1000 -> 100x1001
dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
err = err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 ));
end
test_err(epoch)=err/testnumbatches;
fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));
%%%%%%%%%%%%%% END OF COMPUTING TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
fprintf(1,'Displaying in figure 2: Top row - real test data, Bottom row -- reconstructions \n');
output=[];
for ii=1:15
output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
end
if epoch==1
%close all
figure('Position',[100,600,1000,200]);
else
figure(1)
end
mnistdisp(output);
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tt=0;
for batch = 1:numbatches/10
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tt=tt+1;
data=[];
for kk=1:10
data=[data
batchdata(:,:,(tt-1)*10+kk)];
end
%%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
max_iter=3;
VV = [w1(:)' w2(:)' w3(:)' w4(:)' w5(:)' w6(:)' w7(:)' w8(:)']';
Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9];
[X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data);
w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
xxx = (l1+1)*l2;
w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
xxx = xxx+(l2+1)*l3;
w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
xxx = xxx+(l3+1)*l4;
w4 = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);
xxx = xxx+(l4+1)*l5;
w5 = reshape(X(xxx+1:xxx+(l5+1)*l6),l5+1,l6);
xxx = xxx+(l5+1)*l6;
w6 = reshape(X(xxx+1:xxx+(l6+1)*l7),l6+1,l7);
xxx = xxx+(l6+1)*l7;
w7 = reshape(X(xxx+1:xxx+(l7+1)*l8),l7+1,l8);
xxx = xxx+(l7+1)*l8;
w8 = reshape(X(xxx+1:xxx+(l8+1)*l9),l8+1,l9);
%%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
save mnist_weights w1 w2 w3 w4 w5 w6 w7 w8
save mnist_error test_err train_err;
end
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program fine-tunes an autoencoder with backpropagation.
% Weights of the autoencoder are going to be saved in mnist_weights.mat
% and trainig and test reconstruction errors in mnist_error.mat
% You can also set maxepoch, default value is 200 as in our paper.
maxepoch=1; %maxepoch=200;
fprintf(1,'\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
fprintf(1,'60 batches of 1000 cases each. \n');
load mnistvh
load mnisthp
load mnisthp2
load mnistpo
makebatches;
[numcases numdims numbatches]=size(batchdata); % 100x784x600
N=numcases;
%%%% PREINITIALIZE WEIGHTS OF THE AUTOENCODER %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
w1=[vishid; hidrecbiases]; % 784x1000 ; 1x1000 = 785x1000
w2=[hidpen; penrecbiases]; % 1000x500 ; 1x500 = 1001x500
w3=[hidpen2; penrecbiases2]; %500x250 ; 1x250 = 501x250
w4=[hidtop; toprecbiases]; % 250x30 ; 1x30 = 251x30
w5=[hidtop'; topgenbiases]; % 30x250 ; 1x250 = 31x250
w6=[hidpen2'; hidgenbiases2]; % 250x500 ; 1x500 = 251x500
w7=[hidpen'; hidgenbiases]; % 500x1000 ; 1x1000 = 501x1000
w8=[vishid'; visbiases]; % 1000x784 ; 1x784 = 1001x784
%%%%%%%%%% END OF PREINITIALIZATIO OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
l1=size(w1,1)-1; % 784
l2=size(w2,1)-1; % 1000
l3=size(w3,1)-1; % 500
l4=size(w4,1)-1; % 250
l5=size(w5,1)-1; % 30
l6=size(w6,1)-1; % 250
l7=size(w7,1)-1; % 500
l8=size(w8,1)-1; % 1000
l9=l1; % 784
test_err=[];
train_err=[];
for epoch = 1:maxepoch
%%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err=0;
[numcases numdims numbatches]=size(batchdata); % 100x784x600
N=numcases; % 100
for batch = 1:numbatches
data = [batchdata(:,:,batch)]; % 100x784
data = [data ones(N,1)]; % 100x785 = 100x784 ++ 100x1
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; % 100x785 * 785x1000 ++ 100x1001
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 ++ 100x501
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % 100x501 * 501x250 ++ 100x251
w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % 100x251 * 251x30 ++ 100x31
w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % 100x31 * 31x250 ++ 100x251
w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % 100x251 * 251x500 ++ 100x501
w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % 100x501 * 501x1000 ++ 100x1001
dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
err= err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 )); % 100x784 - 100x784
end
train_err(epoch)=err/numbatches;
%%%%%%%%%%%%%% END OF COMPUTING TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
output=[];
for ii=1:15
output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
end
if epoch==1
close all
figure('Position',[100,600,1000,200]);
else
figure(1)
end
mnistdisp(output);
drawnow;
%%%%%%%%%%%%%%%%%%%% COMPUTE TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[testnumcases testnumdims testnumbatches]=size(testbatchdata); % 100x784x100
N=testnumcases; % 100
err=0;
for batch = 1:testnumbatches
data = [testbatchdata(:,:,batch)];
data = [data ones(N,1)]; % 100x785
w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; %100x785 * 785x1000 -> 100x1001
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % 100x1001 * 1001x500 -> 1001x501
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % 100x501 * 501x250 -> 100x251
w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % 100x251 * 251x30 -> 100x31
w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % 100x31 * 31x250 -> 100x251
w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % 100x251 * 251x500 -> 100x501
w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % 100x501 * 501x1000 -> 100x1001
dataout = 1./(1 + exp(-w7probs*w8)); % 100x1001 * 1001x784 = 100x784
err = err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 ));
end
test_err(epoch)=err/testnumbatches;
fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));
%%%%%%%%%%%%%% END OF COMPUTING TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
fprintf(1,'Displaying in figure 2: Top row - real test data, Bottom row -- reconstructions \n');
output=[];
for ii=1:15
output = [output data(ii,1:end-1)' dataout(ii,:)']; % 784x100 ++ 784x1
end
if epoch==1
%close all
figure('Position',[100,600,1000,200]);
else
figure(1)
end
mnistdisp(output);
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tt=0;
for batch = 1:numbatches/10
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tt=tt+1;
data=[];
for kk=1:10
data=[data
batchdata(:,:,(tt-1)*10+kk)];
end
%%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
max_iter=3;
VV = [w1(:)' w2(:)' w3(:)' w4(:)' w5(:)' w6(:)' w7(:)' w8(:)']';
Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9];
[X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data);
w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
xxx = (l1+1)*l2;
w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
xxx = xxx+(l2+1)*l3;
w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
xxx = xxx+(l3+1)*l4;
w4 = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);
xxx = xxx+(l4+1)*l5;
w5 = reshape(X(xxx+1:xxx+(l5+1)*l6),l5+1,l6);
xxx = xxx+(l5+1)*l6;
w6 = reshape(X(xxx+1:xxx+(l6+1)*l7),l6+1,l7);
xxx = xxx+(l6+1)*l7;
w7 = reshape(X(xxx+1:xxx+(l7+1)*l8),l7+1,l8);
xxx = xxx+(l7+1)*l8;
w8 = reshape(X(xxx+1:xxx+(l8+1)*l9),l8+1,l9);
%%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
save mnist_weights w1 w2 w3 w4 w5 w6 w7 w8
save mnist_error test_err train_err;
end
mnistdeepauto analysis 5
% rbmhidlinear.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, stochastic real-valued feature detectors drawn from a unit
% variance Gaussian whose mean is determined by the input from
% the logistic visible units. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch -- maximum number of epochs
% numhid -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart -- set to 1 if learning starts from beginning
epsilonw = 0.001; % Learning rate for weights
epsilonvb = 0.001; % Learning rate for biases of visible units
epsilonhb = 0.001; % Learning rate for biases of hidden units
weightcost = 0.0002;
initialmomentum = 0.5;
finalmomentum = 0.9;
[numcases numdims numbatches]=size(batchdata); % 100x250x600
if restart ==1,
restart=0;
epoch=1;
% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); % 250x30 = 250,30
hidbiases = zeros(1,numhid); % 1x30
visbiases = zeros(1,numdims); % 1x250
poshidprobs = zeros(numcases,numhid); % 100x30
neghidprobs = zeros(numcases,numhid); % 100x30
posprods = zeros(numdims,numhid); % 250x30
negprods = zeros(numdims,numhid); % 250x30
vishidinc = zeros(numdims,numhid); % 250x30
hidbiasinc = zeros(1,numhid); % 1x30
visbiasinc = zeros(1,numdims); % 1x250
sigmainc = zeros(1,numhid); % 1x30
batchposhidprobs=zeros(numcases,numhid,numbatches); % 100x30x600
end
for epoch = epoch:maxepoch,
fprintf(1,'epoch %d\n',epoch);
errsum=0;
for batch = 1:numbatches, % 1:600
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); %100x250 x600
poshidprobs = (data*vishid) + repmat(hidbiases,numcases,1); % 100x250 * 250x30 + 1x30repmat 100-> 100x30 = 100x30
batchposhidprobs(:,:,batch)=poshidprobs; %100x30
posprods = data' * poshidprobs; % 250x100 * 100x30 = 250x30
poshidact = sum(poshidprobs); % 1x30
posvisact = sum(data); % 1x250
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs+randn(numcases,numhid); % 100x30
%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
neghidprobs = (negdata*vishid) + repmat(hidbiases,numcases,1);
negprods = negdata'*neghidprobs;
neghidact = sum(neghidprobs);
negvisact = sum(negdata);
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 ));
errsum = err + errsum;
if epoch>5,
momentum=finalmomentum;
else
momentum=initialmomentum;
end;
%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vishidinc = momentum*vishidinc + ... % 250x30
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 250x30
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x250
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x30
vishid = vishid + vishidinc; % 250x30
visbiases = visbiases + visbiasinc; % 1x250
hidbiases = hidbiases + hidbiasinc; % 1x30
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
fprintf(1, 'epoch %4i error %f \n', epoch, errsum);
end
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, stochastic real-valued feature detectors drawn from a unit
% variance Gaussian whose mean is determined by the input from
% the logistic visible units. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch -- maximum number of epochs
% numhid -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart -- set to 1 if learning starts from beginning
epsilonw = 0.001; % Learning rate for weights
epsilonvb = 0.001; % Learning rate for biases of visible units
epsilonhb = 0.001; % Learning rate for biases of hidden units
weightcost = 0.0002;
initialmomentum = 0.5;
finalmomentum = 0.9;
[numcases numdims numbatches]=size(batchdata); % 100x250x600
if restart ==1,
restart=0;
epoch=1;
% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); % 250x30 = 250,30
hidbiases = zeros(1,numhid); % 1x30
visbiases = zeros(1,numdims); % 1x250
poshidprobs = zeros(numcases,numhid); % 100x30
neghidprobs = zeros(numcases,numhid); % 100x30
posprods = zeros(numdims,numhid); % 250x30
negprods = zeros(numdims,numhid); % 250x30
vishidinc = zeros(numdims,numhid); % 250x30
hidbiasinc = zeros(1,numhid); % 1x30
visbiasinc = zeros(1,numdims); % 1x250
sigmainc = zeros(1,numhid); % 1x30
batchposhidprobs=zeros(numcases,numhid,numbatches); % 100x30x600
end
for epoch = epoch:maxepoch,
fprintf(1,'epoch %d\n',epoch);
errsum=0;
for batch = 1:numbatches, % 1:600
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); %100x250 x600
poshidprobs = (data*vishid) + repmat(hidbiases,numcases,1); % 100x250 * 250x30 + 1x30repmat 100-> 100x30 = 100x30
batchposhidprobs(:,:,batch)=poshidprobs; %100x30
posprods = data' * poshidprobs; % 250x100 * 100x30 = 250x30
poshidact = sum(poshidprobs); % 1x30
posvisact = sum(data); % 1x250
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs+randn(numcases,numhid); % 100x30
%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
neghidprobs = (negdata*vishid) + repmat(hidbiases,numcases,1);
negprods = negdata'*neghidprobs;
neghidact = sum(neghidprobs);
negvisact = sum(negdata);
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 ));
errsum = err + errsum;
if epoch>5,
momentum=finalmomentum;
else
momentum=initialmomentum;
end;
%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vishidinc = momentum*vishidinc + ... % 250x30
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 250x30
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x250
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x30
vishid = vishid + vishidinc; % 250x30
visbiases = visbiases + visbiasinc; % 1x250
hidbiases = hidbiases + hidbiasinc; % 1x30
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
fprintf(1, 'epoch %4i error %f \n', epoch, errsum);
end
mnistdeepauto analysis4
% rbm.m
% Version 1.000
%
% Code provided by Geoff Hinton and Ruslan Salakhutdinov
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, binary, stochastic feature detectors using symmetrically
% weighted connections. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch -- maximum number of epochs
% numhid -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart -- set to 1 if learning starts from beginning
epsilonw = 0.1; % Learning rate for weights
epsilonvb = 0.1; % Learning rate for biases of visible units
epsilonhb = 0.1; % Learning rate for biases of hidden units
weightcost = 0.0002;
initialmomentum = 0.5;
finalmomentum = 0.9;
[numcases numdims numbatches]=size(batchdata); %%100x784x600
if restart ==1,
restart=0;
epoch=1;
% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); %784x1000
hidbiases = zeros(1,numhid); % 1x1000
visbiases = zeros(1,numdims); % 1x784
poshidprobs = zeros(numcases,numhid); %100x1000
neghidprobs = zeros(numcases,numhid); %100x1000
posprods = zeros(numdims,numhid); %784x1000
negprods = zeros(numdims,numhid); %784x1000
vishidinc = zeros(numdims,numhid); %784x1000
hidbiasinc = zeros(1,numhid); %1x1000
visbiasinc = zeros(1,numdims); %1x784
batchposhidprobs=zeros(numcases,numhid,numbatches); %100x1000x600
end
for epoch = epoch:maxepoch, % 1 : 10
fprintf(1,'epoch %d\n',epoch);
errsum=0;
for batch = 1:numbatches,
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); % 100x784
poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); %100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
batchposhidprobs(:,:,batch)=poshidprobs; %100x1000<-batch b="" nbsp="">-batch>
posprods = data' * poshidprobs; %784x100 * 100x1000 = 784x1000
poshidact = sum(poshidprobs); % 1x1000
posvisact = sum(data); % 1x784
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs > rand(numcases,numhid); %100x1000 > 100x1000 = 100 x 1000
%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % 100x1000 * 1000x784 - 1x784repmat->100x784 = 100 x 784
neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); % 100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
negprods = negdata'*neghidprobs; % 784x100 * 100x1000
neghidact = sum(neghidprobs); % 1x1000
negvisact = sum(negdata); % 1x784
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 )); %1x1
errsum = err + errsum;
if epoch>5,
momentum=finalmomentum;
else
momentum=initialmomentum;
end;
%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vishidinc = momentum*vishidinc + ... % 784x1000
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 784x 1000
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x784 + 1x784 - 1x784
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x1000 + 1x1000 - 1x1000
vishid = vishid + vishidinc; % 784x1000 + 784x1000
visbiases = visbiases + visbiasinc; % 1x784 + x784
hidbiases = hidbiases + hidbiasinc; % 1x1000 + 1x1000
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum);
end;
% Version 1.000
%
% Code provided by Geoff Hinton and Ruslan Salakhutdinov
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, binary, stochastic feature detectors using symmetrically
% weighted connections. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch -- maximum number of epochs
% numhid -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart -- set to 1 if learning starts from beginning
epsilonw = 0.1; % Learning rate for weights
epsilonvb = 0.1; % Learning rate for biases of visible units
epsilonhb = 0.1; % Learning rate for biases of hidden units
weightcost = 0.0002;
initialmomentum = 0.5;
finalmomentum = 0.9;
[numcases numdims numbatches]=size(batchdata); %%100x784x600
if restart ==1,
restart=0;
epoch=1;
% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); %784x1000
hidbiases = zeros(1,numhid); % 1x1000
visbiases = zeros(1,numdims); % 1x784
poshidprobs = zeros(numcases,numhid); %100x1000
neghidprobs = zeros(numcases,numhid); %100x1000
posprods = zeros(numdims,numhid); %784x1000
negprods = zeros(numdims,numhid); %784x1000
vishidinc = zeros(numdims,numhid); %784x1000
hidbiasinc = zeros(1,numhid); %1x1000
visbiasinc = zeros(1,numdims); %1x784
batchposhidprobs=zeros(numcases,numhid,numbatches); %100x1000x600
end
for epoch = epoch:maxepoch, % 1 : 10
fprintf(1,'epoch %d\n',epoch);
errsum=0;
for batch = 1:numbatches,
fprintf(1,'epoch %d batch %d\n',epoch,batch);
%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); % 100x784
poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); %100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
batchposhidprobs(:,:,batch)=poshidprobs; %100x1000<-batch b="" nbsp="">-batch>
posprods = data' * poshidprobs; %784x100 * 100x1000 = 784x1000
poshidact = sum(poshidprobs); % 1x1000
posvisact = sum(data); % 1x784
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs > rand(numcases,numhid); %100x1000 > 100x1000 = 100 x 1000
%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % 100x1000 * 1000x784 - 1x784repmat->100x784 = 100 x 784
neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); % 100x784 * 784x1000 - 1x1000repmat->100x1000 = 100x1000
negprods = negdata'*neghidprobs; % 784x100 * 100x1000
neghidact = sum(neghidprobs); % 1x1000
negvisact = sum(negdata); % 1x784
keyboard()
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 )); %1x1
errsum = err + errsum;
if epoch>5,
momentum=finalmomentum;
else
momentum=initialmomentum;
end;
%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
vishidinc = momentum*vishidinc + ... % 784x1000
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid); % 784x 1000
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact); % 1x784 + 1x784 - 1x784
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact); % 1x1000 + 1x1000 - 1x1000
vishid = vishid + vishidinc; % 784x1000 + 784x1000
visbiases = visbiases + visbiasinc; % 1x784 + x784
hidbiases = hidbiases + hidbiasinc; % 1x1000 + 1x1000
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum);
end;
Tuesday, 9 October 2018
mnistdeepauto analysis 3
% makebatches.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
digitdata=[]; % concats all digit images in digitdata array
targets=[]; % and adds 10 bits constant label info for each digit to the targets array.
load digit0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)]; %targets(5923x10) digitdata(5923x784) D(5923x784)
load digit1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)]; %targets(12665x10) digitdata(12665x784) D(6742x784)
load digit2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load digit3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load digit4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load digit5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load digit6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load digit7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load digit8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load digit9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
digitdata = digitdata/255;
totnum=size(digitdata,1); %60000 x 784
fprintf(1, 'Size of the training dataset= %5d \n', totnum);
rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum); %random order sequence holder of 60000 digits
numbatches=totnum/100;
numdims = size(digitdata,2); % 784
batchsize = 100;
batchdata = zeros(batchsize, numdims, numbatches); %100x784x600
batchtargets = zeros(batchsize, 10, numbatches); %100x10x600
for b=1:numbatches %600
batchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x784x600 batchdata is created according to random order indexes
batchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x10x600 similar to batchdata
end;
clear digitdata targets;
digitdata=[];
targets=[];
load test0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load test3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load test4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load test5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load test6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load test7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load test8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load test9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
%}
digitdata = digitdata/255; % 10000,784
totnum=size(digitdata,1); % 10000
%totnum=10000;
fprintf(1, 'Size of the test dataset= %5d \n', totnum);
rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum);
numbatches=totnum/100; % 100
%numbatches=100;
numdims = size(digitdata,2); % 784
%mumdims=784;
batchsize = 100;
testbatchdata = zeros(batchsize, numdims, numbatches); % 100, 784, 100
testbatchtargets = zeros(batchsize, 10, numbatches); % 100, 10, 100
for b=1:numbatches
testbatchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :);
testbatchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :);
end;
clear digitdata targets;
%%% Reset random seeds
rand('state',sum(100*clock));
randn('state',sum(100*clock));
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
digitdata=[]; % concats all digit images in digitdata array
targets=[]; % and adds 10 bits constant label info for each digit to the targets array.
load digit0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)]; %targets(5923x10) digitdata(5923x784) D(5923x784)
load digit1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)]; %targets(12665x10) digitdata(12665x784) D(6742x784)
load digit2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load digit3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load digit4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load digit5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load digit6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load digit7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load digit8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load digit9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
digitdata = digitdata/255;
totnum=size(digitdata,1); %60000 x 784
fprintf(1, 'Size of the training dataset= %5d \n', totnum);
rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum); %random order sequence holder of 60000 digits
numbatches=totnum/100;
numdims = size(digitdata,2); % 784
batchsize = 100;
batchdata = zeros(batchsize, numdims, numbatches); %100x784x600
batchtargets = zeros(batchsize, 10, numbatches); %100x10x600
for b=1:numbatches %600
batchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x784x600 batchdata is created according to random order indexes
batchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :); %100x10x600 similar to batchdata
end;
clear digitdata targets;
digitdata=[];
targets=[];
load test0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];
load test2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
load test3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
load test4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
load test5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
load test6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
load test7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
load test8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
load test9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
%}
digitdata = digitdata/255; % 10000,784
totnum=size(digitdata,1); % 10000
%totnum=10000;
fprintf(1, 'Size of the test dataset= %5d \n', totnum);
rand('state',0); %so we know the permutation of the training data
randomorder=randperm(totnum);
numbatches=totnum/100; % 100
%numbatches=100;
numdims = size(digitdata,2); % 784
%mumdims=784;
batchsize = 100;
testbatchdata = zeros(batchsize, numdims, numbatches); % 100, 784, 100
testbatchtargets = zeros(batchsize, 10, numbatches); % 100, 10, 100
for b=1:numbatches
testbatchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :);
testbatchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :);
end;
clear digitdata targets;
%%% Reset random seeds
rand('state',sum(100*clock));
randn('state',sum(100*clock));
mnistdeepauto analysis 2
% converter.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program reads raw MNIST files available at
% http://yann.lecun.com/exdb/mnist/
% and converts them to files in matlab format
% Before using this program you first need to download files:
% train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
% t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
% and gunzip them. You need to allocate some space for this.
% This program was originally written by Yee Whye Teh
% Work with test files first
fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');
f = fopen('t10k-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');
%{
count=4
octave:6> a
a =
50855936
270991360
469762048
469762048
%}
g = fopen('t10k-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');
%{
count=2
octave:16> g
g = 5
octave:17> l
l =
17301504
270991360
%}
fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');
n = 1000;
Df = cell(1,10);
for d=0:9,
Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w'); %test1.ascii test2.ascii usw
end;
for i=1:10, % read number i 's 1000 raw labels and 784 pixel 1000 raw images , total of 10000 numbers
fprintf('.');
rawimages = fread(f,28*28*n,'uchar'); %images
rawlabels = fread(g,n,'uchar'); %labels
rawimages = reshape(rawimages,28*28,n); % 784 x 1000
for j=1:n, % write 1000 images and labels of the current number
fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j)); %fprintf to the Df recorded file pointer according to the raw label value, number j's images.
fprintf(Df{rawlabels(j)+1},'\n'); % each number read is written to the corresponding testX.ascii file. 1000 cases and 784 pixels each
end;
end;
%fprintf(1,'\n');
for d=0:9, %convert ascii to mat file and print its length
fclose(Df{d+1});
D = load(['test' num2str(d) '.ascii'],'-ascii'); %load each file dedicated to a number into D 1x768320
fprintf('%5d Digits of class %d\n',size(D,1),d); %print to screen size 1st column of D
save(['test' num2str(d) '.mat'],'D','-mat'); %save D to testX.mat
end;
% Work with trainig files second
f = fopen('train-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');
g = fopen('train-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');
fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');
n = 1000;
Df = cell(1,10);
for d=0:9,
Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
end;
for i=1:60,
fprintf('.');
rawimages = fread(f,28*28*n,'uchar');
rawlabels = fread(g,n,'uchar');
rawimages = reshape(rawimages,28*28,n);
for j=1:n,
fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
fprintf(Df{rawlabels(j)+1},'\n');
end;
end;
fprintf(1,'\n');
for d=0:9,
fclose(Df{d+1});
D = load(['digit' num2str(d) '.ascii'],'-ascii'); %5949 x 784 last one
fprintf('%5d Digits of class %d\n',size(D,1),d); % 5949 Digits of class 9
save(['digit' num2str(d) '.mat'],'D','-mat');
end;
%dos('rm *.ascii');
dos('erase *.ascii');
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program reads raw MNIST files available at
% http://yann.lecun.com/exdb/mnist/
% and converts them to files in matlab format
% Before using this program you first need to download files:
% train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
% t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
% and gunzip them. You need to allocate some space for this.
% This program was originally written by Yee Whye Teh
% Work with test files first
fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');
f = fopen('t10k-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');
%{
count=4
octave:6> a
a =
50855936
270991360
469762048
469762048
%}
g = fopen('t10k-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');
%{
count=2
octave:16> g
g = 5
octave:17> l
l =
17301504
270991360
%}
fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');
n = 1000;
Df = cell(1,10);
for d=0:9,
Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w'); %test1.ascii test2.ascii usw
end;
for i=1:10, % read number i 's 1000 raw labels and 784 pixel 1000 raw images , total of 10000 numbers
fprintf('.');
rawimages = fread(f,28*28*n,'uchar'); %images
rawlabels = fread(g,n,'uchar'); %labels
rawimages = reshape(rawimages,28*28,n); % 784 x 1000
for j=1:n, % write 1000 images and labels of the current number
fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j)); %fprintf to the Df recorded file pointer according to the raw label value, number j's images.
fprintf(Df{rawlabels(j)+1},'\n'); % each number read is written to the corresponding testX.ascii file. 1000 cases and 784 pixels each
end;
end;
%fprintf(1,'\n');
for d=0:9, %convert ascii to mat file and print its length
fclose(Df{d+1});
D = load(['test' num2str(d) '.ascii'],'-ascii'); %load each file dedicated to a number into D 1x768320
fprintf('%5d Digits of class %d\n',size(D,1),d); %print to screen size 1st column of D
save(['test' num2str(d) '.mat'],'D','-mat'); %save D to testX.mat
end;
% Work with trainig files second
f = fopen('train-images-idx3-ubyte','r');
[a,count] = fread(f,4,'int32');
g = fopen('train-labels-idx1-ubyte','r');
[l,count] = fread(g,2,'int32');
fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');
n = 1000;
Df = cell(1,10);
for d=0:9,
Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
end;
for i=1:60,
fprintf('.');
rawimages = fread(f,28*28*n,'uchar');
rawlabels = fread(g,n,'uchar');
rawimages = reshape(rawimages,28*28,n);
for j=1:n,
fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
fprintf(Df{rawlabels(j)+1},'\n');
end;
end;
fprintf(1,'\n');
for d=0:9,
fclose(Df{d+1});
D = load(['digit' num2str(d) '.ascii'],'-ascii'); %5949 x 784 last one
fprintf('%5d Digits of class %d\n',size(D,1),d); % 5949 Digits of class 9
save(['digit' num2str(d) '.mat'],'D','-mat');
end;
%dos('rm *.ascii');
dos('erase *.ascii');
mnistdeepauto analysis 1
%mnistdeepauto.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program pretrains a deep autoencoder for MNIST dataset
% You can set the maximum number of epochs for pretraining each layer
% and you can set the architecture of the multilayer net.
clear all
close all
maxepoch=1; %In the Science paper we use maxepoch=50, but it works just fine. maxepoch=10
numhid=1000; numpen=500; numpen2=250; numopen=30;
fprintf(1,'Converting Raw files into Matlab format \n');
converter;
fprintf(1,'Pretraining a deep autoencoder. \n');
fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);
makebatches;
[numcases numdims numbatches]=size(batchdata); %100x784x600
fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid); %784, 1000
restart=1;
rbm;
hidrecbiases=hidbiases; % 1x1000
save mnistvh vishid hidrecbiases visbiases; %784x1000 1x1000 1x784
fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen); % 1000, 500
batchdata=batchposhidprobs; %100x1000x600
numhid=numpen; % 500
restart=1;
rbm;
hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases;
save mnisthp hidpen penrecbiases hidgenbiases; % 1000x500 1x500 1x1000
fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2); % 500, 250
batchdata=batchposhidprobs; %100x500x600
numhid=numpen2; % 250
restart=1;
rbm;
hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
save mnisthp2 hidpen2 penrecbiases2 hidgenbiases2; % 500x250 1x250 1x500
fprintf(1,'\nPretraining Layer 4 with RBM: %d-%d \n',numpen2,numopen); %250, 30
batchdata=batchposhidprobs; %100x250x600
numhid=numopen; % 30
restart=1;
rbmhidlinear;
hidtop=vishid; toprecbiases=hidbiases; topgenbiases=visbiases;
save mnistpo hidtop toprecbiases topgenbiases; % 250x30 1x30 1x250
backprop;
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program pretrains a deep autoencoder for MNIST dataset
% You can set the maximum number of epochs for pretraining each layer
% and you can set the architecture of the multilayer net.
clear all
close all
maxepoch=1; %In the Science paper we use maxepoch=50, but it works just fine. maxepoch=10
numhid=1000; numpen=500; numpen2=250; numopen=30;
fprintf(1,'Converting Raw files into Matlab format \n');
converter;
fprintf(1,'Pretraining a deep autoencoder. \n');
fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);
makebatches;
[numcases numdims numbatches]=size(batchdata); %100x784x600
fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid); %784, 1000
restart=1;
rbm;
hidrecbiases=hidbiases; % 1x1000
save mnistvh vishid hidrecbiases visbiases; %784x1000 1x1000 1x784
fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen); % 1000, 500
batchdata=batchposhidprobs; %100x1000x600
numhid=numpen; % 500
restart=1;
rbm;
hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases;
save mnisthp hidpen penrecbiases hidgenbiases; % 1000x500 1x500 1x1000
fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2); % 500, 250
batchdata=batchposhidprobs; %100x500x600
numhid=numpen2; % 250
restart=1;
rbm;
hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
save mnisthp2 hidpen2 penrecbiases2 hidgenbiases2; % 500x250 1x250 1x500
fprintf(1,'\nPretraining Layer 4 with RBM: %d-%d \n',numpen2,numopen); %250, 30
batchdata=batchposhidprobs; %100x250x600
numhid=numopen; % 30
restart=1;
rbmhidlinear;
hidtop=vishid; toprecbiases=hidbiases; topgenbiases=visbiases;
save mnistpo hidtop toprecbiases topgenbiases; % 250x30 1x30 1x250
backprop;
Subscribe to:
Posts (Atom)