% Tuesday 9 October 2018

% mnistdeepauto analysis 2

% converter.m
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program reads raw MNIST files available at
% http://yann.lecun.com/exdb/mnist/
% and converts them to files in matlab format
% Before using this program you first need to download files:
% train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
% t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
% and gunzip them. You need to allocate some space for this.

% This program was originally written by Yee Whye Teh

% Work with test files first
fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');

% The header fields of the MNIST IDX files are big-endian 32-bit integers,
% so the files are opened with the 'ieee-be' machine format. Read in native
% order on a little-endian machine the image header comes out byte-swapped
% (50855936, 270991360, 469762048, 469762048 instead of 2051, 10000, 28, 28)
% and the label header as 17301504, 270991360 instead of 2049, 10000.
f = fopen('t10k-images-idx3-ubyte','r','ieee-be');
if f < 0
  error('Cannot open t10k-images-idx3-ubyte. Download and gunzip it first.');
end
[a,count] = fread(f,4,'int32');   % magic number, image count, rows, columns
if count ~= 4 || a(1) ~= 2051 || a(3) ~= 28 || a(4) ~= 28
  fclose(f);
  error('t10k-images-idx3-ubyte does not have the expected 28x28 IDX image header.');
end

g = fopen('t10k-labels-idx1-ubyte','r','ieee-be');
if g < 0
  fclose(f);
  error('Cannot open t10k-labels-idx1-ubyte. Download and gunzip it first.');
end
[l,count] = fread(g,2,'int32');   % magic number, label count
if count ~= 2 || l(1) ~= 2049
  fclose(f);
  fclose(g);
  error('t10k-labels-idx1-ubyte does not have the expected IDX label header.');
end

fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');
n = 1000;   % number of images/labels read per chunk

% One intermediate text file per digit class: test0.ascii ... test9.ascii.
% Df{d+1} holds the file identifier for digit d.
Df = cell(1,10);
for d=0:9
  Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w');
  if Df{d+1} < 0
    error('Cannot create test%d.ascii in the current directory.',d);
  end
end

% Read the test set in 10 chunks of n images (10000 images in total).
for i=1:10
  fprintf('.');
  rawimages = fread(f,28*28*n,'uchar');    % pixel bytes of n images
  rawlabels = fread(g,n,'uchar');          % the matching n labels
  rawimages = reshape(rawimages,28*28,n);  % 784 x n, one image per column

  % Append each image as one line of 784 values to the file of its label.
  for j=1:n
    fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
    fprintf(Df{rawlabels(j)+1},'\n');
  end
end

% The raw input files are fully consumed; release their handles.
fclose(f);
fclose(g);

% Finish the line of progress dots before printing the per-class summary.
fprintf(1,'\n');

% Convert each per-digit text file into testX.mat holding matrix D,
% which has one row per image and 784 columns.
for d=0:9
  fclose(Df{d+1});
  D = load(['test' num2str(d) '.ascii'],'-ascii');
  fprintf('%5d Digits of class %d\n',size(D,1),d);  % size(D,1) = number of images
  save(['test' num2str(d) '.mat'],'D','-mat');
end


% Work with training files second

% The header fields of the MNIST IDX files are big-endian 32-bit integers,
% so the files are opened with the 'ieee-be' machine format; in native
% little-endian order the header values would come out byte-swapped.
f = fopen('train-images-idx3-ubyte','r','ieee-be');
if f < 0
  error('Cannot open train-images-idx3-ubyte. Download and gunzip it first.');
end
[a,count] = fread(f,4,'int32');   % magic number, image count, rows, columns
if count ~= 4 || a(1) ~= 2051 || a(3) ~= 28 || a(4) ~= 28
  fclose(f);
  error('train-images-idx3-ubyte does not have the expected 28x28 IDX image header.');
end

g = fopen('train-labels-idx1-ubyte','r','ieee-be');
if g < 0
  fclose(f);
  error('Cannot open train-labels-idx1-ubyte. Download and gunzip it first.');
end
[l,count] = fread(g,2,'int32');   % magic number, label count
if count ~= 2 || l(1) ~= 2049
  fclose(f);
  fclose(g);
  error('train-labels-idx1-ubyte does not have the expected IDX label header.');
end

fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');
n = 1000;   % number of images/labels read per chunk

% One intermediate text file per digit class: digit0.ascii ... digit9.ascii.
% Df{d+1} holds the file identifier for digit d.
Df = cell(1,10);
for d=0:9
  Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
  if Df{d+1} < 0
    error('Cannot create digit%d.ascii in the current directory.',d);
  end
end

% Read the training set in 60 chunks of n images (60000 images in total).
for i=1:60
  fprintf('.');
  rawimages = fread(f,28*28*n,'uchar');    % pixel bytes of n images
  rawlabels = fread(g,n,'uchar');          % the matching n labels
  rawimages = reshape(rawimages,28*28,n);  % 784 x n, one image per column

  % Append each image as one line of 784 values to the file of its label.
  for j=1:n
    fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
    fprintf(Df{rawlabels(j)+1},'\n');
  end
end

% The raw input files are fully consumed; release their handles.
fclose(f);
fclose(g);

fprintf(1,'\n');

% Convert each per-digit text file into digitX.mat holding matrix D,
% which has one row per image and 784 columns (e.g. 5949 x 784 for digit 9).
for d=0:9
  fclose(Df{d+1});
  D = load(['digit' num2str(d) '.ascii'],'-ascii');
  fprintf('%5d Digits of class %d\n',size(D,1),d);  % e.g. " 5949 Digits of class 9"
  save(['digit' num2str(d) '.mat'],'D','-mat');
end

% Remove the intermediate *.ascii files from the current directory.
% The built-in delete accepts wildcards and works on every platform,
% unlike shelling out to 'erase' (Windows only) or 'rm' (Unix only).
delete('*.ascii');