A Hands-on Analysis of a Backpropagation Example
Ali Rıza SARAL
I analyzed an MNIST digit-recognition program written in Python that uses backpropagation. The code is written for Python 2.6 or 2.7; Michal Daniel Dobrzanski has a repository for Python 3 [here](https://github.com/MichalDanielDobrzanski/DeepLearningPython35). I found this code among the code samples for "Neural Networks and Deep Learning": that repository contains the code samples for Nielsen's book ["Neural Networks and Deep Learning"](http://neuralnetworksanddeeplearning.com), MIT License, Copyright (c) 2012-2018 Michael Nielsen. It can easily be found on the internet...
I inserted sys.exit(1) calls to stop the program and check various variables, and I added the outputs at these points so that the flow of the program can easily be followed. I also added the full output of the print statements that I added for the first epoch.
Cheers.
Ali R+
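A note on method before the listings: the checkpoint pattern used throughout is nothing more than printing the variables of interest and then stopping the run. A minimal sketch of that pattern, with a made-up variable name, is:

import sys
import numpy as np

a = np.random.randn(784, 1)        # hypothetical activation vector

print("shape(a)=", np.shape(a))    # inspect whatever is of interest here

# Stop the run so the printed values can be examined in the console;
# comment this out to let the program continue.
sys.exit(1)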
Runner.py--------------------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 23 17:38:51 2024

@author: ars
"""
import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)      # Convert zip object to list
validation_data = list(validation_data)  # Convert zip object to list
test_data = list(test_data)              # Convert zip object to list

import network
net = network.Network([784, 30, 10])

#%%
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
"""
runfile('C:/Users/ars/ARStensorflow/00vbt/ex1/runner.py',
wdir='C:/Users/ars/ARStensorflow/00vbt/ex1')
Reloaded
modules: mnist_loader, network
Epoch 0:
8076 / 10000
Epoch 1:
8277 / 10000
Epoch 2:
8734 / 10000
Epoch 3:
9251 / 10000
Epoch 4:
9336 / 10000
Epoch 5:
9383 / 10000
Epoch 6:
9385 / 10000
Epoch 7:
9407 / 10000
Epoch 8:
9426 / 10000
Epoch 9:
9429 / 10000
Epoch 10:
9444 / 10000
Epoch 11:
9442 / 10000
Epoch 12:
9422 / 10000
Epoch 13:
9482 / 10000
Epoch 14:
9464 / 10000
Epoch 15:
9445 / 10000
Epoch 16:
9465 / 10000
Epoch 17:
9470 / 10000
Epoch 18:
9496 / 10000
Epoch 19:
9490 / 10000
Epoch 20:
9477 / 10000
Epoch 21:
9481 / 10000
Epoch 22:
9486 / 10000
Epoch 23:
9508 / 10000
Epoch 24:
9462 / 10000
Epoch 25:
9477 / 10000
Epoch 26:
9477 / 10000
Epoch 27:
9470 / 10000
Epoch 28:
9478 / 10000
Epoch 29:
9463 / 10000
"""
Network.py------------------------------------------------------------------------------------------
"""
network.py
~~~~~~~~~~
A module to
implement the stochastic gradient descent learning
algorithm
for a feedforward neural network.
Gradients are calculated
using
backpropagation. Note that I have
focused on making the code
simple,
easily readable, and easily modifiable.
It is not optimized,
and omits
many desirable features.
"""
#### Libraries
# Standard library
import random
import sys

# Third-party libraries
import numpy as np
class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        print("Sizes:", sizes)
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        for bias in self.biases:
            print("bias.shape=", bias.shape)
        print("Sizes[1:]:", sizes[1:])
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        print("\nSizes[:-1]:", sizes[:-1])
        print("Sizes[1:]:", sizes[1:])
        for weight in self.weights:
            print("weight.shape=", weight.shape)
    """
    runcell(0, 'C:/Users/ars/ARStensorflow/00vbt/ex1/runner.py')
    Sizes: [784, 30, 10]
    bias.shape= (30, 1)
    bias.shape= (10, 1)
    Sizes[1:]: [30, 10]

    Sizes[:-1]: [784, 30]
    Sizes[1:]: [30, 10]
    weight.shape= (30, 784)
    weight.shape= (10, 30)
    """
    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        print("Input Layer Activation (a):")
        print("shape(a)=", np.shape(a))
        for i, (b, w) in enumerate(zip(self.biases, self.weights), 1):
            print("\nProcessing Layer:", i)
            print("shape Biases=", np.shape(b))
            print("shape Weights=", np.shape(w))
            z = np.dot(w, a) + b
            print("shape Weighted Input(z)=", np.shape(z))
            a = sigmoid(z)
            print("shape Output Activation (a)=", np.shape(a))
        #sys.exit(1)
        return a
    """
    runcell(1, 'C:/Users/ars/ARStensorflow/00vbt/ex1/runner.py')
    Input Layer Activation (a):
    shape(a)= (784, 1)

    Processing Layer: 1
    shape Biases= (30, 1)
    shape Weights= (30, 784)
    shape Weighted Input(z)= (30, 1)
    shape Output Activation (a)= (30, 1)

    Processing Layer: 2
    shape Biases= (10, 1)
    shape Weights= (10, 30)
    shape Weighted Input(z)= (10, 1)
    shape Output Activation (a)= (10, 1)
    An exception has occurred, use %tb to see the full traceback.

    SystemExit: 1
    """
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        print("\nSGD***************************************************")
        print("mini_batch_size=", mini_batch_size)
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        print("len(training_data)=", n)
        for j in range(epochs):  # Changed xrange to range
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]  # Changed xrange to range
            print("shape(mini_batches)=", np.shape(mini_batches))
            #sys.exit(1)
            """
            SGD---------------
            mini_batch_size= 10
            len(training_data)= 50000
            shape(mini_batches)= (5000, 10, 2)
            2 -> x, y
            An exception has occurred, use %tb to see the full traceback.
            """
            ars2 = 0
            for mini_batch in mini_batches:
                ars2 += 1
                print("ars2=", ars2)
                self.update_mini_batch(mini_batch, eta)
                sys.exit(1)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        print("\nupdate_mini_batch////////////////////////////////////")
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        print("shape nabla_b=", np.shape(nabla_b))
        print("shape nabla_w=", np.shape(nabla_w))
        ars = 0
        for x, y in mini_batch:
            ars += 1
            print("\ncalling backprop #=", ars)
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        print("nabla_b[0]=", nabla_b[0])
        print("nabla_b[1]=", nabla_b[1])
        print("nabla_w[0]=", nabla_w[0])
        print("nabla_w[1]=", nabla_w[1])
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]
        #sys.exit(1)
    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        print("\nbackprop=====================================")
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        print("xxxxxxxxxxxnabla_b=", nabla_b[:-3])
        print("nabla_w=", nabla_w[:-3])
        # feedforward
        print("\nfeedforward---------------------")
        activation = x
        activations = [x]  # list to store all the activations, layer by layer
        zs = []  # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        print("shape(zs)=", np.shape(zs))
        print("shape(activation)=", np.shape(activation))
        print("shape(activations)=", np.shape(activations))
        # backward pass
        print("\nbackward---------------------")
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        print("shape(delta)=", np.shape(delta))
        print("shape(nabla_b)=", np.shape(nabla_b))
        print("shape(nabla_w)=", np.shape(nabla_w))
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, self.num_layers):  # Changed xrange to range
            print("-l=", -l)
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
            if l < 4:
                print("delta[{}] = {}".format(l, delta[-l]))
                #print("nabla_b[{}] = {}".format(l, nabla_b[-l][:-3]))
                #print("nabla_w[{}] = {}".format(l, nabla_w[-l][:-3]))
        return (nabla_b, nabla_w)
    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result.  Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)
#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
"""
runfile('C:/Users/ars/ARStensorflow/00vbt/ex1/runner.py',
wdir='C:/Users/ars/ARStensorflow/00vbt/ex1')
Reloaded
modules: mnist_loader, network
Sizes: [784,
30, 10]
bias.shape=
(30, 1)
bias.shape=
(10, 1)
Sizes[1:]:
[30, 10]
Sizes[:-1]:
[784, 30]
Sizes[1:]:
[30, 10]
weight.shape=
(30, 784)
weight.shape=
(10, 30)
SGD***************************************************
mini_batch_size=
10
len(training_data)=
50000
shape(mini_batches)=
(5000, 10, 2)
ars2= 1
update_mini_batch////////////////////////////////////
shape nabla_b= (2,)
shape nabla_w= (2,)

calling backprop #= 1

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-1.42634488e-10]

calling backprop #= 2

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-1.6675494e-07]

calling backprop #= 3

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-6.69058141e-06]

calling backprop #= 4

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-0.00990081]

calling backprop #= 5

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-2.21166674e-13]

calling backprop #= 6

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [5.21994737e-06]

calling backprop #= 7

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-1.46279172e-09]

calling backprop #= 8

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [7.47492242e-08]

calling backprop #= 9

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-1.09874347e-09]

calling backprop #= 10

backprop=====================================
xxxxxxxxxxxnabla_b= []
nabla_w= []

feedforward---------------------
shape(zs)= (2,)
shape(activation)= (10, 1)
shape(activations)= (3,)

backward---------------------
shape(delta)= (10, 1)
shape(nabla_b)= (2,)
shape(nabla_w)= (2,)
-l= -2
delta[2] = [-0.00207019]
nabla_b[0]= [[-0.01325782]
[-0.03262269]
[-0.03594767]
[ 0.00368335]
[ 0.09522265]
[-0.09147546]
[-0.01036631]
[ 0.16768215]
[-0.09533466]
[ 0.00056786]
[-0.01581537]
[ 0.01267168]
[-0.00257171]
[ 0.09161304]
[-0.00379968]
[ 0.02993509]
[-0.01039408]
[-0.053856  ]
[-0.02370034]
[-0.03059559]
[-0.11553744]
[ 0.00219626]
[ 0.20589038]
[-0.01510159]
[ 0.01420204]
[ 0.06503109]
[-0.17590391]
[-0.03317643]
[-0.01197257]
[-0.08747135]]
nabla_b[1]= [[0.36830155]
[0.62675642]
[0.68100886]
[0.00181689]
[0.04951175]
[0.47319558]
[0.85981884]
[0.33877696]
[0.31558633]
[0.3640103 ]]
nabla_w[0]= [[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]
nabla_w[1]= [[ 4.10015805e-01  3.22926842e-01  2.23404037e-01  2.18877662e-01
2.70085532e-01 3.18373857e-02 3.52924774e-01 9.47840951e-02
1.59868586e-01 4.77892724e-02 2.20288346e-01 2.20205925e-01
-1.68261942e-02 3.00148440e-01 1.92237370e-02 1.42144195e-01
2.30670517e-01 3.03449621e-01 1.52493627e-02 1.50987158e-01
1.68088798e-01 1.42725187e-01 5.07676562e-02 -6.74804386e-05
1.97445388e-02 2.00607563e-01 1.18179665e-01 3.81606047e-01
1.72484435e-01 -7.02472066e-02]
[ 5.53954755e-01 5.63129601e-01 4.63566614e-01 4.35228659e-01
5.00773768e-01 3.55405686e-02 4.50396288e-01 2.81969035e-01
1.52322864e-01 2.96873425e-01 2.81555015e-01 4.58461317e-01
2.73101931e-01 4.73329622e-01 4.81376679e-02 4.52391721e-01
2.15350865e-01 2.63431582e-01 1.71456348e-02 3.71569920e-01
2.47275902e-01 3.13953178e-02 1.61893015e-01 9.59780389e-03
3.23937381e-02 3.90550915e-01 2.13925323e-01 5.54998773e-01
4.92759817e-01 1.43993109e-01]
[ 6.16086888e-01 6.02604002e-01 5.37537213e-01 5.47739625e-01
6.00823224e-01 7.10742376e-02 7.70878436e-01 2.10498456e-01
1.97151373e-01 3.52736622e-01 4.84395298e-01 5.00352136e-01
1.55482741e-01 5.81145995e-01 -1.08148860e-01 3.38641639e-01
2.76317801e-01 3.50592185e-01 2.40462361e-02 4.66033878e-01
2.64761425e-01 1.56685705e-01 1.56309513e-01 -1.06681221e-01
-1.06243615e-01 3.48106487e-01 1.20492110e-01 6.84493280e-01
4.54685392e-01 1.51535600e-01]
[ 1.81767698e-03 1.63954575e-03 8.45787544e-04 1.50209055e-03
7.12282995e-04 7.36863064e-05 1.42559910e-03 1.18807871e-03
4.01618226e-04 2.33191992e-04 5.47004368e-04 1.43919597e-03
1.23651581e-03 1.48510588e-03 7.55718779e-05 6.77655758e-04
5.66081161e-04 9.58860915e-04 4.96835894e-05 3.02563859e-04
3.98767564e-04 -3.57741365e-05 3.63975742e-04 1.87809475e-06
4.43689259e-05 5.56522250e-04 8.41395617e-04 1.81339297e-03
1.85298963e-03 2.47101392e-04]
[ 1.94439021e-02 4.89760463e-02 1.24454742e-01 -7.92407401e-02
6.08570458e-02 -2.11839865e-03 4.01604278e-02 1.25026192e-01
1.32171376e-01 -8.82951231e-02 1.36229311e-01 -1.02288539e-01
  -3.32739987e-03 -2.49987223e-02 -1.49643101e-03 -8.03672884e-02
-6.31984296e-03 3.65717561e-02 -1.46002147e-03 2.60672916e-02
-7.76295902e-02 1.42819964e-01 -5.78882657e-02 8.88275228e-05
-6.01227529e-03 4.38541440e-02 2.73708990e-04 3.35288397e-03
-2.77810688e-02 4.60318291e-04]
[ 4.54299849e-01 3.99142986e-01 2.35076136e-01 3.96136952e-01
3.29510653e-01 1.04237984e-02 4.20897166e-01 2.49798459e-01
8.93637190e-02 1.64013704e-01 1.67286863e-01 4.13558945e-01
1.75480476e-01 4.31321943e-01 2.97122732e-02 2.00977999e-01
1.60210728e-01 3.38856419e-01 1.29250425e-02 1.86653617e-01
1.92044382e-01 2.01207009e-02 1.60278587e-01 1.14329229e-02
2.50601189e-02 3.43704920e-01 1.72189596e-01 4.43404704e-01
3.53722686e-01 3.30965866e-02]
[ 7.26438390e-01 8.06274286e-01 4.86621269e-01 6.70054704e-01
5.74081103e-01 5.56085447e-02 7.62946744e-01 4.06732950e-01
2.03630890e-01 3.34200326e-01 4.28793400e-01 6.86806677e-01
3.39252118e-01 7.67125687e-01 8.05907230e-02 5.05392890e-01
3.79526873e-01 5.61154707e-01 2.25712780e-02 4.32248073e-01
2.81019759e-01 1.43847117e-01 1.88166354e-01 2.41122918e-02
6.44215402e-02 5.01391555e-01 2.48282226e-01 7.21868789e-01
6.18708125e-01 1.09112722e-01]
[ 1.14430196e-01 3.32483412e-01 6.14609665e-02 3.98638790e-01
2.38280250e-01 4.47666721e-02 3.70386318e-01 1.56943973e-01
-4.48693372e-02 2.53068353e-01 1.69090010e-01 4.36377888e-01
1.93474746e-01 4.53510581e-01 1.01570738e-01 2.75188231e-01
2.94291190e-01 3.45084058e-01 5.12236008e-03 2.18546578e-01
6.94532403e-02 4.68230011e-02 7.51438959e-02 6.34173176e-02
9.61252736e-02 2.85222148e-01 1.19664159e-01 1.25720761e-01
2.59277421e-01 9.03950354e-02]
[ 2.88058370e-01 3.03812804e-01 1.99450118e-01 1.55420516e-01
2.11262547e-01 2.65573576e-02 1.83563674e-01 2.20535146e-01
1.43914304e-01 9.17437533e-02 6.47602450e-02 1.47915954e-01
2.51352124e-01 1.72069697e-01 7.58256310e-03 1.63989432e-01
1.74382007e-02 1.58236675e-01 1.64225561e-03 1.65530666e-01
3.89096521e-02 3.22221605e-02 5.88510658e-02 1.52274825e-04
2.27319868e-03 1.08819243e-01 1.65643861e-01 2.88621521e-01
2.72827422e-01 9.84375373e-02]
[ 9.40141076e-02 4.36210985e-01 8.63798720e-02 2.28816776e-01
2.99960206e-01 6.51496851e-02 8.56031711e-02 3.06853771e-01
6.24451582e-02 4.43259288e-01 -9.20583316e-02 3.01430208e-01
4.94280184e-01 2.98448455e-01 2.19879497e-01 4.72259197e-01
5.00616961e-02 3.36038311e-01 -1.99755570e-02 4.71118109e-01
-2.53260615e-02 1.17042481e-02 1.32364766e-01 1.04075860e-01
1.29992669e-01 3.04113306e-01 1.89165779e-01 9.19560428e-02
2.56775068e-01 2.17348900e-01]]
C:\ProgramData\anaconda3\lib\site-packages\numpy\core\fromnumeric.py:2009:
VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences
(which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths
or shapes) is deprecated. If you meant to do this, you must specify
'dtype=object' when creating the ndarray.
result = asarray(a).shape
An exception has occurred, use %tb to see the full traceback.

SystemExit: 1
"""
mnist_loader.py------------------------------------------------------------------------------------
"""
mnist_loader
~~~~~~~~~~~~
A library to
load the MNIST image data. For details
of the data
structures
that are returned, see the doc strings for ``load_data``
and
``load_data_wrapper``. In practice,
``load_data_wrapper`` is the
function
usually called by our neural network code.
"""
#### Libraries
# Standard library
import pickle
import gzip

# Third-party libraries
import numpy as np
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    with gzip.open('./data/mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    return (training_data, validation_data, test_data)
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``.  Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = list(zip(training_inputs, training_results))  # Convert zip object to list
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))  # Convert zip object to list
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))  # Convert zip object to list
    return (training_data, validation_data, test_data)
def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
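As the docstrings above explain, the training tuples carry a one-hot ``y`` while the validation and test tuples carry the raw digit label. Assuming mnist.pkl.gz is present under ./data, a quick check of that asymmetry (my own snippet, not part of the original loader) looks like this:

import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

x_train, y_train = training_data[0]   # first training example
x_test, y_test = test_data[0]         # first test example

print(x_train.shape, y_train.shape)   # (784, 1) (10, 1): image column vector, one-hot label
print(x_test.shape, y_test)           # (784, 1) and a plain digit label such as 7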
exploreNetwork.py -------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 24 21:51:30 2024

@author: ars

To play with and test network.py.
"""
import numpy as np
"""Return
a 10-dimensional unit vector with a 1.0 in the jth
position and zeroes elsewhere. This is used to convert a digit
(0...9) into a corresponding desired output
from the neural
network."""
def vectorized_result(j):
    """Return a unit vector with a 1.0 in the jth position and zeroes
    elsewhere (sized 3 here for experimentation).  This is used to
    convert a digit into a corresponding desired output from the
    neural network."""
    e = np.zeros((3, 1))
    e[j] = 1.0
    return e
print(vectorized_result(2))
#%%
a = [1, 2, 3, 4, 5, 6]
print(a[-1:])
print(a[1])
for x, y in zip(a[:-1], a[1:]):
    print('x=%d y=%d' % (x, y))
#%%
sizes = [3, 2, 1]
biases = [y for y in sizes[1:]]
print("---------", biases)
weights = [(x, y) for x, y in zip(sizes[:-1], sizes[1:])]
print("sizes[:-1]=", sizes[:-1], " sizes[1:]", sizes[1:])
print("--------", weights)
#%%
import numpy as np

# Define the sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

class NeuralNetwork:
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            a = sigmoid(z)
        return a
# Assuming the example sizes and input data
sizes = [3, 2, 1]
a = np.array([[0.5], [0.5], [0.5]])

# Create the neural network
net = NeuralNetwork(sizes)

# Perform forward propagation
output = net.feedforward(a)
print("Output after forward propagation:")
print(output)
#%%
class NeuralNetwork:
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        print("Input Layer Activation (a):")
        print(a)
        for i, (b, w) in enumerate(zip(self.biases, self.weights), 1):
            print("\nProcessing Layer:", i)
            print("Biases (b) for Layer", i, ":")
            print(b)
            print("Weights (w) for Layer", i, ":")
            print(w)
            z = np.dot(w, a) + b
            print("Weighted Input (z) for Layer", i, ":")
            print(z)
            a = sigmoid(z)
            print("Output Activation (a) after applying activation function for Layer", i, ":")
            print(a)
        return a
# Example sizes and input data
sizes = [3, 2, 1]
a = np.array([[0.5], [0.5], [0.5]])

# Create the neural network
net = NeuralNetwork(sizes)

# Perform forward propagation
output = net.feedforward(a)
print("\nFinal Output after forward propagation:")
print(output)
"""
runcell(3,
'C:/Users/ars/ARStensorflow/00vbt/anamolyTransformer/convertNumOneOfALL.py')
Input Layer
Activation (a):
[[0.5]
[0.5]
[0.5]]
Processing
Layer: 1
Biases (b)
for Layer 1 :
[[-0.34238311]
[-0.31615907]]
Weights (w)
for Layer 1 :
[[-0.84184229
-0.06860264 -0.35556292]
[ 0.18949908
1.41720536 0.52521287]]
Weighted
Input (z) for Layer 1 :
[[-0.97538703]
[ 0.74979958]]
Output
Activation (a) after applying activation function for Layer 1 :
[[0.27380806]
[0.67913503]]
Processing
Layer: 2
Biases (b)
for Layer 2 :
[[0.06311896]]
Weights (w)
for Layer 2 :
[[-1.74024052
-1.10110167]]
Weighted
Input (z) for Layer 2 :
[[-1.16116963]]
Output
Activation (a) after applying activation function for Layer 2 :
[[0.23845482]]
Final Output
after forward propagation:
[[0.23845482]]
"""