SIMPLE TWO PARALLEL INPUTS AND ONE OUTPUT TRANSFORMER EXAMPLE
This is a simple example of a transformer that takes 2 input arrays and
produces a single output array.
I will follow up with further examples that use multiple layers and more
complex structures with the transformer approach. Please note that if one of
the inputs were a picture and the other a text, the transformer would
recognise that what it sees is a CAT...
The program takes two number arrays of length 5. It calculates the average of
the two numbers at the same position in these two arrays. This is a working
example.
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 28 15:16:28 2024

@author: ars
"""
import tensorflow as tf
from tensorflow.keras import layers, Model
# Define the transformer layer
class TransformerLayer(layers.Layer):
    """Transformer encoder block: self-attention plus a feed-forward network.

    Each sub-block (attention, feed-forward) is followed by dropout, a
    residual connection and layer normalization.

    Args:
        d_model: Size of the last axis of the input. The feed-forward
            network projects back to this size so the residual additions
            are shape-compatible.
        num_heads: Number of attention heads.
        dff: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate applied after attention and after the
            feed-forward network.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate

        # NOTE: in Keras, key_dim is the per-head projection size. It is kept
        # equal to d_model (not d_model // num_heads) so the layer's parameter
        # count stays the same as before.
        self.mha = layers.MultiHeadAttention(num_heads=num_heads,
                                             key_dim=d_model)
        self.ffn = tf.keras.Sequential([
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the encoder block to ``inputs``.

        Args:
            inputs: Tensor whose last axis has size ``d_model``.
            training: Whether dropout is active. Defaults to ``None`` so
                Keras decides from the calling context (on during ``fit``,
                off during ``predict``/``evaluate``) instead of dropout
                being forced on for every direct call.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        # Self-attention: the sequence attends to itself (query == value).
        attn_output = self.mha(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config
# Define the input shape: every input is a flat vector of `feature_dim` numbers.
# All downstream sizes are derived from this single definition so they cannot
# drift apart when the vector length is changed.
input_shape = (5,)
feature_dim = input_shape[0]

# Define the inputs
input1 = layers.Input(shape=input_shape, name='input1')
input2 = layers.Input(shape=input_shape, name='input2')
model_inputs = [input1, input2]

# Concatenate the inputs into one vector of len(model_inputs) * feature_dim
concatenated = layers.Concatenate(axis=1)(model_inputs)

# Reshape for transformer input: one "token" per input array
reshape = layers.Reshape((len(model_inputs), feature_dim))(concatenated)

# Transformer layer. d_model must equal the token width because the layer
# adds its input to the attention output (residual connection).
transformer_layer = TransformerLayer(d_model=feature_dim, num_heads=2, dff=32)

# Apply transformer layer
transformed_output = transformer_layer(reshape)

# Global average pooling over the token axis
average_output = layers.GlobalAveragePooling1D()(transformed_output)

# Output layer: one linear unit per position of the predicted average
output = layers.Dense(feature_dim, activation='linear')(average_output)

# Build the model
model = Model(inputs=model_inputs, outputs=output)

# Compile the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])

# Print the model summary
model.summary()
#%%#---------------------------------------------------------------------------------------------
import numpy as np

# Seeded generator so that data (and therefore the printed samples) are
# reproducible between runs.
rng = np.random.default_rng(seed=0)

# Length of each input vector; must match the length the model was built for.
vector_len = 5

# Train the model first. Without this step `model.predict` runs on randomly
# initialised weights and its output is unrelated to the average.
num_train_samples = 5000
input1_train = rng.random((num_train_samples, vector_len))
input2_train = rng.random((num_train_samples, vector_len))
average_train = (input1_train + input2_train) / 2.0
model.fit([input1_train, input2_train], average_train,
          epochs=20, batch_size=32, validation_split=0.1, verbose=0)

# Generate some random test data (drawn separately from the training data)
num_samples = 1000
input1_test = rng.random((num_samples, vector_len))
input2_test = rng.random((num_samples, vector_len))

# Calculate the average manually for comparison
average_manual = (input1_test + input2_test) / 2.0

# Check the shape of the test data
print("Shape of input1_test:", input1_test.shape)
print("Shape of input2_test:", input2_test.shape)

# Test the model
predictions = model.predict([input1_test, input2_test])

# Overall error across the whole test set
print("Mean absolute error on test data:",
      np.mean(np.abs(predictions - average_manual)))

# Compare the predictions with the manual calculation
for i in range(5):
    print("\nSample", i+1, " - input1:", input1_test[i])
    print("Sample", i+1, " - input2:", input2_test[i])
    print("Sample", i+1, " - Manual Average:", average_manual[i],
          " - Predicted Average:", predictions[i])
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                                       Output Shape     Param #   Connected to
==================================================================================================
 input1 (InputLayer)                                [(None, 5)]      0         []
 input2 (InputLayer)                                [(None, 5)]      0         []
 concatenate_1 (Concatenate)                        (None, 10)       0         ['input1[0][0]', 'input2[0][0]']
 reshape_1 (Reshape)                                (None, 2, 5)     0         ['concatenate_1[0][0]']
 transformer_layer_1 (TransformerLayer)             (None, 2, 5)     612       ['reshape_1[0][0]']
 global_average_pooling1d (GlobalAveragePooling1D)  (None, 5)        0         ['transformer_layer_1[0][0]']
 dense_4 (Dense)                                    (None, 5)        30        ['global_average_pooling1d[0][0]']
OUTPUT:--------------------------------------------------------------------
runcell(1,
'C:/Users/ars/ARStensorflow/0parallelARS/untitled0.py')
Shape of
input1_test: (1000, 5)
Shape of
input2_test: (1000, 5)
32/32
[==============================] - 0s 3ms/step
Sample
1 - input1: [0.40338464 0.4324481 0.20288709 0.85402018 0.69681939]
Sample
1 - input2: [0.92298319 0.39169773
0.47804982 0.80640389 0.96490146]
Sample
1 - Manual Average: [0.66318391
0.41207291 0.34046846 0.83021203 0.83086043]
- Predicted Average: [ 0.41462776
0.16984153 1.310897 1.2504835
-0.22809528]
Sample
2 - input1: [0.7499501 0.1342272
0.09384698 0.32732734 0.6872341 ]
Sample
2 - input2: [0.76973532 0.10832048
0.32817306 0.60530674 0.61595368]
Sample
2 - Manual Average: [0.75984271
0.12127384 0.21101002 0.46631704 0.65159389]
- Predicted Average: [-0.4052685
0.01396421 -0.05984974 1.1452911 -0.23754917]
Sample
3 - input1: [0.3095081 0.25686936 0.83059622 0.20532096 0.80553001]
Sample
3 - input2: [0.66867723 0.38651418
0.36205749 0.91205604 0.13740754]
Sample
3 - Manual Average: [0.48909267
0.32169177 0.59632685 0.5586885
0.47146877] - Predicted Average:
[-0.49286604 -0.0415334 -0.49873334 0.53398705 -0.18983857]
Sample
4 - input1: [0.363748 0.96901881 0.760858 0.31562726 0.50555152]
Sample
4 - input2: [0.55109577 0.87754127
0.87178709 0.42192351 0.3426839 ]
Sample
4 - Manual Average: [0.45742188
0.92328004 0.81632255 0.36877539 0.42411771]
- Predicted Average: [-0.89761406
0.03873652 -1.0142024 1.4196234 0.16065012]
Sample
5 - input1: [0.15684707 0.07559663
0.26578657 0.00073441 0.31646286]
Sample
5 - input2: [0.17205819 0.57338043
0.40403394 0.49935905 0.76232375]
Sample
5 - Manual Average: [0.16445263
0.32448853 0.33491026 0.25004673 0.5393933 ]
- Predicted Average: [0.07035738 0.19064039 0.74532485 1.6838341 0.1143308 ]
runcell(1,
'C:/Users/ars/ARStensorflow/0parallelARS/untitled0.py')
Shape of
input1_test: (1000, 5)
Shape of
input2_test: (1000, 5)
32/32
[==============================] - 0s 3ms/step
Sample
1 - input1: [0.15694803 0.21293792
0.47923616 0.98860532 0.07281257]
Sample
1 - input2: [0.92906837 0.68171534
0.74526118 0.40535621 0.77818246]
Sample
1 - Manual Average: [0.5430082 0.44732663 0.61224867 0.69698076
0.42549751] - Predicted Average:
[-0.6867658 -0.46808803 -0.12430111 0.6539723
-0.44549745]
Sample
2 - input1: [0.28995958 0.38700105
0.7171286 0.8887408 0.10529674]
Sample
2 - input2: [0.26600798 0.42171587
0.38329856 0.51847964 0.07816427]
Sample
2 - Manual Average: [0.27798378
0.40435846 0.55021358 0.70361022 0.0917305 ]
- Predicted Average: [-1.2057661
-0.6692148 -1.1835346 -0.05061923 -0.49671552]
Sample
3 - input1: [0.31312179 0.59934665
0.64874245 0.26271201 0.52528184]
Sample
3 - input2: [0.28762358 0.36924366
0.05406523 0.9903467 0.01666271]
Sample
3 - Manual Average: [0.30037268
0.48429516 0.35140384 0.62652935 0.27097228]
- Predicted Average: [ 0.04163997 -0.26772195 0.6635431
0.24431774 -0.24538673]
Sample
4 - input1: [0.26770316 0.22319183
0.72713793 0.55752506 0.39540953]
Sample
4 - input2: [0.53671129 0.15183273
0.55340938 0.0380593 0.95026388]
Sample
4 - Manual Average: [0.40220723
0.18751228 0.64027366 0.29779218 0.6728367 ]
- Predicted Average: [-1.1873685
-0.5455142 -0.77665997 1.1318963
-0.22853746]
Sample
5 - input1: [0.15905445 0.74677288
0.99688255 0.38000617 0.08582965]
Sample
5 - input2: [0.09005614 0.42813253
0.72824425 0.28000251 0.36697629]
Sample
5 - Manual Average: [0.1245553 0.5874527
0.8625634 0.33000434
0.22640297] - Predicted Average:
[-1.498805 -0.7617358 -1.405476
0.60692716 -0.08873218]
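Note: This transformer needs tuning or structural adjusting. The predictions
shown above were produced without any call to model.fit, so they come from
randomly initialised weights; this is why they are far from the manual averages.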