
TensorFlow implementation of a residual network (MNIST dataset)


The residual network is Kaiming He's masterpiece: it works very well, and its depth can reach 1000 layers. Its implementation, however, is not that difficult. Here TensorFlow is used as the framework to build a residual network on the MNIST dataset, although, of course, only a relatively shallow one.

As shown in the figure below:

A solid-line connection indicates that the channel counts are the same, for example the first and third pink rectangles in the figure above, which are both 3x3x64 feature maps; since the channels match, the computation used is H(x) = F(x) + x.

A dashed-line connection indicates that the channel counts differ, for example the first and third green rectangles in the figure above, which are 3x3x64 and 3x3x128 feature maps respectively; since the channels differ, the computation used is H(x) = F(x) + Wx, where W is a convolution that adjusts the dimension of x.

Depending on whether the input and output sizes are the same, the blocks are further divided into identity_block and conv_block. Each comes in the two variants shown above, a three-convolution version and a two-convolution version; the three-convolution version is a bit faster, so it is the one used here.
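
To make the two shortcut computations concrete, here is a minimal sketch, assuming import tensorflow as tf; the names x (the block input) and F (the output of the block's convolutions) and the 64 -> 128 channel counts are placeholders for illustration only:

# purely illustrative sketch of the two shortcut types
# (x is an NHWC input tensor, F is the output of the block's stacked convolutions)

# 1) channels match: identity shortcut, H(x) = F(x) + x
H = tf.nn.relu(F + x)

# 2) channels differ (e.g. 64 -> 128): project x with a 1x1 convolution, H(x) = F(x) + Wx
W_proj = tf.Variable(tf.truncated_normal([1, 1, 64, 128], stddev=0.1))
x_proj = tf.nn.conv2d(x, W_proj, strides=[1, 1, 1, 1], padding='SAME')
H = tf.nn.relu(F + x_proj)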

The implementation is shown in the following code:

# TensorFlow residual network on the MNIST dataset; can be run directly
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# placeholders for the flattened input images and the one-hot labels
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
sess = tf.InteractiveSession()

def weight_variable(shape):
# Here's where the initial variables are constructed
 initial = tf.truncated_normal(shape, mean=0,stddev=0.1)
#Creating variables
 return tf.Variable(initial)

def bias_variable(shape):
 initial = tf.constant(0.1, shape=shape)
 return tf.Variable(initial)

# The identity_block of the residual network is defined here; its input and output dimensions are the same
def identity_block(X_input, kernel_size, in_filter, out_filters, stage, block):
 """
 Implementation of the identity block as defined in Figure 3

 Arguments:
 X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
 kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
 filters -- python list of integers, defining the number of filters in the CONV layers of the main path
 stage -- integer, used to name the layers, depending on their position in the network
 block -- string/character, used to name the layers, depending on their position in the network
 training -- train or test

 Returns:
 X -- output of the identity block, tensor of shape (n_H, n_W, n_C)
 """

 # defining name basis
 block_name = 'res' + str(stage) + block
 f1, f2, f3 = out_filters
 with tf.variable_scope(block_name):
  X_shortcut = X_input

  #first
  W_conv1 = weight_variable([1, 1, in_filter, f1])
  X = tf.nn.conv2d(X_input, W_conv1, strides=[1, 1, 1, 1], padding='SAME')
  b_conv1 = bias_variable([f1])
  X = tf.nn.relu(X + b_conv1)

  #second
  W_conv2 = weight_variable([kernel_size, kernel_size, f1, f2])
  X = tf.nn.conv2d(X, W_conv2, strides=[1, 1, 1, 1], padding='SAME')
  b_conv2 = bias_variable([f2])
  X = tf.nn.relu(X + b_conv2)

  #third

  W_conv3 = weight_variable([1, 1, f2, f3])
  X = tf.nn.conv2d(X, W_conv3, strides=[1, 1, 1, 1], padding='SAME')
  b_conv3 = bias_variable([f3])
  X = tf.nn.relu(X + b_conv3)
  #final step
  add = tf.add(X, X_shortcut)
  b_conv_fin = bias_variable([f3])
  add_result = tf.nn.relu(add + b_conv_fin)

 return add_result
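
# Example use of identity_block (illustrative; mirrors the stage-2 calls further down):
#  x1 = identity_block(x1, kernel_size=3, in_filter=256, out_filters=[64, 64, 256], stage=2, block='b')
# With in_filter equal to the last entry of out_filters, a 14x14x256 input stays 14x14x256,
# so F(x) and the shortcut can be added directly.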


# The conv_block module is defined here. Its input and output scales differ, so a convolution is applied on the shortcut to change the scale of x before the two paths are added.
def convolutional_block( X_input, kernel_size, in_filter,
    out_filters, stage, block, stride=2):
 """
 Implementation of the convolutional block as defined in Figure 4

 Arguments:
 X -- input tensor of shape (m, n_H_prev, n_W_prev, n_C_prev)
 kernel_size -- integer, specifying the shape of the middle CONV's window for the main path
 filters -- python list of integers, defining the number of filters in the CONV layers of the main path
 stage -- integer, used to name the layers, depending on their position in the network
 block -- string/character, used to name the layers, depending on their position in the network
 training -- train or test
 stride -- Integer, specifying the stride to be used

 Returns:
 X -- output of the convolutional block, tensor of shape (n_H, n_W, n_C)
 """

 # defining name basis
 block_name = 'res' + str(stage) + block
 with tf.variable_scope(block_name):
  f1, f2, f3 = out_filters

  x_shortcut = X_input
  #first
  W_conv1 = weight_variable([1, 1, in_filter, f1])
  X = tf.nn.conv2d(X_input, W_conv1, strides=[1, stride, stride, 1], padding='SAME')
  b_conv1 = bias_variable([f1])
  X = tf.nn.relu(X + b_conv1)

  #second
  W_conv2 = weight_variable([kernel_size, kernel_size, f1, f2])
  X = tf.nn.conv2d(X, W_conv2, strides=[1, 1, 1, 1], padding='SAME')
  b_conv2 = bias_variable([f2])
  X = tf.nn.relu(X + b_conv2)

  #third
  W_conv3 = weight_variable([1, 1, f2, f3])
  X = tf.nn.conv2d(X, W_conv3, strides=[1, 1, 1, 1], padding='SAME')
  b_conv3 = bias_variable([f3])
  X = tf.nn.relu(X + b_conv3)
  #shortcut path
  W_shortcut = weight_variable([1, 1, in_filter, f3])
  x_shortcut = tf.nn.conv2d(x_shortcut, W_shortcut, strides=[1, stride, stride, 1], padding='VALID')

  #final
  add = tf.add(x_shortcut, X)
  # bias added to the fused output
  b_conv_fin = bias_variable([f3])
  add_result = tf.nn.relu(add + b_conv_fin)


 return add_result
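
# Example use of convolutional_block (illustrative; mirrors the stage-2 call further down):
#  x1 = convolutional_block(X_input=x1, kernel_size=3, in_filter=64, out_filters=[64, 64, 256], stage=2, block='a', stride=1)
# This maps a 14x14x64 input to 14x14x256; the 1x1 shortcut convolution (W_shortcut)
# raises the shortcut branch from 64 to 256 channels so the two branches can be added.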



# build the network in x1 so the placeholder x stays available for the feed_dict below
x1 = tf.reshape(x, [-1, 28, 28, 1])
w_conv1 = weight_variable([2, 2, 1, 64])
x1 = tf.nn.conv2d(x1, w_conv1, strides=[1, 2, 2, 1], padding='SAME')
b_conv1 = bias_variable([64])
x1 = tf.nn.relu(x1 + b_conv1)
# the feature map is now 14x14x64
x1 = tf.nn.max_pool(x1, ksize=[1, 3, 3, 1],
    strides=[1, 1, 1, 1], padding='SAME')


#stage 2
x1 = convolutional_block(X_input=x1, kernel_size=3, in_filter=64, out_filters=[64, 64, 256], stage=2, block='a', stride=1)
# after the conv_block above the size becomes 14x14x256
x1 = identity_block(x1, 3, 256, [64, 64, 256], stage=2, block='b')
x1 = identity_block(x1, 3, 256, [64, 64, 256], stage=2, block='c')
# the feature map is still 14x14x256 after the identity blocks above
x1 = tf.nn.max_pool(x1, [1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# becomes 7x7x256
flat = tf.reshape(x1, [-1, 7*7*256])

w_fc1 = weight_variable([7 * 7 *256, 1024])
b_fc1 = bias_variable([1024])

h_fc1 = tf.nn.relu(tf.matmul(flat, w_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2


# Build the loss function, here the cross entropy function is used
cross_entropy = tf.reduce_mean(
 tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_conv))

train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialize variables

sess.run(tf.global_variables_initializer())

print("cuiwei")
for i in range(2000):
 batch = mnist.train.next_batch(10)
 if i % 100 == 0:
  train_accuracy = accuracy.eval(feed_dict={
   x: batch[0], y: batch[1], keep_prob: 1.0})
  print("step %d, training accuracy %g" % (i, train_accuracy))
 train_step.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
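
After training, the accuracy on the MNIST test set can also be checked. A minimal sketch, evaluating in small batches to keep memory usage down:

# evaluate on the test set in batches (feeding all 10,000 test images at once may exhaust memory)
num_test_batches = 100
acc_total = 0.0
for _ in range(num_test_batches):
 test_batch = mnist.test.next_batch(100)
 acc_total += accuracy.eval(feed_dict={x: test_batch[0], y: test_batch[1], keep_prob: 1.0})
print("test accuracy %g" % (acc_total / num_test_batches))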

This TensorFlow implementation of a residual network on the MNIST dataset is all I have to share with you. I hope it gives you a useful reference, and I hope you will continue to support me.