Overfitting problem in practice
1. Constructing the dataset
We use a dataset whose sample feature vectors have length 2, with labels 0 or 1 representing the two classes. With the make_moons tool provided by the scikit-learn library we can generate a training set with any number of samples.
import matplotlib.pyplot as plt
# Import dataset generation tools
import numpy as np
import seaborn as sns
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Sequential, regularizers
from mpl_toolkits.mplot3d import Axes3D
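The functions below also reference two module-level constants, OUTPUT_DIR and N_EPOCHS, which are not shown in the listing above. A minimal sketch of plausible definitions follows; the directory name is an assumption, while the epoch count matches the 500 epochs used throughout this article:

import os

# Assumed settings (not part of the original listing)
OUTPUT_DIR = 'output'   # hypothetical directory for the generated figures
N_EPOCHS = 500          # number of training epochs, as stated in the text

# The experiment functions save into subdirectories of OUTPUT_DIR, so create them up front
for sub in ('network_layers', 'dropout', 'regularizers'):
    os.makedirs(os.path.join(OUTPUT_DIR, sub), exist_ok=True)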
In order to demonstrate the overfitting phenomenon, we sample only 1000 data points and add Gaussian noise with a standard deviation of 0.25:
def load_dataset():
    # Number of sampling points
    N_SAMPLES = 1000
    # Test set ratio (None falls back to scikit-learn's default split of 0.25)
    TEST_SIZE = None
    # Randomly sample 1000 points from the moon distribution and split them into training and test sets
    X, y = make_moons(n_samples=N_SAMPLES, noise=0.25, random_state=100)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
    return X, y, X_train, X_test, y_train, y_test
The make_plot function makes it easy to plot the distribution of the data, given the sample coordinates X and the sample labels y:
def make_plot(X, y, plot_name, file_name, XX=None, YY=None, preds=None, dark=False, output_dir=OUTPUT_DIR):
    # Plot the distribution of the dataset; X holds the 2D coordinates, y the label of each point
    if dark:
        plt.style.use('dark_background')
    else:
        sns.set_style("whitegrid")
    axes = plt.gca()
    axes.set_xlim([-2, 3])
    axes.set_ylim([-1.5, 2])
    axes.set(xlabel="$x_1$", ylabel="$x_2$")
    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
    plt.subplots_adjust(left=0.20)
    plt.subplots_adjust(right=0.80)
    if XX is not None and YY is not None and preds is not None:
        # Shade the predicted class regions and draw the decision boundary at 0.5
        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=0.08, cmap=plt.cm.Spectral)
        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
    # Scatter plot with markers chosen according to the labels
    markers = ['o' if i == 1 else 's' for i in y.ravel()]
    mscatter(X[:, 0], X[:, 1], c=y.ravel(), s=20, cmap=plt.cm.Spectral, edgecolors='none', m=markers, ax=axes)
    # Save the figure as a vector graphic
    plt.savefig(output_dir + '/' + file_name)
    plt.close()
def mscatter(x, y, ax=None, m=None, **kw):
    # Scatter plot that accepts a per-point list of markers m
    import matplotlib.markers as mmarkers
    if not ax:
        ax = plt.gca()
    sc = ax.scatter(x, y, **kw)
    if (m is not None) and (len(m) == len(x)):
        paths = []
        for marker in m:
            if isinstance(marker, mmarkers.MarkerStyle):
                marker_obj = marker
            else:
                marker_obj = mmarkers.MarkerStyle(marker)
            path = marker_obj.get_path().transformed(marker_obj.get_transform())
            paths.append(path)
        sc.set_paths(paths)
    return sc
X, y, X_train, X_test, y_train, y_test = load_dataset()
make_plot(X, y, "haha", 'Distribution of crescent-shaped binary dataset.svg')
2. Impact of the number of network layers
In order to explore the degree of overfitting at different network depths, we conduct a total of 5 training experiments. For 𝑛 ∈ [0, 4], a fully connected network with n + 2 layers is constructed and trained for 500 epochs with the Adam optimizer:
def network_layers_influence(X_train, y_train):
    # Build 5 networks with different numbers of layers
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation='relu'))
        # Add n hidden layers, for a total of n + 2 layers
        for _ in range(n):
            model.add(layers.Dense(32, activation='relu'))
        # Create the final layer
        model.add(layers.Dense(1, activation='sigmoid'))
        # Model assembly and training
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for networks with different numbers of layers
        # Visualize the x-coordinate in the range [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualize the y-coordinate in the range [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Generate the x-y plane sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        print(preds)
        title = "network layers: {0}".format(2 + n)
        file = "network_capacity_%i.svg" % (2 + n)
        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/network_layers')
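The listing above only defines the experiment; assuming the training split returned by load_dataset, it can be run with a call such as the following (not shown in the original excerpt). Note that predict_classes is only available in older tf.keras releases; on newer versions it can be replaced with (model.predict(...) > 0.5).astype("int32").

network_layers_influence(X_train, y_train)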
3. Impact of Dropout layers
To explore the effect of Dropout layers on network training, we conduct 5 experiments. Each uses a 7-layer fully connected network, but inserts 0 to 4 Dropout layers between the fully connected layers and trains for 500 epochs with the Adam optimizer:
def dropout_influence(X_train, y_train):
    # Build 5 networks with different numbers of Dropout layers
    for n in range(5):
        # Create the container
        model = Sequential()
        # Create the first layer
        model.add(layers.Dense(8, input_dim=2, activation='relu'))
        counter = 0
        # The number of hidden layers is fixed at 5
        for _ in range(5):
            model.add(layers.Dense(64, activation='relu'))
            # Insert up to n Dropout layers
            if counter < n:
                counter += 1
                model.add(layers.Dropout(rate=0.5))
        # Output layer
        model.add(layers.Dense(1, activation='sigmoid'))
        # Model assembly
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        # Training
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the decision boundary for different numbers of Dropout layers
        # Visualize the x-coordinate in the range [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualize the y-coordinate in the range [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Generate the x-y plane sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "no Dropout layer" if n == 0 else "{0} Dropout layers".format(n)
        file = "Dropout_%i.svg" % n
        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/dropout')
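As with the previous experiment, the function can be invoked on the training split (call not shown in the original excerpt):

dropout_influence(X_train, y_train)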
4. Impact of regularization
In order to explore the effect of the regularization coefficient 𝜆 on network training, we construct a 5-layer neural network with L2 regularization, where the weight tensors W of the 2nd, 3rd and 4th layers carry L2 regularization constraint terms:
def build_model_with_regularization(_lambda):
    # Create a neural network with regularization terms
    model = Sequential()
    model.add(layers.Dense(8, input_dim=2, activation='relu'))  # No regularization term
    # Layers 2-4 all carry L2 regularization terms
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
    # Output layer
    model.add(layers.Dense(1, activation='sigmoid'))
    # Model assembly
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
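Conceptually, regularizers.l2(_lambda) adds a penalty of 𝜆 times the sum of squared entries of each regularized weight matrix to the training loss, which drives those weights toward smaller values. A minimal NumPy sketch of the penalty term (illustration only, not part of the original code):

def l2_penalty(W, _lambda):
    # L2 penalty contributed by one weight matrix W: lambda * sum of squared weights
    return _lambda * np.sum(np.square(W))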
Below, we first implement a weight visualization function:
def plot_weights_matrix(model, layer_index, plot_name, file_name, output_dir=OUTPUT_DIR):
    # Plot the weight range of a given layer
    # Extract the weight matrix of the given layer
    weights = model.layers[layer_index].get_weights()[0]
    shape = weights.shape
    # Generate grid coordinates equal in size to the weight matrix
    X = np.array(range(shape[1]))
    Y = np.array(range(shape[0]))
    X, Y = np.meshgrid(X, Y)
    # Plot in 3D
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
    # Plot the weight matrix as a surface
    ax.plot_surface(X, Y, weights, cmap=plt.get_cmap('rainbow'), linewidth=0)
    # Set the axis names
    ax.set_xlabel('Grid x-coordinate', fontsize=16, rotation=0, fontproperties='SimHei')
    ax.set_ylabel('Grid y-coordinate', fontsize=16, rotation=0, fontproperties='SimHei')
    ax.set_zlabel('Weight', fontsize=16, rotation=90, fontproperties='SimHei')
    # Save the weight range figure
    plt.savefig(output_dir + "/" + file_name + ".svg")
    plt.close(fig)
Keeping the network structure unchanged, we adjust the regularization coefficient 𝜆 = 0.00001, 0.001, 0.1, 0.12, 0.13 to test the training effectiveness of the network, and plot the decision boundary curves of the learned model on the training set:
def regularizers_influence(X_train, y_train):
    # Test different regularization coefficients
    for _lambda in [1e-5, 1e-3, 1e-1, 0.12, 0.13]:
        # Create a model with regularization terms
        model = build_model_with_regularization(_lambda)
        # Model training
        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
        # Plot the weight range of layer 2
        layer_index = 2
        plot_title = "regularization coefficient: {}".format(_lambda)
        file_name = "regularized_network_weights_" + str(_lambda)
        # Plot the range of the network weights
        plot_weights_matrix(model, layer_index, plot_title, file_name, output_dir=OUTPUT_DIR + '/regularizers')
        # Plot the decision boundary for different regularization coefficients
        # Visualize the x-coordinate in the range [-2, 3]
        xx = np.arange(-2, 3, 0.01)
        # Visualize the y-coordinate in the range [-1.5, 2]
        yy = np.arange(-1.5, 2, 0.01)
        # Generate the x-y plane sampling grid for visualization
        XX, YY = np.meshgrid(xx, yy)
        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
        title = "regularization coefficient: {}".format(_lambda)
        file = "regularization_%g.svg" % _lambda
        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/regularizers')
regularizers_influence(X_train, y_train)
This concludes this hands-on article on the overfitting problem in TensorFlow. For more on overfitting in TensorFlow, please search my earlier articles or continue to browse the related articles below. I hope you will continue to support me!