
A detailed example of implementing the gradient descent algorithm in Python

Python version selection

I chose Python 2.7 here because my earlier attempts with Python 3 raised errors when drawing the 3D plots, so I switched to 2.7.

Data set selection

For the dataset, I chose one with two variables and three parameters so that I could draw 3D graphs to validate the results.

Summary of the functions used

The symbols() function: you must install the sympy library before it can be used. Usage:

>>> x1 = symbols('x2')
>>> x1 + 1
x2 + 1

In this example, x1 and x2 are not the same: x2 denotes a variable of the function, while x1 is a Python variable. The Python variable can stand for the function variable or any other quantity, and it is used in place of x2 when computing with the function. In practice we can name both x1 and x2 simply x, but we need to understand the difference between the two.
Consider another example:

>>> x = symbols('x')
>>> expr = x + 1
>>> x = 2
>>> print(expr)
x + 1

Here the Python variable x is overwritten with the value 2, so it no longer refers to the function variable x. But expr still refers to the symbolic expression x + 1, so the printed result is still x + 1.
The subs() function: since ordinary assignment cannot give a value to a function variable, there must be a function that does this. Usage:

>>> (1 + x*y).subs(x, pi)  # usage with one parameter
pi*y + 1
>>> (1 + x*y).subs({x: pi, y: 2})  # usage with multiple parameters
1 + 2*pi
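
Note that these snippets assume the symbols have already been defined, for example:

>>> from sympy import symbols, pi
>>> x, y = symbols('x y')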

The diff() function computes partial derivatives. Usage: result = diff(fun, x) takes the partial derivative of the function fun with respect to the variable x. The result is itself a symbolic expression; you still need to substitute concrete values into it (with subs()) to get a numeric answer.
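
For example, a minimal sketch (the function fun here is just an illustration):

>>> from sympy import symbols, diff
>>> x, y = symbols('x y')
>>> fun = x**2 + 3*x*y
>>> result = diff(fun, x)  # partial derivative of fun with respect to x
>>> result
2*x + 3*y
>>> result.subs({x: 1, y: 2})  # substitute values to get a number
8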

Code Implementation:

from __future__ import division
from sympy import symbols, diff
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

data = {'x1': [100, 50, 100, 100, 50, 80, 75, 65, 90, 90],
        'x2': [4, 3, 4, 2, 2, 2, 3, 4, 3, 2],
        'y': [9.3, 4.8, 8.9, 6.5, 4.2, 6.2, 7.4, 6.0, 7.6, 6.1]}  # initialize the dataset
theta0, theta1, theta2 = symbols('theta0 theta1 theta2', real=True)  # y = theta0 + theta1*x1 + theta2*x2, define the parameters
costfuc = 0 * theta0  # start from a zero symbolic expression
for i in range(10):
    costfuc += (theta0 + theta1 * data['x1'][i] + theta2 * data['x2'][i] - data['y'][i]) ** 2
costfuc /= 20  # initialize the cost function: 1/(2m) times the squared-error sum, with m = 10
dtheta0 = diff(costfuc, theta0)
dtheta1 = diff(costfuc, theta1)
dtheta2 = diff(costfuc, theta2)

rtheta0 = 1
rtheta1 = 1
rtheta2 = 1  # assign initial values to the parameters

costvalue = costfuc.subs({theta0: rtheta0, theta1: rtheta1, theta2: rtheta2})
newcostvalue = 0  # the change in the cost value decides whether the minimum has been reached
count = 0
alpha = 0.0001  # learning rate; keep it fairly small, or the minimum will never be reached
while (costvalue - newcostvalue > 0.00001 or newcostvalue - costvalue > 0.00001) and count < 1000:
    count += 1
    costvalue = newcostvalue
    # evaluate all three partial derivatives at the current point before updating
    current = {theta0: rtheta0, theta1: rtheta1, theta2: rtheta2}
    d0 = dtheta0.subs(current)
    d1 = dtheta1.subs(current)
    d2 = dtheta2.subs(current)
    rtheta0 = rtheta0 - alpha * d0
    rtheta1 = rtheta1 - alpha * d1
    rtheta2 = rtheta2 - alpha * d2
    newcostvalue = costfuc.subs({theta0: rtheta0, theta1: rtheta1, theta2: rtheta2})
rtheta0 = float(round(rtheta0, 4))
rtheta1 = float(round(rtheta1, 4))
rtheta2 = float(round(rtheta2, 4))  # keep 4 decimals in the result to prevent value overflow
print(rtheta0, rtheta1, rtheta2)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data['x1'], data['x2'], data['y'])  # scatter plot of the data
xx = np.arange(20, 100, 1)
yy = np.arange(1, 5, 0.05)
X, Y = np.meshgrid(xx, yy)
Z = X * rtheta1 + Y * rtheta2 + rtheta0  # the fitted plane
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))

plt.show()  # draw the 3D graph to validate the result

Results: running the script prints the fitted parameters and then displays the 3D scatter plot together with the fitted plane.
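
To double-check the fitted parameters, one can also compare them against the closed-form least-squares solution. This is a quick sanity check, not part of the original article; it uses np.linalg.lstsq:

import numpy as np

x1 = [100, 50, 100, 100, 50, 80, 75, 65, 90, 90]
x2 = [4, 3, 4, 2, 2, 2, 3, 4, 3, 2]
y = [9.3, 4.8, 8.9, 6.5, 4.2, 6.2, 7.4, 6.0, 7.6, 6.1]
X = np.column_stack([np.ones(10), x1, x2])  # design matrix with columns [1, x1, x2]
theta = np.linalg.lstsq(X, y, rcond=None)[0]  # exact least-squares solution
print(theta)  # should be close to (rtheta0, rtheta1, rtheta2) from gradient descent

Gradient descent approaches this exact solution as the number of iterations grows.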

Extended example:
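
The following script (credited to 'epleone' in its docstring) fits a linear model to randomly generated data. The batch size argument bs selects the variant: batch gradient descent (bs equal to the number of samples), mini-batch gradient descent, or stochastic gradient descent (bs = 1). Note that, unlike the first example, it requires Python 3.4 or later.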

'''
Gradient Descent Algorithm
Batch Gradient Descent
Stochastic Gradient Descent (SGD)
'''
__author__ = 'epleone'
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import sys

# Use a fixed random seed so the same random numbers are generated on every
# run, which helps with debugging.
# np.random.seed(111111111)


class GradientDescent(object):
    eps = 1.0e-8
    max_iter = 1000000  # not used for now
    dim = 1
    func_args = [2.1, 2.7]  # [w_0, .., w_dim, b]

    def __init__(self, func_arg=None, N=1000):
        self.data_num = N
        if func_arg is not None:
            self.FuncArgs = func_arg
        self._getData()

    def _getData(self):
        # random inputs in [-10, 10), plus a constant column for the bias term
        x = 20 * (np.random.rand(self.data_num, self.dim) - 0.5)
        b_1 = np.ones((self.data_num, 1), dtype=float)
        self.x = np.concatenate((x, b_1), axis=1)

    def func(self, x):
        # if the noise is too large, gradient descent stops working
        noise = 0.01 * np.random.randn(self.data_num)
        w = np.array(self.func_args)
        y = np.dot(x, w)  # matrix multiplication
        y += noise
        return y

    @property
    def FuncArgs(self):
        return self.func_args

    @FuncArgs.setter
    def FuncArgs(self, args):
        if not isinstance(args, list):
            raise Exception(
                'args is not list, it should be like [w_0, ..., w_dim, b]')
        if len(args) == 0:
            raise Exception('args is empty list!!')
        if len(args) == 1:
            args.append(0.0)  # no bias given; default it to 0.0
        self.func_args = args
        self.dim = len(args) - 1
        self._getData()

    @property
    def EPS(self):
        return self.eps

    @EPS.setter
    def EPS(self, value):
        if not isinstance(value, float) and not isinstance(value, int):
            raise Exception("The type of eps should be a float number")
        self.eps = value

    def plotFunc(self):
        # 1-D plot
        if self.dim == 1:
            x = self.x
            y = self.func(x)
            fig, ax = plt.subplots()
            ax.plot(x[:, 0], y, 'o')  # plot the data column, not the bias column
            ax.set(xlabel='x', ylabel='y', title='Data')
            ax.grid()
            plt.show()
        # 2-D plot
        elif self.dim == 2:
            x = self.x
            y = self.func(x)
            xs = x[:, 0]
            ys = x[:, 1]
            zs = y
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(xs, ys, zs, c='r', marker='o')

            ax.set_xlabel('X Label')
            ax.set_ylabel('Y Label')
            ax.set_zlabel('Z Label')
            plt.show()
        else:
            plt.text(
                0.5,
                0.5,
                "The dimension( > 2) \n is too high to draw",
                size=17,
                rotation=0.,
                ha="center",
                va="center",
                bbox=dict(
                    boxstyle="round",
                    ec=(1., 0.5, 0.5),
                    fc=(1., 0.8, 0.8), ))
            plt.draw()
            plt.show()

    # gradient descent can only solve convex problems
    def _gradient_descent(self, bs, lr, epoch):
        x = self.x
        # shuffling the dataset is not necessary here
        # np.random.shuffle(x)
        y = self.func(x)
        w = np.ones((self.dim + 1, 1), dtype=float)
        for e in range(epoch):
            print('epoch:' + str(e), end=',')
            # mini-batch gradient descent; with bs of 1 it is equivalent to
            # single-sample (stochastic) gradient descent
            for i in range(0, self.data_num, bs):
                y_ = np.dot(x[i:i + bs], w)
                loss = y_ - y[i:i + bs].reshape(-1, 1)
                d = loss * x[i:i + bs]  # per-sample gradient of the squared error
                d = d.sum(axis=0) / bs
                d = lr * d
                d.shape = (-1, 1)
                w = w - d

            y_ = np.dot(self.x, w)
            # signed residual sum used as a simple convergence measure
            loss_ = abs((y_ - y.reshape(-1, 1)).sum())
            print('\tLoss = ' + str(loss_))
            print('The result of the fit is:', end=',')
            print(sum(w.tolist(), []))  # flatten the nested list of weights
            print()
            if loss_ < self.eps:
                print('The Gradient Descent algorithm has converged!!\n')
                break

    def __call__(self, bs=1, lr=0.1, epoch=10):
        if sys.version_info < (3, 4):
            raise RuntimeError('At least Python 3.4 is required')
        if not isinstance(bs, int) or not isinstance(epoch, int):
            raise Exception(
                "The type of BatchSize/Epoch should be an integer number")
        self._gradient_descent(bs, lr, epoch)


if __name__ == "__main__":
    if sys.version_info < (3, 4):
        raise RuntimeError('At least Python 3.4 is required')

    gd = GradientDescent([1.2, 1.4, 2.1, 4.5, 2.1])
    # gd = GradientDescent([1.2, 1.4, 2.1])
    print("The parameters to be fitted are: ")
    print(gd.FuncArgs)
    print("===================\n\n")
    # gd.EPS = 0.0
    gd.plotFunc()
    gd(10, 0.01)
    print("Finished!")

This concludes this detailed example of implementing the gradient descent algorithm in Python. For more on implementing gradient descent with Python, please search my earlier articles or continue browsing the related articles below. I hope you will continue to support me in the future!