
How to define new automatic differentiation (autograd) functions in PyTorch

Defining a new autograd function in PyTorch

To customize differentiation in PyTorch, you define your own autograd operation by subclassing torch.autograd.Function and implementing the forward and backward static methods. Refer to the demo on the official website: Portal

Straight to the code: define a ReLU that implements automatic differentiation.

import torch

class MyRelu(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        # In the forward pass we receive a context object ctx and a tensor containing the input;
        # we must return a tensor containing the output.
        # ctx is used to cache tensors for backpropagation; only tensors can be saved this way.
        ctx.save_for_backward(input)

        # clamp(min=0) restricts all values in the input to the range [0, +inf),
        # e.g. input=[-1,-2,3] becomes [0,0,3].
        # forward may return several values; backward then receives ctx plus one gradient per returned value.
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        # Read the input tensor saved in ctx during the forward pass
        input, = ctx.saved_tensors

        # grad_output holds the gradient flowing back from the later layers
        grad_input = grad_output.clone()

        # The ReLU rule: where the original input is less than 0, the output is 0,
        # so the gradient at those positions is set to 0.
        grad_input[input < 0] = 0
        return grad_input
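
Before plugging the function into a network, it can be sanity-checked against numerical gradients with torch.autograd.gradcheck. A minimal sketch (gradcheck expects double-precision inputs; the tensor shape here is arbitrary):

test_input = torch.randn(4, 5, dtype=torch.double, requires_grad=True)
# gradcheck compares the analytical backward() with numerical finite differences
print(torch.autograd.gradcheck(MyRelu.apply, (test_input,)))  # prints True if they match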

Prepare input data and run a test.

dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Use a torch Generator for the random numbers; note whether cpu or gpu random numbers are generated
generator = torch.Generator(device=device).manual_seed(42)

# N is the batch size; H is the hidden dimension;
# D_in is the input dimension; D_out is the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in, device=device, dtype=dtype, generator=generator)
y = torch.randn(N, D_out, device=device, dtype=dtype, generator=generator)

w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True, generator=generator)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True, generator=generator)

learning_rate = 1e-6
for t in range(500):
    relu = MyRelu.apply
    # Apply the custom function to the result of the first linear layer
    y_pred = relu(x.mm(w1)).mm(w2)
    # Compute the loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())
    # Backpropagation
    loss.backward()
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating the weights
        w1.grad.zero_()
        w2.grad.zero_()
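
For comparison, the manual weight update and gradient zeroing above could also be handed to a built-in optimizer. A minimal sketch, reusing the same x, y, w1 and w2 tensors:

optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)
for t in range(500):
    y_pred = MyRelu.apply(x.mm(w1)).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    optimizer.zero_grad()  # clear accumulated gradients
    loss.backward()        # compute gradients through the custom ReLU
    optimizer.step()       # apply the SGD update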

PyTorch automatic differentiation and logistic regression

automatic differentiation

If retain_graph is set to True in the call to backward, the computation graph is kept, so backpropagation can be performed a second time.
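
A minimal sketch of what this means (the variable names here are illustrative, not taken from the code above):

import torch

w = torch.tensor([1.], requires_grad=True)
x = torch.tensor([2.], requires_grad=True)
a = torch.add(w, x)
b = torch.add(w, 1)
y = torch.mul(a, b)

# retain_graph=True keeps the computation graph alive after the first backward pass
y.backward(retain_graph=True)
print(w.grad)
# a second call would raise an error without retain_graph=True in the first call
y.backward()
print(w.grad)  # note that gradients accumulate across the two calls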

logistic regression

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
torch.manual_seed(10)
# ======== generate data =============
sample_nums = 100
mean_value = 1.7
bias = 1
n_data = torch.ones(sample_nums, 2)
x0 = torch.normal(mean_value * n_data, 1) + bias   # category 0 data
y0 = torch.zeros(sample_nums)                      # category 0 labels
x1 = torch.normal(-mean_value * n_data, 1) + bias  # category 1 data
y1 = torch.ones(sample_nums)                       # category 1 labels
train_x = torch.cat((x0, x1), 0)
train_y = torch.cat((y0, y1), 0)
# ========== select the model ===========
class LR(nn.Module):
    def __init__(self):
        super(LR, self).__init__()
        self.features = nn.Linear(2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.features(x)
        x = self.sigmoid(x)
        return x

lr_net = LR()  # instantiate the logistic regression model

# ============== select the loss function ===============
loss_fn = nn.BCELoss()
# ============== select the optimizer =================
lr = 0.01
optimizer = torch.optim.SGD(lr_net.parameters(), lr=lr, momentum=0.9)

#=============== model training ==================
for iteration in range(1000):
    # Forward propagation
    y_pred = lr_net(train_x)  # output of the model
    # Compute the loss
    loss = loss_fn(y_pred.squeeze(), train_y)
    # Backpropagation
    loss.backward()
    # Update parameters
    optimizer.step()
    # Clear gradients so they do not accumulate across iterations
    optimizer.zero_grad()

    # Plotting
    if iteration % 20 == 0:
        mask = y_pred.ge(0.5).float().squeeze()  # classify with a 0.5 threshold
        correct = (mask == train_y).sum()        # number of correctly predicted samples
        acc = correct.item() / train_y.size(0)   # classification accuracy

        plt.scatter(x0.data.numpy()[:, 0], x0.data.numpy()[:, 1], c='r', label='class0')
        plt.scatter(x1.data.numpy()[:, 0], x1.data.numpy()[:, 1], c='b', label='class1')

        w0, w1 = lr_net.features.weight[0]
        w0, w1 = float(w0.item()), float(w1.item())
        plot_b = float(lr_net.features.bias[0].item())
        plot_x = np.arange(-6, 6, 0.1)
        plot_y = (-w0 * plot_x - plot_b) / w1

        plt.xlim(-5, 7)
        plt.ylim(-7, 7)
        plt.plot(plot_x, plot_y)

        plt.text(-5, 5, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.title('Iteration:{}\nw0:{:.2f} w1:{:.2f} b:{:.2f} accuracy:{:.2%}'.format(iteration, w0, w1, plot_b, acc))
        plt.legend()
        plt.show()
        plt.pause(0.5)
        if acc > 0.99:
            break
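
Once training stops, the fitted model can be applied to new points. A minimal sketch (the two sample points below are made up for illustration):

with torch.no_grad():
    new_points = torch.tensor([[3.0, 3.0], [-1.0, -1.0]])
    probs = lr_net(new_points).squeeze()  # predicted probability of the positive class
    preds = (probs >= 0.5).float()        # hard class labels using the same 0.5 threshold
    print(probs, preds)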

Summary

The above is my personal experience. I hope it gives you a useful reference, and I appreciate your support.