When using TensorRT for accelerated inference, the model must first be converted from ONNX format to TensorRT format. The following is the C++ conversion code and the corresponding build file.
Operating system: Ubuntu 20.04
C++ code:
#include <iostream>
#include <memory>
#include <fstream>
#include <cassert>
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "sampleUtils.h" // for samplesCommon::makeCudaStream (TensorRT samples/common; adjust if your samples version places it elsewhere)

class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        // suppress info-level messages
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
} logger;

int main(int argc, char** argv)
{
    if (argc != 2)
    {
        std::cerr << "usage: ./build [onnx_file_path]" << std::endl;
        return -1;
    }
    // Get the onnx file path
    char* onnx_file_path = argv[1];

    // ============= 1. Create builder =============
    auto builder = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger));
    if (!builder)
    {
        std::cerr << "Failed to create builder" << std::endl;
        return -1;
    }

    // ============= 2. Create network =============
    const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
    if (!network)
    {
        std::cout << "Failed to create network" << std::endl;
        return -1;
    }

    // ============= 3. Create onnxparser for parsing the onnx file =============
    auto parser = std::unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger));
    // Call onnxparser's parseFromFile method to parse the onnx file
    auto parsed = parser->parseFromFile(onnx_file_path, static_cast<int>(nvinfer1::ILogger::Severity::kWARNING));
    if (!parsed)
    {
        std::cout << "Failed to parse onnx file" << std::endl;
        return -1;
    }

    // Configure network parameters
    auto input = network->getInput(0);
    auto profile = builder->createOptimizationProfile();
    profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 960, 960}); // Set the minimum size
    profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{1, 3, 960, 960}); // Set the optimal size
    profile->setDimensions(input->getName(), nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{1, 3, 960, 960}); // Set the maximum size

    // ============= 4. Create config =============
    auto config = std::unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        std::cout << "Failed to create config" << std::endl;
        return -1;
    }
    config->addOptimizationProfile(profile);
    // Set the precision to FP16; INT8 would additionally require a calibrator
    config->setFlag(nvinfer1::BuilderFlag::kFP16);
    // Set the maximum batch size (deprecated and has no effect for explicit-batch networks)
    builder->setMaxBatchSize(1);
    // Set the maximum workspace size
    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1 << 30);
    // Create a CUDA stream for profiling
    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return -1;
    }
    config->setProfileStream(*profileStream);

    // ============= 5. Build the engine =============
    auto plan = std::unique_ptr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
    if (!plan)
    {
        std::cout << "Failed to create engine" << std::endl;
        return -1;
    }

    // ============= 6. Serialize and save the engine =============
    std::ofstream engine_file("./", std::ios::binary); // output engine path (the file name is left for you to fill in)
    assert(engine_file.is_open() && "Failed to open engine file");
    engine_file.write((char*)plan->data(), plan->size());
    engine_file.close();

    std::cout << "Engine build success!" << std::endl;
    return 0;
}
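As the comment in the config section notes, building an INT8 engine requires a calibrator in addition to setting the precision flag. A minimal hedged sketch of the extra config calls, assuming a calibrator object implementing nvinfer1::IInt8EntropyCalibrator2 already exists (writing one is not covered in this article):

// Hedged sketch: what the config above would additionally need for INT8.
// "calibrator" is a hypothetical object implementing nvinfer1::IInt8EntropyCalibrator2.
config->setFlag(nvinfer1::BuilderFlag::kINT8);
config->setInt8Calibrator(&calibrator);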
CMakeLists.txt:
cmake_minimum_required(VERSION 3.10)
project(TensorRT_Test LANGUAGES CXX CUDA)

set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CXX_STANDARD 14)

# Add header file paths for CUDA and TensorRT
include_directories(/usr/local/cuda-11.8/include)
include_directories(/xxx/tensorRT/TensorRT-8.6.1.6/include)
include_directories(/xxx/tensorRT/TensorRT-8.6.1.6/samples/common/)

# Add library file paths
link_directories(/usr/local/cuda-11.8/lib64)
link_directories(/xxx/tensorRT/TensorRT-8.6.1.6/lib)

# The source file name is left for you to fill in (the .cpp file containing the code above)
add_executable(build )

target_link_libraries(build nvinfer nvonnxparser cudart)
Notice:
The CUDA and TensorRT header and library paths need to be changed to match your own installation.
How to use:
Taking YOLOv8 as an example: after configuring and building the project with CMake, an executable named build is generated. Run the following command and wait for the engine file to be written:
./build <onnx_path>
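After the engine file has been written, you can quickly check that it deserializes correctly before wiring it into your inference code. The following is a minimal sketch, assuming the engine was saved as ./model.engine (a hypothetical name; the output path in the code above is left for you to fill in):

#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>
#include "NvInfer.h"

// Reuse a logger like the one defined in the build code above.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            std::cout << msg << std::endl;
    }
} logger;

int main()
{
    // Read the serialized engine back from disk ("./model.engine" is a hypothetical path).
    std::ifstream engine_file("./model.engine", std::ios::binary);
    if (!engine_file)
    {
        std::cerr << "Failed to open engine file" << std::endl;
        return -1;
    }
    std::vector<char> engine_data((std::istreambuf_iterator<char>(engine_file)), std::istreambuf_iterator<char>());

    // Deserialize the plan into an ICudaEngine.
    auto runtime = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
    auto engine = std::unique_ptr<nvinfer1::ICudaEngine>(
        runtime->deserializeCudaEngine(engine_data.data(), engine_data.size()));
    if (!engine)
    {
        std::cerr << "Failed to deserialize engine" << std::endl;
        return -1;
    }
    std::cout << "Engine loaded, number of IO tensors: " << engine->getNbIOTensors() << std::endl;
    return 0;
}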
This concludes the article on using C++ to convert a YOLOv8 model from ONNX format to TensorRT format. For more related content on converting YOLOv8 ONNX to TensorRT, please search my previous articles or continue browsing the related articles below. I hope you will continue to support me!