目前正在学习 C++ 的 ONNX Runtime 部分,所以参考了别人博客中的代码。对代码的一些地方进行修改后,发现在 session.Run 这一步一直出现内存报错。我已经把输入图像的维度修改为生成 ONNX 模型时的维度,也没有使用 GPU 进行推理,并且已将输入展平为一维,采用的是 NCHW 格式,但实在想不出是哪里出了问题,特来请教各位!代码如下:
#include <array>
#include <chrono>
#include <cstdint>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
//onnxruntime
#include <onnxruntime_cxx_api.h>
#include <onnxruntime_c_api.h>
using namespace std;
int main(int argc, char** argv)
{
//输入网络的维度
static constexpr const int width = 640;
static constexpr const int height = 480;
static constexpr const int channel = 1;
std::array<int64_t, 4> input_shape_{ 1, 1, 480, 640 };
//-------------------------------------------------------------onnxruntime-------------------------------------------------
//图片和模型位置
#ifdef _WIN32
const wchar_t* model_path = L"C:\\Users\\dell\\PycharmProjects\\cyh_torch\\SuperGlue\\Onnx\\CYH_SuperPoint_0.5.onnx";
#else
const char* model_path = "C:\\Users\\dell\\PycharmProjects\\cyh_torch\\SuperGlue\\Onnx\\CYH_SuperPoint_0.5.onnx";
#endif
cv::Mat imgSource = cv::imread("C:/Users/dell/Desktop/CYH/SuperGlue/SuperGlue_load/1.png", 0);
Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "Detection");
Ort::SessionOptions session_options;
//CUDA加速开启
//OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
Ort::AllocatorWithDefaultOptions allocator;
//加载ONNX模型
Ort::Session session(env, model_path, session_options);
//获取输入输出的维度
std::vector<int64_t> input_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
std::vector<int64_t> output_dims = session.GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
/*
session.GetOutputName(1, allocator);
session.GetInputName(1, allocator);
//输出模型输入节点的数量
size_t num_input_nodes = session.GetInputCount();
size_t num_output_nodes = session.GetOutputCount();
*/
std::vector<const char*> input_node_names = { "image" };
std::vector<const char*> output_node_names = { "scores", "keypoints", "descriptors" };
input_dims[0] = output_dims[0] = 1;//batch size = 1
std::vector<Ort::Value> input_tensors;
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
//将图像存储到uchar数组中,BGR--->RGB
std::array<uchar, 1 * 480 * 640> input_image_{};
uchar* input = input_image_.data();
for (int i = 0; i < imgSource.rows; i++) {
for (int j = 0; j < imgSource.cols; j++) {
//NCHW 格式
input[i * imgSource.cols + j] = imgSource.ptr<uchar>(i)[j] / 255.0;
}
}
input_tensors.push_back(Ort::Value::CreateTensor<uchar>(
memory_info, input, input_image_.size(), input_shape_.data(), input_shape_.size()));
//不知道输入维度时
//input_tensors.push_back(Ort::Value::CreateTensor<uchar>(
// memory_info, input, input_image_.size(), input_dims.data(), input_dims.size()));
chrono::steady_clock::time_point t1 = chrono::steady_clock::now();
std::vector<Ort::Value> output_tensors;
output_tensors = session.Run(Ort::RunOptions{ nullptr },
input_node_names.data(), //输入节点名
input_tensors.data(), //input tensors
input_tensors.size(), //1
output_node_names.data(), //输出节点名
output_node_names.size()); //4
chrono::steady_clock::time_point t2 = chrono::steady_clock::now();
chrono::duration<double> delay_time = chrono::duration_cast<chrono::duration<double>>(t2 - t1); //milliseconds 毫秒
cout << "前向传播平均耗时:" << delay_time.count() * 1000 / 100.0 << "ms" << endl;
return 0;
}
报错的位置如下: