#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <iostream>
#ifdef __HIP_PLATFORM_AMD__
#include <hip/hip_ext.h>
#endif
// HIP_CHECK(command): evaluates `command` exactly once. If the resulting
// hipError_t is not hipSuccess, the HIP error string and the call site
// (file:line) are written to std::cerr and the process is aborted.
//
// The body is wrapped in do { ... } while (0) so that `HIP_CHECK(x);` forms a
// single statement and remains correct inside an unbraced if/else. The local
// uses a trailing-underscore name to avoid shadowing a caller's `status`.
#define HIP_CHECK(command)                                                   \
    do {                                                                     \
        const hipError_t hip_check_status_ = (command);                      \
        if (hip_check_status_ != hipSuccess) {                               \
            std::cerr << "Error: HIP reports "                               \
                      << hipGetErrorString(hip_check_status_)                \
                      << " at " << __FILE__ << ":" << __LINE__               \
                      << std::endl;                                          \
            std::abort();                                                    \
        }                                                                    \
    } while (0)
// Edge length of the square thread block that main() launches (one thread per
// element of its 16x16 matrices).
#define WIDTH 16
// Computes Pd = Md * Nd for square matrices of `width` x `width` doubles,
// indexed row-major (element (r, c) is at r * width + c).
//
// Each thread produces one output element: the x index selects the column and
// the y index selects the row. Indices are derived from block and thread
// coordinates, so the kernel works with a single block as well as with a
// multi-block grid. Threads that land outside the matrix return without
// touching memory, so the launch shape only has to cover the matrix.
//
// Md, Nd: input matrices, read only.
// Pd:     output matrix, every in-range element is overwritten.
// width:  number of rows/columns of each matrix.
__global__ void matrix_mul(double* Md, double* Nd, double* Pd, int width)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against launch shapes that overshoot the matrix edge.
    if (row >= width || col >= width)
    {
        return;
    }

    // Dot product of row `row` of Md with column `col` of Nd.
    double sum = 0;
    for (int k = 0; k < width; k++)
    {
        const double a = Md[row * width + k];
        const double b = Nd[k * width + col];
        sum += a * b;
    }
    Pd[row * width + col] = sum;
}
int main(void)
{
double M[16][16], N[16][16], P[16][16];
int Width = 16;
//初始化示例数据
for (int i = 0; i < 16; i++)
{
for (int j = 0; j < 16; j++)
{
M[i][j] = 2;
N[i][j] = 3;
}
}
size_t size = Width * Width * sizeof(double);
//malloc device memory
double *Md;
double *Nd;
double *Pd;
HIP_CHECK(hipMalloc((void**)&Md, size));
HIP_CHECK(hipMalloc((void**)&Nd, size));
HIP_CHECK(hipMalloc((void**)&Pd, size));
HIP_CHECK(hipMemcpy(Md, M, size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Nd, N, size, hipMemcpyHostToDevice));
dim3 dimBlock(WIDTH, WIDTH);
dim3 dimGrid(1, 1);
hipLaunchKernelGGL(matrix_mul, dimGrid, dimBlock, 0, 0, Md, Nd, Pd, Width );
HIP_CHECK(hipGetLastError());
//拷贝计算数据-一级数据指针
HIP_CHECK(hipMemcpy(P, Pd, size, hipMemcpyDeviceToHost));
//打印结果
for (int i = 0; i < 16; i++)
{
for (int j = 0; j < 16; j++)
{
printf("%.2f ", P[i][j]);
}
printf("\n");
}
//释放内存
free(M);
free(N);
free(P);
HIP_CHECK(hipFree(Md));
HIP_CHECK(hipFree(Nd));
HIP_CHECK(hipFree(Pd));
return 0;
}
// The error is probably in the following part; the reported error says it is invalid:
// HIP_CHECK(command): stores the result of `command` in a local hipError_t;
// if it is not hipSuccess, writes "Error:HIP reports" followed by
// hipGetErrorString(status) to std::cerr and calls std::abort().
#define HIP_CHECK(command){\
hipError_t status = command;\
if(status != hipSuccess){\
std::cerr <<"Error:HIP reports"\
<<hipGetErrorString(status)\
<<std::endl;\
std::abort();\
}}