// Host-side sanity check of a zero-based CSR structure before it is handed to
// cuSPARSE. Returns true only when:
//   * height, width and nnz are non-negative and the required pointers are set,
//   * rowPtr[0] == 0 and rowPtr[height] == nnz,
//   * rowPtr is non-decreasing,
//   * every column index lies in [0, width).
// A structure violating any of these makes the device-side conversion index
// out of bounds, which shows up later as an asynchronous
// "illegal memory access" on an unrelated CUDA call.
static bool matTransCsrIsValid(int height, int width, int nnz, const float* elem, const int* colInd, const int* rowPtr) {
    if (height < 0 || width < 0 || nnz < 0 || rowPtr == NULL) {
        return false;
    }
    if (nnz > 0 && (elem == NULL || colInd == NULL)) {
        return false;
    }
    if (rowPtr[0] != 0 || rowPtr[height] != nnz) {
        return false;
    }
    for (int r = 0; r < height; ++r) {
        if (rowPtr[r + 1] < rowPtr[r]) {
            return false;
        }
    }
    for (int k = 0; k < nnz; ++k) {
        if (colInd[k] < 0 || colInd[k] >= width) {
            return false;
        }
    }
    return true;
}

// Transposes a sparse matrix given in zero-based CSR form by converting it to
// CSC on the GPU with cusparseCsr2cscEx2. The CSC arrays of the input are
// stored into the returned object as the CSR arrays of the transpose.
//
// Parameters (all pointers are host pointers):
//   height, width - dimensions of the input matrix
//   nnz           - number of stored entries; must fit in an int because the
//                   cuSPARSE conversion takes nnz as int
//   elem          - nnz values
//   colInd        - nnz column indices, each in [0, width)
//   rowPtr        - height + 1 row offsets, rowPtr[0] == 0, rowPtr[height] == nnz
//
// Returns a CSRmatrix constructed as (width, height, nnz) whose elements,
// colInd and rowPtr buffers are filled with the converted values, row indices
// and column offsets respectively.
//
// Invalid input is reported through checkCuda(cudaErrorInvalidValue); if that
// call returns, an object constructed as (0, 0, 0) is returned instead of
// launching the conversion on bad data.
CSRmatrix matTrans(int height, int width, size_t nnz, const float* elem, const int* colInd, const int* rowPtr) {
    // Round-trip through int to detect values that would be truncated when
    // passed to cuSPARSE.
    const int nnzInt = static_cast<int>(nnz);
    const bool nnzFitsInt = (nnzInt >= 0) && (static_cast<size_t>(nnzInt) == nnz);
    if (!nnzFitsInt || !matTransCsrIsValid(height, width, nnzInt, elem, colInd, rowPtr)) {
        checkCuda(cudaErrorInvalidValue);
        return CSRmatrix(0, 0, static_cast<size_t>(0));
    }

    CSRmatrix csc(width, height, nnz);

    const size_t valBytes = nnz * sizeof(float);
    const size_t idxBytes = nnz * sizeof(int);
    const size_t rowPtrBytes = (size_t)(height + 1) * sizeof(int);
    const size_t colPtrBytes = (size_t)(width + 1) * sizeof(int);

    int* colPtr_d = NULL;
    checkCuda(cudaMalloc((void**)&colPtr_d, colPtrBytes));

    if (nnzInt == 0) {
        // Empty matrix: every column offset of the result is zero. The zeros
        // are produced on the device and copied back so this path makes no
        // assumption about the host container beyond what the regular
        // copy-back below already makes.
        checkCuda(cudaMemset(colPtr_d, 0, colPtrBytes));
        checkCuda(cudaMemcpy(csc.rowPtr.data(), colPtr_d, colPtrBytes, cudaMemcpyDeviceToHost));
        checkCuda(cudaFree(colPtr_d));
        return csc;
    }

    // Upload the CSR input.
    float* csrVal_d = NULL;
    int* colInd_d = NULL;
    int* rowPtr_d = NULL;
    checkCuda(cudaMalloc((void**)&csrVal_d, valBytes));
    checkCuda(cudaMalloc((void**)&colInd_d, idxBytes));
    checkCuda(cudaMalloc((void**)&rowPtr_d, rowPtrBytes));
    checkCuda(cudaMemcpy(csrVal_d, elem, valBytes, cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(colInd_d, colInd, idxBytes, cudaMemcpyHostToDevice));
    checkCuda(cudaMemcpy(rowPtr_d, rowPtr, rowPtrBytes, cudaMemcpyHostToDevice));

    // Device storage for the remaining CSC output arrays.
    float* cscVal_d = NULL;
    int* rowInd_d = NULL;
    checkCuda(cudaMalloc((void**)&cscVal_d, valBytes));
    checkCuda(cudaMalloc((void**)&rowInd_d, idxBytes));

    cusparseHandle_t handle = NULL;
    checkCusparse(cusparseCreate(&handle));

    // Query and allocate the workspace required by the conversion.
    void* Buffer = NULL;
    size_t bufferSize = 0;
    checkCusparse(cusparseCsr2cscEx2_bufferSize(handle, height, width, nnzInt, csrVal_d, rowPtr_d, colInd_d, cscVal_d, colPtr_d, rowInd_d,
        CUDA_R_32F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, &bufferSize));
    checkCuda(cudaMalloc((void**)&Buffer, bufferSize));

    checkCusparse(cusparseCsr2cscEx2(handle, height, width, nnzInt, csrVal_d, rowPtr_d, colInd_d, cscVal_d, colPtr_d, rowInd_d,
        CUDA_R_32F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, Buffer));
    // Wait for the conversion so that a device-side fault is reported here
    // rather than by the first copy-back below.
    checkCuda(cudaDeviceSynchronize());

    // Copy the results back: CSC values / row indices / column offsets of the
    // input become the elements / colInd / rowPtr of the transpose.
    checkCuda(cudaMemcpy(csc.elements.data(), cscVal_d, valBytes, cudaMemcpyDeviceToHost));
    checkCuda(cudaMemcpy(csc.colInd.data(), rowInd_d, idxBytes, cudaMemcpyDeviceToHost));
    checkCuda(cudaMemcpy(csc.rowPtr.data(), colPtr_d, colPtrBytes, cudaMemcpyDeviceToHost));

    // Release the cuSPARSE handle and all device memory.
    checkCusparse(cusparseDestroy(handle));
    checkCuda(cudaFree(csrVal_d));
    checkCuda(cudaFree(cscVal_d));
    checkCuda(cudaFree(colInd_d));
    checkCuda(cudaFree(rowPtr_d));
    checkCuda(cudaFree(colPtr_d));
    checkCuda(cudaFree(rowInd_d));
    checkCuda(cudaFree(Buffer));
    return csc;
}
// Reported runtime error: CUDA API failed at line 155 with error: an illegal memory access was encountered (700)