#include <stdio.h>
#include <stdlib.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <ctime>
#define MAX 4000000
#define CHAR_MINS 'a'
#define CHAR_MAXS 'z'
/*
 * ZF: a record pairing one character with one integer.
 *
 * NOTE(review): nothing in the visible code references this type; the fields
 * presumably hold a letter and its occurrence count -- confirm before use.
 */
typedef struct ZF {
    char a; /* character field */
    int c;  /* integer field */
} ZF;
/*
 * Fill the first n entries of zf with pseudo-random characters taken from
 * the inclusive range [CHAR_MINS, CHAR_MAXS].
 *
 * zf : destination buffer; the caller must provide room for at least n chars.
 * n  : number of characters to generate; nothing is written when n <= 0.
 *
 * The C library generator is re-seeded from time(NULL) on every call, so the
 * produced sequence differs from run to run.
 */
void zifunums(char *zf, int n)
{
    /* Number of distinct characters that may be produced. */
    const int alphabetSize = CHAR_MAXS - CHAR_MINS + 1;
    int filled = 0;

    srand(time(NULL));

    while (filled < n)
    {
        /* Map the raw random value onto the allowed character range. */
        const int offset = rand() % alphabetSize;
        zf[filled] = offset + CHAR_MINS;
        ++filled;
    }
}
/*
for (int i = 0; i < sizeof(chs); i++) {
if(*(chs+i)){
arr[*(chs+i) - 'a']++;
}
}
*/
/*
 * Letter-histogram kernel: for every character chs[i] (0 <= i < n) that maps
 * to a bucket k = chs[i] - 'a' with 0 <= k < m, increments zf[k] by one.
 *
 * zf  : device array of m int counters; must be initialised by the caller
 *       (the kernel only adds to the existing values).
 * n   : number of characters in chs; nothing is done when n <= 0.
 * chs : device array of n characters (only read).
 * m   : number of counters in zf.
 *
 * Launch layout: 1-D grid of 1-D blocks. Any grid/block size gives a correct
 * result because each thread walks the input with a grid-wide stride.
 * No shared memory is required.
 *
 * Many threads may hit the same counter at once, so the increment is done
 * with atomicAdd; a plain "zf[k] = zf[k] + 1" loses updates.
 */
__global__ void compute(int *zf, int n, char *chs, int m)
{
    const size_t total = (n > 0) ? (size_t)n : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < total;
         i += stride)
    {
        const char ch = chs[i];
        if (ch == 0)
            continue; /* NUL bytes are never counted */

        /* Reject characters outside ['a', 'a' + m) on BOTH sides; a value
           below 'a' would otherwise index before the start of zf. */
        const int bucket = ch - 'a';
        if (bucket >= 0 && bucket < m)
            atomicAdd(&zf[bucket], 1);
    }
}
/* Print a diagnostic and terminate when a CUDA runtime call did not succeed. */
static void requireCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Program entry point.
 *
 * Reads a length n from stdin (falls back to 4000 when no number can be
 * parsed), generates n random letters on the host, counts the occurrences of
 * each letter on the GPU with the compute kernel, then prints every non-zero
 * count followed by the kernel time in seconds.
 *
 * Returns 0 on success and EXIT_FAILURE when n is outside [1, MAX] or a host
 * allocation fails; a failing CUDA call terminates the process via
 * requireCuda.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    const int defaultCount = 4000;
    const int m = 26; /* one counter per lowercase letter */
    int n = defaultCount;

    printf("input n:");
    if (scanf("%d", &n) != 1)
        n = defaultCount; /* unparsable input: keep the built-in default */

    /* Reject sizes the program was never meant to handle instead of
       overrunning the host buffer or launching an empty grid. */
    if (n <= 0 || n > MAX) {
        fprintf(stderr, "n must be between 1 and %d\n", MAX);
        return EXIT_FAILURE;
    }

    char *zf = (char*)malloc((size_t)n * sizeof(char));
    int *arr = (int*)calloc((size_t)m, sizeof(int)); /* zero-initialised counters */
    if (zf == NULL || arr == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(zf);
        free(arr);
        return EXIT_FAILURE;
    }
    zifunums(zf, n);

    int *da = NULL;  /* device counters */
    char *dz = NULL; /* device copy of the text */
    requireCuda(cudaMalloc((void**)&da, m * sizeof(int)), "cudaMalloc(counters)");
    requireCuda(cudaMalloc((void**)&dz, n * sizeof(char)), "cudaMalloc(text)");
    requireCuda(cudaMemcpy(da, arr, m * sizeof(int), cudaMemcpyHostToDevice),
                "copy counters to device");
    requireCuda(cudaMemcpy(dz, zf, n * sizeof(char), cudaMemcpyHostToDevice),
                "copy text to device");

    /* Launch configuration: enough blocks to cover all n characters. */
    const int threadsPerBlock = 256;
    const int blocks = (n + threadsPerBlock - 1) / threadsPerBlock;

    cudaEvent_t gpustart, gpustop;
    float elapsedTime = 0.0f; /* milliseconds, as reported by the event API */
    requireCuda(cudaEventCreate(&gpustart), "cudaEventCreate(start)");
    requireCuda(cudaEventCreate(&gpustop), "cudaEventCreate(stop)");

    requireCuda(cudaEventRecord(gpustart, 0), "cudaEventRecord(start)");
    compute<<<blocks, threadsPerBlock>>>(da, n, dz, m);
    requireCuda(cudaGetLastError(), "kernel launch");
    requireCuda(cudaEventRecord(gpustop, 0), "cudaEventRecord(stop)");
    requireCuda(cudaEventSynchronize(gpustop), "cudaEventSynchronize(stop)");
    requireCuda(cudaEventElapsedTime(&elapsedTime, gpustart, gpustop),
                "cudaEventElapsedTime");
    requireCuda(cudaEventDestroy(gpustart), "cudaEventDestroy(start)");
    requireCuda(cudaEventDestroy(gpustop), "cudaEventDestroy(stop)");

    /* Copy the result from the device back to host memory. */
    requireCuda(cudaMemcpy(arr, da, m * sizeof(int), cudaMemcpyDeviceToHost),
                "copy counters to host");

    for (int j = 0; j < m; j++) {
        if (arr[j] != 0) {
            printf("%c = %d\n", 'a' + j, arr[j]);
        }
    }
    printf("运行时间为:%fs\n", elapsedTime / 1000);

    requireCuda(cudaFree(da), "cudaFree(counters)");
    requireCuda(cudaFree(dz), "cudaFree(text)");
    free(arr);
    free(zf);

    system("pause");
    return 0;
}
// Reference: https://blog.csdn.net/OpenHero/article/details/3457704
// Take a look at this article and see whether it helps you.