|
CPU version
#include <stdio.h>
#define N 7
// Adds the offset b[i] to the character a[i], in place.
//   i : index of the element to update
//   a : character array that is modified
//   b : integer offsets, one per element of a
void add_arrays(int i, char *a, int *b)
{
    // The sum is computed as an int and stored back as a char
    a[i] = (char)(a[i] + b[i]);
}
// CPU version of the example: prints the starting text, adds the offsets in
// b[] to the characters in a[] one element at a time, then prints the result.
// Returns 0 so the shell sees a successful run.
int main()
{
    // Set up the arrays. "Hello " is six characters plus the terminating
    // '\0', which fills all N (7) elements of a[]. The last offset in b[]
    // is 0, so the terminator is left untouched by the addition.
    char a[N] = "Hello ";
    int b[N] = {15, 10, 6, 0, -11, 1, 0};
    int i;

    // Print the contents of a[] before the addition
    printf("%s", a);

    // Perform the array addition, one element per iteration
    for (i = 0; i < N; i++) {
        add_arrays(i, a, b);
    }

    // Display the results
    printf("%s\n", a);

    // A non-zero exit status conventionally signals failure, so report 0
    return 0;
}
|
CUDA version
|
CPU version
// Adds the offset b[i] to the character a[i], in place.
// The caller chooses which element to update by passing its index i.
void add_arrays(int i, char *a, int *b)
{
    // The sum is computed as an int and stored back as a char
    a[i] = (char)(a[i] + b[i]);
}
CUDA version
// Kernel: each thread adds one offset from b to one character of a, in place.
// The element is selected by threadIdx.x alone, so:
//   - threads that share a threadIdx.x in different blocks would update the
//     same element; the kernel is meant to be launched with a single block;
//   - there is no bounds check, so the caller must not launch more threads
//     per block than a and b have elements.
// a and b must be pointers the kernel can dereference.
__global__
void add_arrays(char *a, int *b)
{
    const unsigned int tid = threadIdx.x;

    // The sum is computed as an int and stored back as a char
    a[tid] = (char)(a[tid] + b[tid]);
}
// Sizes in bytes of the character array and of the offset array
const int csize = N*sizeof(char);
const int isize = N*sizeof(int);

// Pointers that will refer to the device-side copies of a[] and b[]
char *ad;
int *bd;

// Allocate one device buffer per array
cudaMalloc( (void**)&ad, csize );
cudaMalloc( (void**)&bd, isize );

// Upload the host arrays into the device buffers
cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );
CPU version
int i;
for ( i = 0; i< N;i++)
add_arrays(i,a,b);
CUDA version
// Launch configuration: one block (dimGrid) containing N threads (dimBlock),
// so each thread updates the single element selected by its threadIdx.x
dim3 dimBlock( N );
dim3 dimGrid ( 1 );
add_arrays<<<dimGrid, dimBlock>>>(ad, bd);

// Copy the modified characters from the device buffer back into a[]
cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );

// Release both device buffers (bd as well as ad, so nothing is leaked)
cudaFree( ad );
cudaFree( bd );

// Display the results
printf("%s\n", a);
The CPU version is compiled with: gcc cpu.c -o cpuVersion
The CUDA version is compiled with: nvcc cuda.cu -o cudaVersion