#include <stdio.h>
#include <cuda.h>
/*
 * Kernel: every thread that runs it prints one greeting line via device-side
 * printf. Takes no arguments and writes no memory.
 */
__global__ void my_kernel() {
    // The string literal must use plain ASCII double quotes; typographic
    // quotes (as produced by word processors) are not valid delimiters.
    printf("Hello World from GPU!\n");
}
/*
 * Host entry point: prints a greeting from the CPU, launches my_kernel with
 * one block of one thread, then waits for the device to finish.
 *
 * Returns 0 on success, 1 if the launch or the synchronization reports a
 * CUDA error (the error string is written to stderr).
 */
int main()
{
    printf("Hello World from CPU!\n");

    my_kernel<<<1, 1>>>();

    // A kernel launch has no return value; launch failures are reported
    // through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "my_kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // The launch is asynchronous: block until the kernel has finished so its
    // printf output is flushed before the process exits.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}
Hello World from CPU!
Hello World from GPU!
#include <stdio.h>
#include <cuda.h>
/*
 * Kernel: every thread prints one line containing its block's x index and
 * its own x and y thread indices within the block. Takes no arguments and
 * writes no memory.
 */
__global__ void my_kernel() {
    // The format string must use plain ASCII double quotes; typographic
    // quotes are not valid string delimiters.
    printf("GPU blk %i thread %i %i\n", blockIdx.x, threadIdx.x, threadIdx.y);
}
/*
 * Host entry point: launches my_kernel on a grid of 2 blocks, each holding
 * 2x2 threads, and waits for the device to finish.
 *
 * Returns 0 on success, 1 if the launch or the synchronization reports a
 * CUDA error (the error string is written to stderr).
 */
int main()
{
    const dim3 threadsPerBlock(2, 2);
    const dim3 blocksPerGrid(2);

    // Execution configuration order is <<<grid, block>>>: the grid dimensions
    // come first, the per-block thread dimensions second.
    my_kernel<<<blocksPerGrid, threadsPerBlock>>>();

    // A kernel launch has no return value; launch failures are reported
    // through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "my_kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // The launch is asynchronous: block until the kernel has finished so its
    // printf output is flushed before the process exits.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}
$ nvcc hello_world.cu
$ ./a.out
GPU blk 0 thread 0 0
GPU blk 0 thread 0 1
GPU blk 0 thread 1 0
GPU blk 0 thread 1 1
GPU blk 1 thread 0 0
GPU blk 1 thread 0 1
GPU blk 1 thread 1 0
GPU blk 1 thread 1 1