“CUDA”版本间的差异

来自iCenter Wiki
跳转至: 导航搜索
第28行: 第28行:
 
  Hello World from CPU!
 
  Hello World from CPU!
 
  Hello World from GPU!
 
  Hello World from GPU!
 +
 +
#include <stdio.h>
 +
#include <cuda.h>
 +
__global__ void my_kernel() {
 +
printf("GPU blk %i thread %i %i\n", blockIdx.x, threadIdx.x, threadIdx.y);
 +
}
 +
 +
int main()
 +
{
 +
const dim3 threadsPerBlock(2,2);
 +
const dim3 blocksPerGrid(2); 
 +
my_kernel<<<threadsPerBlock, blocksPerGrid>>>();
 +
return 0;
 +
}
 +
 +
 +
$ nvcc hello_world.cu
 +
$ ./a.out
 +
GPU block 0 thread 0 0
 +
GPU block 0 thread 0 1
 +
GPU block 0 thread 1 0
 +
GPU block 0 thread 1 1
 +
GPU block 1 thread 0 0
 +
GPU block 1 thread 0 1
 +
GPU block 1 thread 1 0
 +
GPU block 1 thread 1 1

2017年4月19日 (三) 12:22的版本

CUDA(Compute Unified Device Architecture,统一计算架构)

异构平行计算系统(heterogeneous parallel computing systems)

latency devices (CPU cores)

throughput devices (GPU cores)

单程序多数据(single program, multiple data, SPMD)模型

#include <stdio.h>
#include <cuda.h>
// Kernel: every launched thread prints one greeting using device-side printf.
// Takes no arguments and touches no memory; any grid/block shape is valid.
__global__ void my_kernel() {
    // The format string must use plain ASCII double quotes; typographic
    // quotes (as produced by some editors/wikis) do not compile.
    printf("Hello World from GPU!\n");
}
// Host entry point: prints a greeting from the CPU, launches my_kernel with a
// single block of a single thread, then waits for the device to finish.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main()
{
    printf("Hello World from CPU!\n");

    my_kernel<<<1, 1>>>();

    // A kernel launch returns no status itself; configuration/launch errors
    // are reported through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // The launch is asynchronous. Block until the kernel has finished so
        // that its printf output is flushed before the process exits.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}

编译输出

$ nvcc hello_world.cu 
$ ./a.out 
Hello World from CPU!
Hello World from GPU!
#include <stdio.h>
#include <cuda.h>
// Kernel: every thread prints the x index of its block and its own (x, y)
// index inside the block. Intended for a 1D grid of 2D blocks.
// Lines from different threads may appear in any order.
__global__ void my_kernel() {
    // blockIdx/threadIdx components are unsigned, hence %u. The text says
    // "block" so that the output matches the sample run shown with this program.
    printf("GPU block %u thread %u %u\n", blockIdx.x, threadIdx.x, threadIdx.y);
}

// Host entry point: launches my_kernel on a grid of 2 blocks, each holding
// 2x2 threads (8 threads in total), and waits for the device to finish.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main() {
    const dim3 blocksPerGrid(2);       // 2 blocks along x
    const dim3 threadsPerBlock(2, 2);  // 2 x 2 threads in each block

    // Execution configuration order is <<<grid, block>>>: the grid dimensions
    // come first, the block dimensions second. Passing them the other way
    // round would launch a 2x2 grid of 2-thread blocks instead.
    my_kernel<<<blocksPerGrid, threadsPerBlock>>>();

    // A kernel launch returns no status itself; check it explicitly.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // The launch is asynchronous. Without this wait the process can exit
        // before the device printf buffer is flushed, and nothing is printed.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}


$ nvcc hello_world.cu
$ ./a.out

GPU block 0 thread 0 0
GPU block 0 thread 0 1
GPU block 0 thread 1 0
GPU block 0 thread 1 1
GPU block 1 thread 0 0
GPU block 1 thread 0 1
GPU block 1 thread 1 0
GPU block 1 thread 1 1