“CUDA”版本间的差异
来自iCenter Wiki
第35行: | 第35行: | ||
} | } | ||
int main() | int main() | ||
− | { | + | { |
const dim3 threadsPerBlock(2,2); | const dim3 threadsPerBlock(2,2); | ||
const dim3 blocksPerGrid(2); | const dim3 blocksPerGrid(2); |
2017年4月20日 (四) 01:59的最后版本
CUDA(Compute Unified Device Architecture,统一计算设备架构)
异构平行计算系统(heterogeneous parallel computing systems)
latency devices (CPU cores)
throughput devices (GPU cores)
单程序多数据(single program, multiple data, SPMD) 模型
#include <stdio.h>
#include <cuda.h>

/*
 * Minimal demonstration kernel: every launched thread prints one greeting
 * using device-side printf. Takes no arguments and uses no shared memory.
 */
__global__ void my_kernel()
{
    printf("Hello World from GPU!\n");
}

/*
 * Prints a greeting from the host, launches my_kernel with a single block of
 * a single thread, and waits for the device to finish.
 *
 * Returns 0 on success, 1 if the launch or the synchronization reports an error.
 */
int main()
{
    printf("Hello World from CPU!\n");

    /* Execution configuration <<<1, 1>>>: one block containing one thread. */
    my_kernel<<<1, 1>>>();

    /* A kernel launch does not return a status; query it explicitly. */
    cudaError_t err = cudaGetLastError();

    /*
     * Kernel launches are asynchronous. Synchronizing makes the host wait for
     * the kernel and flushes the device-side printf buffer before exit.
     */
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
编译输出
$ nvcc hello_world.cu $ ./a.out Hello World from CPU! Hello World from GPU!
#include <stdio.h>
#include <cuda.h>

/*
 * Each thread prints the x index of its block and its own (x, y) index
 * within the block, using device-side printf.
 *
 * Only blockIdx.x, threadIdx.x and threadIdx.y are reported, so the kernel
 * is meant for a 1-D grid of 2-D blocks. Uses no shared memory.
 */
__global__ void my_kernel()
{
    /* The built-in index components are unsigned, hence %u. */
    printf("GPU block %u thread %u %u\n", blockIdx.x, threadIdx.x, threadIdx.y);
}

/*
 * Launches my_kernel on 2 blocks of 2x2 threads (8 threads in total) and
 * waits for the device to finish so that all output is flushed.
 *
 * Returns 0 on success, 1 if the launch or the synchronization reports an error.
 */
int main()
{
    const dim3 threadsPerBlock(2, 2);
    const dim3 blocksPerGrid(2);

    /*
     * The execution configuration is <<<grid, block>>>: the grid dimensions
     * come first, then the block dimensions.
     */
    my_kernel<<<blocksPerGrid, threadsPerBlock>>>();

    /* A kernel launch does not return a status; query it explicitly. */
    cudaError_t err = cudaGetLastError();

    /*
     * Without a synchronization point the host may exit before the kernel
     * has run, and the device-side printf buffer would never be flushed.
     */
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
$ nvcc hello_world.cu $ ./a.out GPU block 0 thread 0 0 GPU block 0 thread 0 1 GPU block 0 thread 1 0 GPU block 0 thread 1 1 GPU block 1 thread 0 0 GPU block 1 thread 0 1 GPU block 1 thread 1 0 GPU block 1 thread 1 1