Skip to content

Commit 6b99289

Browse files
committed
更新第三节
1 parent 09f7005 commit 6b99289

File tree

8 files changed

+91
-39
lines changed

8 files changed

+91
-39
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""Print the block index and thread index of every thread in a tiny launch."""
import pycuda.autoinit
from pycuda.compiler import SourceModule

# Device code: every thread reports its own blockIdx.x / threadIdx.x.
kernel_code = r"""
__global__ void print_id(void)
{
    printf("blockIdx.x = %d; threadIdx.x = %d;\n", blockIdx.x, threadIdx.x);
}
"""
mod = SourceModule(kernel_code)
print_id = mod.get_function("print_id")

# 2 blocks x 3 threads per block -> six lines of output.
print_id(grid=(2,), block=(3, 1, 1))

# A kernel launch returns to the host before the kernel has finished, and
# device-side printf output is buffered until a synchronization point.
# Synchronize explicitly so the output is emitted here rather than whenever
# the context happens to be torn down at interpreter exit.
pycuda.autoinit.context.synchronize()
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""CPU reference: add two random vectors element-wise with NumPy."""
import numpy

num = 4  # number of elements in each vector

# Draw the two operands in order (A first, then B).
A, B = (numpy.random.rand(num) for _ in range(2))

# Element-wise sum of the operands.
C = numpy.add(A, B)

# Report the operands followed by the result.
for _label, _vector in (('A=', A), ('B=', B), ('C=', C)):
    print(_label, _vector)
9+
10+
11+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""Add two random vectors on the GPU using PyCUDA's GPUArray arithmetic."""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy

num = 4  # number of elements in each vector

# Host-side operands.
A = numpy.random.rand(num)
B = numpy.random.rand(num)

# Upload single-precision copies of the operands (A first, then B).
A_GPU, B_GPU = (
    gpuarray.to_gpu(_host.astype(numpy.float32)) for _host in (A, B)
)

# Add the two GPUArrays, then fetch the result back as a NumPy array.
C_GPU = A_GPU + B_GPU
C = C_GPU.get()

# Report the host operands followed by the result fetched from the GPU.
for _label, _vector in (('A=', A), ('B=', B), ('C=', C)):
    print(_label, _vector)
14+
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""Add two float32 vectors on the GPU with a hand-written CUDA kernel.

Device buffers are allocated, filled and released explicitly through the
PyCUDA driver API.
"""
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import numpy

mod = SourceModule(r"""
void __global__ add(const float *x, const float *y, float *z, const int N)
{
    const int n = blockDim.x * blockIdx.x + threadIdx.x;
    // The launch is rounded up to whole blocks, so there may be more
    // threads than elements; surplus threads must not touch memory.
    if (n < N)
    {
        z[n] = x[n] + y[n];
    }
}
""")
add = mod.get_function("add")

num = 6
block_size = 4
# Ceiling division: enough blocks to cover every element (2 for num = 6).
grid_size = (num + block_size - 1) // block_size

# Host buffers; the kernel works on float32 data.
A = numpy.random.rand(num).astype(numpy.float32)
B = numpy.random.rand(num).astype(numpy.float32)
C = numpy.zeros(num).astype(numpy.float32)

# Device buffers sized to match the host arrays.
A_GPU = drv.mem_alloc(A.nbytes)
B_GPU = drv.mem_alloc(B.nbytes)
C_GPU = drv.mem_alloc(C.nbytes)

# Copy the operands host -> device.
drv.memcpy_htod(A_GPU, A)
drv.memcpy_htod(B_GPU, B)

# Scalar kernel arguments must be passed as sized NumPy scalars.
add(A_GPU, B_GPU, C_GPU, numpy.int32(num),
    grid=(grid_size, 1), block=(block_size, 1, 1))

# Copy the result device -> host into C.
drv.memcpy_dtoh(C, C_GPU)

# Release the device allocations explicitly.
A_GPU.free()
B_GPU.free()
C_GPU.free()

print('A=', A)
print('B=', B)
print('C=', C)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Add two vectors on the GPU with a hand-written CUDA kernel.

Device storage is managed through GPUArray objects, which are passed
directly as the kernel's pointer arguments.
"""
import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.gpuarray as gpuarray
import numpy

mod = SourceModule(r"""
void __global__ add(const float *x, const float *y, float *z, const int N)
{
    const int n = blockDim.x * blockIdx.x + threadIdx.x;
    // The launch is rounded up to whole blocks, so there may be more
    // threads than elements; surplus threads must not touch memory.
    if (n < N)
    {
        z[n] = x[n] + y[n];
    }
}
""")
add = mod.get_function("add")

num = 6
block_size = 4
# Ceiling division: enough blocks to cover every element (2 for num = 6).
grid_size = (num + block_size - 1) // block_size

# Host-side operands.
A = numpy.random.rand(num)
B = numpy.random.rand(num)

# Upload single-precision copies of the operands; the kernel uses float*.
A_GPU = gpuarray.to_gpu(A.astype(numpy.float32))
B_GPU = gpuarray.to_gpu(B.astype(numpy.float32))
# Output buffer: the kernel writes every one of the num elements, so an
# uninitialised array of matching shape/dtype suffices (no upload needed).
C_GPU = gpuarray.empty_like(A_GPU)

# Scalar kernel arguments must be passed as sized NumPy scalars.
add(A_GPU, B_GPU, C_GPU, numpy.int32(num),
    grid=(grid_size,), block=(block_size, 1, 1))

# Fetch the result back to the host as a NumPy array.
C = C_GPU.get()

print('A=', A)
print('B=', B)
print('C=', C)
25+

src(pyCUDA speeds up your python code)/add1.py

Lines changed: 0 additions & 13 deletions
This file was deleted.

src(pyCUDA speeds up your python code)/add2.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)