Skip to content

Commit 2897cff

Browse files
authored
Add files via upload
1 parent 2bece9d commit 2897cff

File tree

24 files changed

+918
-0
lines changed

24 files changed

+918
-0
lines changed

src/02-thread-organization/hello1.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# print() already terminates the line, so no explicit "\n" is needed
# (an embedded "\n" would emit an extra blank line).
print('Hello World!')

src/02-thread-organization/hello2.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
from pycuda.compiler import SourceModule
4+
5+
mod = SourceModule(r"""
6+
__global__ void hello_from_gpu(void)
7+
{
8+
printf("hello World from the GPU!\n");
9+
}
10+
""")
11+
12+
hello_from_gpu = mod.get_function("hello_from_gpu")
13+
hello_from_gpu(grid=(1,1,1),block=(1,1,1))
# Kernel launches return immediately; synchronize so the device-side printf
# buffer is flushed before the script exits.
drv.Context.synchronize()

src/02-thread-organization/hello3.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
from pycuda.compiler import SourceModule
4+
5+
mod = SourceModule(r"""
6+
__global__ void hello_from_gpu(void)
7+
{
8+
printf("hello World from the GPU!\n");
9+
}
10+
""")
11+
12+
hello_from_gpu = mod.get_function("hello_from_gpu")
13+
hello_from_gpu(grid=(2,1,1),block=(4,1,1))
# Kernel launches return immediately; synchronize so the device-side printf
# buffer is flushed before the script exits.
drv.Context.synchronize()

src/02-thread-organization/hello4.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
from pycuda.compiler import SourceModule
4+
5+
mod = SourceModule(r"""
6+
__global__ void hello_from_gpu(void)
7+
{
8+
const int bid = blockIdx.x;
9+
const int tid = threadIdx.x;
10+
printf("Hello World from block %d and thread %d!\n", bid, tid);
11+
}
12+
""")
13+
14+
hello_from_gpu = mod.get_function("hello_from_gpu")
15+
hello_from_gpu(grid=(2,1,1), block=(4,1,1))
# Kernel launches return immediately; synchronize so the device-side printf
# buffer is flushed before the script exits.
drv.Context.synchronize()

src/02-thread-organization/hello5.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
from pycuda.compiler import SourceModule
4+
5+
mod = SourceModule(r"""
6+
__global__ void hello_from_gpu(void)
7+
{
8+
const int b = blockIdx.x;
9+
const int tx = threadIdx.x;
10+
const int ty = threadIdx.y;
11+
printf("Hello World from block-%d and thread-(%d, %d)!\n", b, tx, ty);
12+
}
13+
""")
14+
15+
hello_from_gpu = mod.get_function("hello_from_gpu")
16+
hello_from_gpu(grid=(1,1,1), block=(2,4,1))
# Kernel launches return immediately; synchronize so the device-side printf
# buffer is flushed before the script exits.
drv.Context.synchronize()

src/03-basic-framework/add.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import numpy
2+
3+
EPSILON = 1e-15
4+
a = 1.23
5+
b = 2.34
6+
c = 3.57
7+
N = 100000000
8+
x = numpy.full((N,1), a)
9+
y = numpy.full((N,1), b)
10+
z = x + y
11+
print('No errors' if (abs(z-c)<EPSILON).all() else 'Has errors')

src/03-basic-framework/add1.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
import numpy
4+
from pycuda.compiler import SourceModule
5+
6+
mod = SourceModule(r"""
7+
void __global__ add(const double *x, const double *y, double *z, const int N)
8+
{
9+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
10+
z[n] = x[n] + y[n];
11+
}
12+
""")
13+
add = mod.get_function("add")
14+
15+
EPSILON = 1e-15
16+
a = 1.23
17+
b = 2.34
18+
c = 3.57
19+
N = 100000000
20+
h_x = numpy.full((N,1), a)
21+
h_y = numpy.full((N,1), b)
22+
h_z = numpy.zeros_like(h_x)
23+
d_x = drv.mem_alloc(h_x.nbytes)
24+
d_y = drv.mem_alloc(h_y.nbytes)
25+
d_z = drv.mem_alloc(h_z.nbytes)
26+
drv.memcpy_htod(d_x, h_x)
27+
drv.memcpy_htod(d_y, h_y)
28+
# The kernel is declared as add(x, y, z, N): all four parameters must be
# supplied. numpy.int32 matches the kernel's `const int N`.
add(d_x, d_y, d_z, numpy.int32(N), grid=(N//128, 1), block=(128,1,1))
29+
drv.memcpy_dtoh(h_z, d_z)
30+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')

src/03-basic-framework/add2wrong.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
import numpy
4+
from pycuda.compiler import SourceModule
5+
6+
mod = SourceModule(r"""
7+
void __global__ add(const double *x, const double *y, double *z, const int N)
8+
{
9+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
10+
z[n] = x[n] + y[n];
11+
}
12+
""")
13+
add = mod.get_function("add")
14+
15+
EPSILON = 1e-15
16+
a = 1.23
17+
b = 2.34
18+
c = 3.57
19+
N = 100000000
20+
h_x = numpy.full((N,1), a)
21+
h_y = numpy.full((N,1), b)
22+
h_z = numpy.zeros_like(h_x)
23+
d_x = drv.mem_alloc(h_x.nbytes)
24+
d_y = drv.mem_alloc(h_y.nbytes)
25+
d_z = drv.mem_alloc(h_z.nbytes)
26+
# NOTE(review): this copies device-to-host with a device allocation as the
# destination, whereas the input should go host-to-device (memcpy_htod).
# Presumably deliberate, given the file name add2wrong.py — confirm.
drv.memcpy_dtoh(d_x, h_x)
27+
# NOTE(review): wrong copy direction for an input buffer (memcpy_htod would
# upload h_y); presumably deliberate, given the file name add2wrong.py — confirm.
drv.memcpy_dtoh(d_y, h_y)
28+
# The kernel is declared as add(x, y, z, N): all four parameters must be
# supplied. numpy.int32 matches the kernel's `const int N`.
add(d_x, d_y, d_z, numpy.int32(N), grid=(N//128, 1), block=(128,1,1))
29+
drv.memcpy_dtoh(h_z, d_z)
30+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')

src/03-basic-framework/add3if.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
import numpy
4+
from pycuda.compiler import SourceModule
5+
6+
mod = SourceModule(r"""
7+
void __global__ add(const double *x, const double *y, double *z, const int N)
8+
{
9+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
10+
if(n < N)
11+
{
12+
z[n] = x[n] + y[n];
13+
}
14+
}
15+
""")
16+
add = mod.get_function("add")
17+
18+
EPSILON = 1e-15
19+
a = 1.23
20+
b = 2.34
21+
c = 3.57
22+
N = 100000000
23+
h_x = numpy.full((N,1), a)
24+
h_y = numpy.full((N,1), b)
25+
h_z = numpy.zeros_like(h_x)
26+
d_x = drv.mem_alloc(h_x.nbytes)
27+
d_y = drv.mem_alloc(h_y.nbytes)
28+
d_z = drv.mem_alloc(h_z.nbytes)
29+
drv.memcpy_htod(d_x, h_x)
30+
drv.memcpy_htod(d_y, h_y)
31+
add(d_x, d_y, d_z, numpy.int32(N), grid=((N-1)//128+1, 1), block=(128, 1, 1))
32+
drv.memcpy_dtoh(h_z, d_z)
33+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')

src/03-basic-framework/add4device.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import pycuda.autoinit
2+
import pycuda.driver as drv
3+
import numpy
4+
from pycuda.compiler import DynamicSourceModule
5+
6+
import numpy
7+
8+
9+
mod1 = DynamicSourceModule(r"""
10+
double __device__ add1_device(double x, double y)
11+
{
12+
return (x + y);
13+
}
14+
15+
void __global__ add1(double *x, double *y, double *z, int N)
16+
{
17+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
18+
if(n < N)
19+
{
20+
z[n] = add1_device(x[n], y[n]);
21+
}
22+
}
23+
""")
24+
add1 = mod1.get_function("add1")
25+
26+
mod2 = DynamicSourceModule(r"""
27+
// The sum is written through the pointer z and nothing is returned, so the return type is void.
void __device__ add2_device(double x, double y, double *z)
28+
{
29+
*z = x + y;
30+
}
31+
32+
void __global__ add2(double *x, double *y, double *z, int N)
33+
{
34+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
35+
if(n < N)
36+
{
37+
add2_device(x[n], y[n], &z[n]);
38+
}
39+
}
40+
""")
41+
add2 = mod2.get_function("add2")
42+
43+
mod3 = DynamicSourceModule(r"""
44+
// The sum is written through the reference z and nothing is returned, so the return type is void.
void __device__ add3_device(double x, double y, double &z)
45+
{
46+
z = x + y;
47+
}
48+
49+
void __global__ add3(double *x, double *y, double *z, int N)
50+
{
51+
const int n = blockDim.x * blockIdx.x + threadIdx.x;
52+
if(n < N)
53+
{
54+
add3_device(x[n], y[n], z[n]);
55+
}
56+
}
57+
""")
58+
add3 = mod3.get_function("add3")
59+
60+
EPSILON = 1e-15
61+
a = 1.23
62+
b = 2.34
63+
c = 3.57
64+
N = 100000001
65+
h_x = numpy.full((N,1), a)
66+
h_y = numpy.full((N,1), b)
67+
h_z = numpy.zeros_like(h_x)
68+
d_x = drv.mem_alloc(h_x.nbytes)
69+
d_y = drv.mem_alloc(h_y.nbytes)
70+
d_z = drv.mem_alloc(h_z.nbytes)
71+
drv.memcpy_htod(d_x, h_x)
72+
drv.memcpy_htod(d_y, h_y)
73+
74+
add1(d_x, d_y, d_z, numpy.int32(N), grid=((N-1)//128+1, 1), block=(128, 1, 1))
75+
drv.memcpy_dtoh(h_z, d_z)
76+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')
77+
78+
add2(d_x, d_y, d_z, numpy.int32(N), grid=((N-1)//128+1, 1), block=(128, 1, 1))
79+
drv.memcpy_dtoh(h_z, d_z)
80+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')
81+
82+
add3(d_x, d_y, d_z, numpy.int32(N), grid=((N-1)//128+1, 1), block=(128, 1, 1))
83+
drv.memcpy_dtoh(h_z, d_z)
84+
print('No errors' if (abs(h_z-c)<EPSILON).all() else 'Has errors')

0 commit comments

Comments
 (0)