Skip to content

Commit eba3252

Browse files
committed
增添19章内容
1 parent 2897cff commit eba3252

File tree

11 files changed

+1576
-181
lines changed

11 files changed

+1576
-181
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.pythonPath": "C:\\Users\\youqi\\AppData\\Local\\Programs\\Python\\Python37\\python.exe"
3+
}

src/19-md-memory-optimization/GPU.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
import math, time, sys, os
2+
import numpy as np
3+
import pycuda.autoinit
4+
import pycuda.driver as drv
5+
from pycuda.compiler import DynamicSourceModule
6+
7+
8+
class kernel(object):
    """
    Host-side driver for the CUDA kernels of a molecular-dynamics run.

    The constructor compiles a CUDA source file with PyCUDA, looks up the
    device functions it exports and allocates the device buffers they work
    on.  Every other method either copies host data to the device or
    launches exactly one kernel.

    Parameters
    ----------
    number : int
        Number of atoms; must be positive.
    MN : int
        Number of neighbor-index slots allocated per atom; must be positive.
    kernel : str, optional
        Path of the CUDA source file.  A relative path is looked up in
        ``sys.path[0]`` first and then as given.  Defaults to ``kernel.cu``
        inside ``sys.path[0]``.
    double : bool, optional
        Use ``double``/``float64`` instead of ``float``/``float32`` for all
        real-valued data.

    Raises
    ------
    ValueError
        If ``number`` or ``MN`` is not a positive ``int``, or if the CUDA
        source file cannot be found.
    """

    # Threads per block used for every kernel launch.
    _THREADS_PER_BLOCK = 128
    # Upper bound on the number of threads launched by the reduction in
    # ``_sum`` (200 blocks of 128 threads).
    _SUM_MAX_THREADS = 25600

    def __init__(self, number, MN, kernel=None, double=False):
        # Validate first so that bad arguments never reach the GPU, where
        # they would only show up as a zero-size grid or allocation.
        if not isinstance(number, int) or number <= 0:
            raise ValueError(
                'number must be a positive int, got {!r}'.format(number))
        if not isinstance(MN, int) or MN <= 0:
            raise ValueError(
                'MN must be a positive int, got {!r}'.format(MN))

        self.real_py = 'float64' if double else 'float32'
        self.real_cpp = 'double' if double else 'float'

        kernel_code = self._read_kernel_source(kernel)

        self.MN = np.int32(MN)
        self.atomic_number = np.int32(number)
        threads = self._THREADS_PER_BLOCK
        self.block = (threads, 1, 1)
        # One thread per atom, rounded up to whole blocks.
        self.grid = ((number - 1) // threads + 1, 1)
        size_real = np.dtype(self.real_py).itemsize
        size_int = np.dtype(np.int32).itemsize

        # NOTE: this is a plain substring replacement, so every occurrence
        # of "real" in the CUDA source is rewritten, including occurrences
        # inside longer identifiers.
        kernel_program = DynamicSourceModule(
            kernel_code.replace('real', self.real_cpp))
        self._find_neighbor = kernel_program.get_function('gpu_find_neighbor')
        self._find_force = kernel_program.get_function('gpu_find_force')
        self._integrate = kernel_program.get_function('gpu_integrate')
        self._g_sum = kernel_program.get_function('gpu_sum')
        self._scale_velocity = kernel_program.get_function('gpu_scale_velocity')

        # Scalar result of the reduction, 5 potential parameters, 6 box
        # parameters.
        self.g_sum = drv.mem_alloc(size_real)
        self.lj = drv.mem_alloc(size_real * 5)
        self.box = drv.mem_alloc(size_real * 6)
        # Neighbor list: one counter and MN index slots per atom.
        self.neighbor_number = drv.mem_alloc(size_int * number)
        self.neighbor_index = drv.mem_alloc(size_int * number * MN)
        # Per-atom real-valued arrays, one element per atom each.
        self.atomic_mass = drv.mem_alloc(size_real * number)
        self.coordinate_x = drv.mem_alloc(size_real * number)
        self.coordinate_y = drv.mem_alloc(size_real * number)
        self.coordinate_z = drv.mem_alloc(size_real * number)
        self.velocity_x = drv.mem_alloc(size_real * number)
        self.velocity_y = drv.mem_alloc(size_real * number)
        self.velocity_z = drv.mem_alloc(size_real * number)
        self.force_x = drv.mem_alloc(size_real * number)
        self.force_y = drv.mem_alloc(size_real * number)
        self.force_z = drv.mem_alloc(size_real * number)
        self.pe = drv.mem_alloc(size_real * number)
        self.ke = drv.mem_alloc(size_real * number)

    @staticmethod
    def _read_kernel_source(path):
        """Return the text of the CUDA source file at ``path``.

        ``None`` selects ``kernel.cu`` in ``sys.path[0]``.  The file that is
        checked for existence is the same file that is opened; a relative
        path is tried inside ``sys.path[0]`` first and then as given.

        Raises
        ------
        ValueError
            If no candidate location holds a regular file.
        """
        if path is None:
            path = os.path.join(sys.path[0], 'kernel.cu')
        # os.path.join returns ``path`` unchanged when it is absolute, so
        # both candidates collapse to the same file in that case.
        for candidate in (os.path.join(sys.path[0], path), path):
            if os.path.isfile(candidate):
                with open(candidate, 'r') as f:
                    return f.read()
        raise ValueError(
            'CUDA kernel source not found: {!r}'.format(path))

    def _as_real(self, value):
        """Convert ``value`` to a NumPy scalar of the configured real type."""
        return np.dtype(self.real_py).type(value)

    def _copy_to_device(self, destination, values, capacity, name):
        """Copy ``values`` into ``destination`` as the configured real type.

        ``capacity`` is the number of elements allocated for
        ``destination``.  Larger inputs are rejected because the copy would
        write past the end of the device allocation.
        """
        host = np.array(values, dtype=self.real_py)
        if host.size > capacity:
            raise ValueError(
                '{} has {} elements but the device buffer holds only {}'
                .format(name, host.size, capacity))
        drv.memcpy_htod(destination, host)

    def upload(self, atomic_mass, coordinate, velocity, lj, box):
        """Copy the initial state from host memory to the device buffers.

        Parameters
        ----------
        atomic_mass : array-like
            Per-atom values, at most ``number`` elements.
        coordinate, velocity : sequence of three array-likes
            Index 0, 1 and 2 are copied to the x, y and z buffers.
        lj : array-like
            At most 5 values, copied to the ``lj`` buffer.
        box : array-like
            At most 6 values, copied to the ``box`` buffer.

        Raises
        ------
        ValueError
            If any input holds more elements than its device buffer.
        """
        atoms = int(self.atomic_number)
        self._copy_to_device(self.atomic_mass, atomic_mass, atoms, 'atomic_mass')
        self._copy_to_device(self.coordinate_x, coordinate[0], atoms, 'coordinate[0]')
        self._copy_to_device(self.coordinate_y, coordinate[1], atoms, 'coordinate[1]')
        self._copy_to_device(self.coordinate_z, coordinate[2], atoms, 'coordinate[2]')
        self._copy_to_device(self.velocity_x, velocity[0], atoms, 'velocity[0]')
        self._copy_to_device(self.velocity_y, velocity[1], atoms, 'velocity[1]')
        self._copy_to_device(self.velocity_z, velocity[2], atoms, 'velocity[2]')
        self._copy_to_device(self.lj, lj, 5, 'lj')
        self._copy_to_device(self.box, box, 6, 'box')

    def _sum(self, GPU_Array):
        """Launch ``gpu_sum`` on ``GPU_Array`` and return the result.

        The result is a 0-d NumPy array of the configured real type; it is
        also stored in ``self.h_sum``.
        """
        atoms = int(self.atomic_number)
        threads = self._THREADS_PER_BLOCK
        # Grow the per-thread factor instead of the grid, so that no more
        # than _SUM_MAX_THREADS threads are ever launched.  The factor is
        # passed to the kernel; presumably it is the number of elements each
        # thread accumulates -- confirm against kernel.cu.
        per_thread = (atoms - 1) // self._SUM_MAX_THREADS + 1
        blocks = (atoms - 1) // (threads * per_thread) + 1

        # Zero the device accumulator before the kernel runs.
        self.h_sum = np.array(0.0, dtype=self.real_py)
        drv.memcpy_htod(self.g_sum, self.h_sum)
        self._g_sum(
            self.atomic_number,
            np.int32(per_thread),
            GPU_Array,
            self.g_sum,
            grid=(blocks, 1, 1),
            block=(threads, 1, 1),
        )
        drv.memcpy_dtoh(self.h_sum, self.g_sum)

        return self.h_sum

    def sum_ke(self):
        """Return the sum over the ``ke`` device buffer."""
        return self._sum(self.ke)

    def sum_pe(self):
        """Return the sum over the ``pe`` device buffer."""
        return self._sum(self.pe)

    def integrate(self, time_step, flag):
        """Launch ``gpu_integrate``.

        Parameters
        ----------
        time_step : float
            Passed to the kernel twice: as given and halved.
        flag : int
            Forwarded to the kernel as ``int32``; its meaning is defined by
            the CUDA source, which is not part of this file.
        """
        self._integrate(
            self.atomic_number,
            self._as_real(time_step),
            self._as_real(time_step * 0.5),
            self.atomic_mass,
            self.coordinate_x,
            self.coordinate_y,
            self.coordinate_z,
            self.velocity_x,
            self.velocity_y,
            self.velocity_z,
            self.force_x,
            self.force_y,
            self.force_z,
            self.ke,
            np.int32(flag),
            grid=self.grid,
            block=self.block,
        )

    def find_neighbor(self, cutoff):
        """Launch ``gpu_find_neighbor``.

        The kernel receives ``cutoff`` squared, not ``cutoff`` itself.
        """
        self._find_neighbor(
            self.atomic_number,
            self.MN,
            self.neighbor_number,
            self.neighbor_index,
            self.box,
            self.coordinate_x,
            self.coordinate_y,
            self.coordinate_z,
            self._as_real(cutoff * cutoff),
            grid=self.grid,
            block=self.block,
        )

    def find_force(self):
        """Launch ``gpu_find_force`` on the current device state."""
        self._find_force(
            self.lj,
            self.atomic_number,
            self.neighbor_number,
            self.neighbor_index,
            self.box,
            self.coordinate_x,
            self.coordinate_y,
            self.coordinate_z,
            self.force_x,
            self.force_y,
            self.force_z,
            self.pe,
            grid=self.grid,
            block=self.block,
        )

    def scale_velocity(self, target_temperature):
        """Rescale all velocities towards ``target_temperature``.

        The current temperature is computed as
        ``sum(ke) / (1.5 * kB * number)`` and the velocities are multiplied
        by ``sqrt(target_temperature / temperature)``.

        Nothing is launched when the current temperature is not positive:
        the scale factor would be infinite or undefined and would turn every
        velocity into NaN.
        """
        # Boltzmann constant; the value corresponds to units of eV/K.
        kB = 8.617343e-5
        kinetic = float(self._sum(self.ke))
        temperature = kinetic / (1.5 * kB * int(self.atomic_number))
        # ``not >`` also catches NaN, which ``<=`` would let through.
        if not temperature > 0.0:
            return
        self._scale_velocity(
            self.atomic_number,
            self._as_real(math.sqrt(target_temperature / temperature)),
            self.velocity_x,
            self.velocity_y,
            self.velocity_z,
            grid=self.grid,
            block=self.block,
        )
169+
Binary file not shown.

0 commit comments

Comments
 (0)