高性能的Python扩展:第一部分
高性能的Python扩展:第一部分
# lib/sim.py class World(object): """World is a structure that holds the state of N bodies and additional variables. threads : (int) The number of threads to use for multithreaded implementations. STATE OF THE WORLD: N : (int) The number of bodies in the simulation. m : (1D ndarray) The mass of each body. r : (2D ndarray) The position of each body. v : (2D ndarray) The velocity of each body. F : (2D ndarray) The force on each body. TEMPORARY VARIABLES: Ft : (3D ndarray) A 2D force array for each thread‘s local storage. s : (2D ndarray) The vectors from one body to all others. s3 : (1D ndarray) The norm of each s vector. NOTE: Ft is used by parallel algorithms for thread-local storage. s and s3 are only used by the Python implementation. """ def __init__(self, N, threads=1, m_min=1, m_max=30.0, r_max=50.0, v_max=4.0, dt=1e-3): self.threads = threads self.N = N self.m = np.random.uniform(m_min, m_max, N) self.r = np.random.uniform(-r_max, r_max, (N, 2)) self.v = np.random.uniform(-v_max, v_max, (N, 2)) self.F = np.zeros_like(self.r) self.Ft = np.zeros((threads, N, 2)) self.s = np.zeros_like(self.r) self.s3 = np.zeros_like(self.m) self.dt = dt |
# lib/sim.py def compute_F(w): """Compute the force on each body in the world, w.""" for i in xrange(w.N): w.s[:] = w.r - w.r[i] w.s3[:] = (w.s[:,0]**2 + w.s[:,1]**2)**1.5 w.s3[i] = 1.0 # This makes the self-force zero. w.F[i] = (w.m[i] * w.m[:,None] * w.s / w.s3[:,None]).sum(0) def evolve(w, steps): """Evolve the world, w, through the given number of steps.""" for _ in xrange(steps): compute_F(w) w.v += w.F * w.dt / w.m[:,None] w.r += w.v * w.dt |
428710 function calls (428521 primitive calls) in 16.836 seconds Ordered by: cumulative time ncalls tottime percall cumtime percall filename:lineno(function) 1 0.000 0.000 16.837 16.837 bench.py:2(<module>) 1 0.062 0.062 16.756 16.756 sim.py:60(evolve) 4096 15.551 0.004 16.693 0.004 sim.py:51(compute_F) 413696 1.142 0.000 1.142 0.000 {method ‘sum‘ ... 3 0.002 0.001 0.115 0.038 __init__.py:1(<module>) ... |
static inline void compute_F(npy_int64 N, PyArrayObject *py_m, PyArrayObject *py_r, PyArrayObject *py_F) { npy_int64 i, j; npy_float64 sx, sy, Fx, Fy, s3, tmp; // Set all forces to zero. for(i = 0; i < N; ++i) { F(i, 0) = F(i, 1) = 0; } // Compute forces between pairs of bodies. for(i = 0; i < N; ++i) { for(j = i + 1; j < N; ++j) { sx = r(j, 0) - r(i, 0); sy = r(j, 1) - r(i, 1); s3 = sqrt(sx*sx + sy*sy); s3 *= s3 * s3; tmp = m(i) * m(j) / s3; Fx = tmp * sx; Fy = tmp * sy; F(i, 0) += Fx; F(i, 1) += Fy; F(j, 0) -= Fx; F(j, 1) -= Fy; } } } |
static PyObject *evolve(PyObject *self, PyObject *args) { // Declare variables. npy_int64 N, threads, steps, step, i; npy_float64 dt; PyArrayObject *py_m, *py_r, *py_v, *py_F; // Parse arguments. if (!PyArg_ParseTuple(args, "ldllO!O!O!O!", &threads, &dt, &steps, &N, &PyArray_Type, &py_m, &PyArray_Type, &py_r, &PyArray_Type, &py_v, &PyArray_Type, &py_F)) { return NULL; } // Evolve the world. for(step = 0; step< steps; ++step) { compute_F(N, py_m, py_r, py_F); for(i = 0; i < N; ++i) { v(i, 0) += F(i, 0) * dt / m(i); v(i, 1) += F(i, 1) * dt / m(i); r(i, 0) += v(i, 0) * dt; r(i, 1) += v(i, 1) * dt; } } Py_RETURN_NONE; } |
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。