python - PyTorch と比較して Numpy 有限差分計算が法外に遅い

Question

私の目標は、有限差分を介して反応拡散微分方程式をシミュレートすることですが、2d 波動方程式から始めました。これにはかなりの時間がかかるため、基本的な実装をできるだけ高速にしたいと考えており、numpy/cupy/numba/pytorch を使ってどれが最速かを調べています。本当に奇妙に感じるのは、PyTorch を使用した私の実装は、反復の実行にかかる時間はほとんど一定ですが、numpy はますます遅くなることです。torch には何らかのインテリジェントなガベージコレクションがバックグラウンドで実行されており、numpy は未使用の配列を削除しないと想定しています (torch は一定ですが、メモリ使用量の増加に見られます)。ただし、numpy コードを手動で最適化してインプレースで実行し、未使用の配列を削除した後でも、まだそうではありません。

以下は、コメント内のトーチコードとともに、numpy コードです。

import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import laplace

# import torch
# import torch.nn.functional as F
import matplotlib.animation as animation
import tqdm


def I(X, Y):
    d = (X - 0.5)**2 + (Y - 0.5)**2
    inner = (d < 0.01)
    # inner = (d < 0.01).float()
    return 0 * X * (1 - inner) + (0 * X + 1) * inner


def laplace(u):
    # u_pad = F.pad(u.unsqueeze(0).unsqueeze_(0), (1, 1, 1, 1), mode='replicate').squeeze()
    u_pad = np.pad(u, (1, 1), mode='edge')
    u = u_pad[2:, 1:-1] + u_pad[1:-1, 2:] + u_pad[:-2, 1:-1] + u_pad[1:-1, :-2] - 4 * u
    u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
    return u


def initial_step(u, C):
    return u - laplace(u) * C**2 / 2

def step(u, u_prev, C):
    return -u_prev + 2 * u + laplace(u) * C**2


class Simulate2d():
    def __init__(self, init_fn, initial_step_fn, step_fn, c=0.5, T=1, n_grid=1000, n_steps=200, n_levels=30):
        # x = torch.linspace(0, 1, n_grid)
        x = np.linspace(0, 1, n_grid)
        dx = x[1] - x[0]
        dt = T / n_steps

        self.t = 0
        self.n_steps = n_steps

        # self.mesh = torch.meshgrid(x, x)
        # self.cvals = torch.linspace(-1.1, 1.1, n_levels)
        self.mesh = np.meshgrid(x, x)
        self.cvals = np.linspace(-1.1, 1.1, n_levels)

        self.C = (c * dt / dx)

        self.U_prev = None
        self.U = init_fn(*self.mesh)
        self.cont = ax.contourf(*self.mesh, self.U, self.cvals)

        self.step_fn = step_fn
        self.initial_step_fn = initial_step_fn
        self.pbar = None

    def forward(self, u, u_prev):
        return self.initial_step_fn(u, self.C) if self.t == 0 else self.step_fn(u, u_prev, self.C)

    def animation_step(self, t):
        self.t = t
        for c in self.cont.collections:
            c.remove()

        u_next = self.forward(self.U, self.U_prev)
        # del self.U_prev
        self.U, self.U_prev = u_next, self.U
        self.cont = plt.contourf(*self.mesh, self.U, self.cvals)

        plt.title(f't={t}')
        if self.pbar is not None:
            self.pbar.update(1)

        return self.cont

    def test(self):
        for t in tqdm.trange(self.n_steps):
            self.animation_step(t)

    def create_animation(self, file, dpi=300):
        self.pbar = tqdm.tqdm(total=self.n_steps)
        ani = animation.FuncAnimation(fig, self.animation_step, frames=self.n_steps)
        ani.save(file, dpi=dpi)
        print(f'{file} saved')
        return ani


fig, ax = plt.subplots(figsize=(15, 15))
plt.axis('equal')
plt.xlim(0, 1)
plt.ylim(0, 1)

simu = Simulate2d(init_fn=I, initial_step_fn=initial_step, step_fn=step)
# simu.create_animation('reaction-diffusion.gif')
simu.test()

> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion_torch.py --no-banner --no-confirm-exit <

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:02<00:00,  3.20it/s]

Terminal will be reused by tasks, press any key to close it.

> Executing task: python -m IPython /home/fe/willis/git/reaction-diffusion/reaction_diffusion.py --no-banner --no-confirm-exit <

...[hiding numpy runtime warnings]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [02:54<00:00,  1.15it/s]

Terminal will be reused by tasks, press any key to close it.

これは、操作がインプレースになるように変更された部分です。

def laplace(u):
    u_pad = np.pad(u, (1, 1), mode='edge')
    u = - 4 * u
    u += u_pad[2:, 1:-1]
    u += u_pad[1:-1, 2:]
    u += u_pad[:-2, 1:-1]
    u += u_pad[1:-1, :-2]
    u[:, 0] = u[0, :] = u[:, -1] = u[-1, :] = 0
    del u_pad
    return u

def step(u, u_prev, C):
    lap = laplace(u)
    lap *= C**2
    out = 2 * u
    out += lap
    out -= u_prev
    del lap
    return out

    ...
    def animation_step(self, t):
        self.t = t
        for c in self.cont.collections:
            c.remove()

        u_next = self.forward(self.U, self.U_prev)
        del self.U_prev
        ...

基本的に私は尋ねています: numpy コードが PyTorch 実装と同じくらい速くなるために何をする必要がありますか?

python - PyTorch と比較して Numpy 有限差分計算が法外に遅い

0 に答える 0

Related

Reference