c# - ネイティブコレクションを使用した Unity ジョブシステムは、C# コレクションを使用したジョブシステムよりも遅い

Question

私は現在、非常に CPU 負荷の高いシミュレーションに取り組んでおり、大幅なパフォーマンス向上を見込んで、ジョブシステム + Burst コンパイルに移行しました。とはいえ、ジョブシステムへの切り替えは新たなボトルネックを生み出しました。これを回避する方法があるかどうかをお尋ねしたいと思います。

これが私のシミュレーションの仕組みです。基本的に、ワールド空間で 2 次元の四角形を移動させ、その経路上のグリッド座標を記録します。ジョブを使わない場合、最大のボトルネックは SampleLine でした。ジョブシステムを使う場合、ボトルネックは trajectory.Add / .AddNative と occupied.IndexOf です。

    // Per-step bookkeeping: one job handle, one single-element timestamp buffer
    // and one cell list are collected for every simulated step.
    var pendingJobs = new NativeList<JobHandle>(Allocator.Temp);
    var timestampSlots = new List<NativeArray<float>>();
    var cellsPerStep = new List<NativeList<int2>>();

    while (simulate)
    {
        move(dummy);
        time += timestep;

        // Output buffers handed to GetOccupiedTilesJob by ref; they are
        // disposed in the collection loop further down.
        var slot = new NativeArray<float>(1, Allocator.TempJob);
        var cells = new NativeList<int2>(Allocator.TempJob);

        pendingJobs.Add(GetOccupiedTilesJob(timestep, dummy, grid, ref slot, ref cells));
        timestampSlots.Add(slot);
        cellsPerStep.Add(cells);
    }

    // Block until every collected handle has completed, then move the data
    // into the trajectory and release the native buffers.
    JobHandle.CompleteAll(pendingJobs);

    int stepCount = timestampSlots.Count;
    for (int step = 0; step < stepCount; step++)
    {
        NativeArray<float> finishedSlot = timestampSlots[step];
        NativeList<int2> finishedCells = cellsPerStep[step];

        // BOTTLENECK HERE
        // Managed alternative (trajectory.Add), kept for comparison:
        // TODO: once unity supports it, use finishedCells.ToList()
        /*List<int2> coordinates = new List<int2>();
        for (int j = 0; j < finishedCells.Length; j++)
        {
            coordinates.Add(finishedCells[j]);
        }
        trajectory.Add(finishedSlot[0], coordinates);*/
        trajectory.AddNative(finishedSlot[0], finishedCells);

        finishedSlot.Dispose();
        finishedCells.Dispose();
    }

    pendingJobs.Dispose();
    return trajectory;

trajectory.Add は Dictionary<int2, float2> を、.AddNative は NativeHashMap<int2, float2> を値の格納に使用します。最初に NativeList のすべての値を無駄なリストにコピーする必要があるにもかかわらず、trajectory.Add は AddNative のほぼ 2 倍高速です。配列をバッファーとして使用すれば、おそらくさらに高速化できます。

使用したジョブは次のとおりです。

/// <summary>
/// Job that collects the grid cells touched by the quadrilateral A-B-C-D:
/// its four corners, its four edges, and a sweep of lines running from
/// edge AB to edge DC. It also copies <see cref="timestamp"/> into
/// <see cref="timestampArr"/> so the caller can read it back afterwards.
/// </summary>
[BurstCompile]
public struct GetOccupiedTiles : IJob
{
    // Corners of the quadrilateral; edges are sampled in the order A-B, B-C, C-D, D-A.
    public float2 A;
    public float2 B;
    public float2 C;
    public float2 D;

    // Value written to timestampArr[0] at the end of Execute.
    public float timestamp;
    public NativeArray<float> timestampArr; // only for storage
    // Output: the cells found by this job.
    public NativeList<int2> result;

    // Grid parameters forwarded unchanged to WorldToCell.
    [ReadOnly] public float2 gridCenter;
    [ReadOnly] public float cellSize;

    /// <summary>
    /// Fills <see cref="result"/> with the corner cells, the edge cells and
    /// the interior sweep cells, then stores the timestamp.
    /// </summary>
    public void Execute()
    {
        // Distance between two consecutive samples: a quarter of a cell.
        float step = cellSize / 4f;

        // Corner cells are appended unconditionally (no duplicate check here).
        result.Add(WorldToCell(A, gridCenter, cellSize));
        result.Add(WorldToCell(B, gridCenter, cellSize));
        result.Add(WorldToCell(C, gridCenter, cellSize));
        result.Add(WorldToCell(D, gridCenter, cellSize));

        // Outline of the quadrilateral.
        SampleLine(A, B, step, ref result, gridCenter, cellSize);
        SampleLine(B, C, step, ref result, gridCenter, cellSize);
        SampleLine(C, D, step, ref result, gridCenter, cellSize);
        SampleLine(D, A, step, ref result, gridCenter, cellSize);

        // Interior: walk along AB and DC at the same distance from A and D
        // and sample the line joining the two points. The length of AB
        // bounds the walk on both edges.
        float2 edgeAB = B - A;
        float2 directionAB = math.normalize(edgeAB);
        float2 directionDC = math.normalize(C - D);
        float lengthAB = math.sqrt(edgeAB.x * edgeAB.x + edgeAB.y * edgeAB.y);

        for (float travelled = step; travelled < lengthAB; travelled += step)
        {
            float2 pointOnAB = A + directionAB * travelled;
            float2 pointOnDC = D + directionDC * travelled;

            SampleLine(pointOnAB, pointOnDC, step, ref result, gridCenter, cellSize);
        }

        timestampArr[0] = timestamp;
    }

    /// <summary>
    /// Walks from <paramref name="start"/> towards <paramref name="end"/> in
    /// increments of <paramref name="sampleRate"/> and appends the cell of
    /// each visited point to <paramref name="occupied"/> unless that cell is
    /// already in the list. Neither endpoint itself is sampled: the first
    /// sample lies one increment after the start, and the walk stops before
    /// the travelled distance reaches the segment length.
    /// </summary>
    private static void SampleLine(float2 start, float2 end, float sampleRate, ref NativeList<int2> occupied, float2 gridCenter, float cellSize)
    {
        float2 delta = end - start;
        float2 stride = math.normalize(delta) * sampleRate;
        float2 probe = start + stride;
        float segmentLength = math.sqrt(delta.x * delta.x + delta.y * delta.y);

        for (float travelled = sampleRate; travelled < segmentLength; travelled += sampleRate)
        {
            int2 cell = WorldToCell(probe, gridCenter, cellSize);

            // Duplicate check via IndexOf on the list; a hash set would be
            // preferable here because IndexOf is very slow.
            bool notYetRecorded = occupied.IndexOf(cell) == -1;
            if (notYetRecorded)
            {
                occupied.Add(cell);
            }

            probe += stride;
        }
    }

    /// <summary>
    /// Converts a position to integer cell coordinates: the offset from
    /// <paramref name="gridCenter"/> is divided by <paramref name="cellSize"/>
    /// and floored per component.
    /// </summary>
    /// <param name="vector">Position to convert.</param>
    /// <param name="gridCenter">Position that maps to the origin of the cell coordinates.</param>
    /// <param name="cellSize">Edge length of one cell.</param>
    /// <returns>The floored cell coordinates of <paramref name="vector"/>.</returns>
    public static int2 WorldToCell(float2 vector, float2 gridCenter, float cellSize)
    {
        float2 offsetFromCenter = -(gridCenter - vector);
        float2 inCellUnits = offsetFromCenter / cellSize;
        return (int2)math.floor(inCellUnits);
    }
}

ここで質問です。コストのかかる NativeList から List への変換を避けるにはどうすればよいでしょうか? NativeList を List に変換してそのリストを Dictionary に追加するよりも、NativeList を NativeHashMap に追加する方が遅いのはなぜでしょうか?

また、SampleLine は並行して呼び出すこともできるので、ジョブを小さなジョブに分割した方がよいでしょうか? しかし、ジョブが他のジョブを開始できるかどうか、あるいは (最も頻繁に呼び出される) WorldToCell だけをジョブにして、残りを通常の C# で実行すべきかどうかはわかりません。私はジョブシステムにまったく慣れていないので、何をどのように「ジョブ化」すべきかわかりません。1 つの大きなジョブのままにしておくことも、小さな WorldToCell ジョブをたくさん作成することも、3 つの計算すべてを 1 つのジョブにすることもできます。

c# - ネイティブ コレクションを使用した Unity ジョブ システムは、C# コレクションを使用したジョブ システムよりも遅い

0 に答える 0

Related

Reference

c# - ネイティブコレクションを使用した Unity ジョブシステムは、C# コレクションを使用したジョブシステムよりも遅い