cuda - 入力と出力の型が異なる場合に Thrust の reduce が動作しない

Question

Thrust を使用して値の配列の最小値と最大値を求めるリダクションを行おうとしていますが、行き詰まっています。float の配列が与えられたとき、最小値と最大値を 1 回のパスで求めたいのですが、Thrust の reduce を使用すると、あらゆるテンプレートコンパイルエラーの母 (少なくとも叔母くらい) とでも言うべきエラーが発生します。

私の元のコードには、2 つの float4 配列にまたがる 5 つの値リストがあり、それらをリダクションしたいのですが、ここでは次の短い例に要約しました。

// Binary functor from the question: given two floats, packs Min(lhs, rhs) and
// Max(lhs, rhs) into a float2 (x = smaller, y = larger).
// NOTE: the argument type (float) differs from the result type (float2); this
// mismatch is what the question is about. Min/Max are helpers not shown here.
struct ReduceMinMax {
    __host__ __device__
    float2 operator()(float lhs, float rhs) {
        return make_float2(Min(lhs, rhs), Max(lhs, rhs));
    }
};

// Minimal reproduction: fills a 4-element device vector and tries to reduce it
// with ReduceMinMax. argc/argv are unused.
int main(int argc, char *argv[]){

    // Sample data living in device memory: {3, 5, 6, 1}.
    thrust::device_vector<float> hat(4);
    hat[0] = 3;
    hat[1] = 5;
    hat[2] = 6;
    hat[3] = 1;

    ReduceMinMax binary_op_of_dooooom;
    // This is the call reported to fail to compile: the init value is a float
    // while the functor returns float2. The result is also discarded here.
    thrust::reduce(hat.begin(), hat.end(), 4.0f, binary_op_of_dooooom);
}

代わりに 2 つのリダクションに分割すれば、もちろん動作します。私の質問は次のとおりです。Thrust を使用して 1 回のパスで最小値と最大値の両方を求めることは可能ですか? 可能でない場合、上記のリダクションを実現する最も効率的な方法は何ですか? transform iterator は役に立ちますか (もしそうなら、リダクションは 1 パスで済みますか?)

追加情報: 私は Thrust 1.5 (CUDA 4.2.7 に付属) を使用しています。実際のコードでは、単なる reduce ではなく reduce_by_key を使用しています。この質問を書いている間に transform_reduce を見つけましたが、これはキーを考慮しません。

score 4 · Accepted Answer

talonmies が指摘しているように、thrust::reduce は二項演算子の引数の型が結果の型と一致することを期待しているため、このリダクションはコンパイルできません。ReduceMinMax の引数の型は float ですが、結果の型は float2 です。

thrust::minmax_element はこの操作を直接実装しています。ただし必要であれば、代わりに thrust::reduce を一般化した thrust::inner_product を使用してリダクションを実装することもできます。

#include <cassert>
#include <limits>

#include <thrust/device_vector.h>
#include <thrust/extrema.h>
#include <thrust/inner_product.h>

// "Multiply" step of the inner_product-based reduction: lifts a pair of floats
// into a float2 whose x is the smaller and whose y is the larger of the two.
struct minmax_float
{
  // const-qualified: the functor is stateless, so it must also be callable
  // through a const object or const reference.
  __host__ __device__
  float2 operator()(float lhs, float rhs) const
  {
    return make_float2(thrust::min(lhs, rhs), thrust::max(lhs, rhs));
  }
};

// "Add" step of the inner_product-based reduction: merges two partial results,
// where x carries a running minimum and y carries a running maximum.
struct minmax_float2
{
  // const-qualified: the functor is stateless, so it must also be callable
  // through a const object or const reference.
  __host__ __device__
  float2 operator()(float2 lhs, float2 rhs) const
  {
    return make_float2(thrust::min(lhs.x, rhs.x), thrust::max(lhs.y, rhs.y));
  }
};

// Computes the minimum (result.x) and maximum (result.y) of x in one call to
// thrust::inner_product. The range is paired with itself: minmax_float turns
// each element pair into a float2, and minmax_float2 folds those together.
// For an empty vector the seed is returned unchanged, i.e. (+inf, -inf).
float2 minmax1(const thrust::device_vector<float> &x)
{
  // Seed with the identity of (min, max): +inf never wins a min and -inf never
  // wins a max. A finite seed such as (4, 4) takes part in the reduction and
  // corrupts the result whenever every element lies above or below it.
  const float inf = std::numeric_limits<float>::infinity();
  return thrust::inner_product(x.begin(), x.end(), x.begin(), make_float2(inf, -inf), minmax_float2(), minmax_float());
}

// Computes the minimum (result.x) and maximum (result.y) of x by delegating to
// thrust::minmax_element.
// NOTE: the returned iterators are dereferenced unconditionally, so x is
// assumed to be non-empty.
float2 minmax2(const thrust::device_vector<float> &x)
{
  typedef thrust::device_vector<float>::const_iterator const_iter;

  // .first refers to the smallest element, .second to the largest.
  const thrust::pair<const_iter, const_iter> extrema =
      thrust::minmax_element(x.begin(), x.end());

  return make_float2(*extrema.first, *extrema.second);
}

int main()
{
  thrust::device_vector<float> hat(4);
  hat[0] = 3;
  hat[1] = 5;
  hat[2] = 6;
  hat[3] = 1;

  float2 result1 = minmax1(hat);
  float2 result2 = minmax2(hat);

  assert(result1.x == result2.x);
  assert(result1.y == result2.y);
}

cuda - 入力と出力の型が異なる場合に Thrust の reduce が動作しない

回答 1 件

Related

Reference