2

私はmongodbを初めて使用しますが、アドバイスをいただけないでしょうか。私は次のコレクションを持っています

{ "_id" : "u1", "item" : [ "a", "b", "c" ] }
{ "_id" : "u2", "item" : [ "b", "d", "e" ] }
{ "_id" : "u3", "item" : [ "a", "c", "f" ] }
{ "_id" : "u4", "item" : [ "c" ] }

たとえばユーザー1と2、ユーザー1と4のように、ユーザーの各ペアについてアイテムの和集合と共通部分を計算し、その結果を格納する新しいコレクションを作成したいと思います。結果は次のようになります。

{ "_id" : "u12", "intersect_count":1,"union_count":6 }
{ "_id" : "u14", "intersect_count":1,"union_count":4}

非効率的であるため、ペアごとにペアワイズ操作を実行したくありません。それをより効率的に行うためのトリックはありますか?

4

1 に答える 1

2

My solution is this:

/**
 * Map step: pairs the current document with every other user id.
 *
 * Reads `_id` and `item` from `this` and expects `ids` (the list of all user
 * ids) to be supplied through the mapReduce `scope` option. For every id other
 * than its own it emits the document's `item` array under a key made of the
 * two ids sorted and joined with "_", so both members of a pair emit to the
 * same key.
 */
map_func = function() {
  // Declared with `var` so the alias stays local to this call instead of
  // leaking an implicit global into the map/reduce environment.
  var self = this;
  ids.forEach(function(id) {
    // Never pair a user with itself.
    if (id === self._id) return;
    emit([id, self._id].sort().join('_'), self.item);
  });
};

/**
 * Reduce step: turns the item arrays emitted for one pair key into counts.
 *
 * `vals` is spread as the argument list of `intersect_func` and `union_func`
 * (both provided via the mapReduce `scope`), and the lengths of their results
 * are returned as `intersect_count` and `union_count`.
 *
 * NOTE(review): the returned object does not have the same shape as the
 * emitted values (arrays). MongoDB documents that reduce may be re-invoked on
 * its own output, so this presumably relies on each key being reduced exactly
 * once with both arrays - confirm for your deployment.
 */
reduce_func = function(key, vals) {
  var shared = intersect_func.apply(null, vals);
  var combined = union_func.apply(null, vals);
  var counts = {};
  counts.intersect_count = shared.length;
  counts.union_count = combined.length;
  return counts;
};

// Options passed to mapReduce below.
// NOTE: union_func and intersect_func are read by value here, so they must
// already be defined when this object literal is evaluated.
opts = {
  // Collection that receives the results (queried later as db.redused_items).
  out: "redused_items",
  // Names made available inside map_func and reduce_func.
  scope: {
    // All distinct _id values of the items collection, iterated by map_func.
    ids: db.items.distinct('_id'),
    union_func: union_func,
    intersect_func: intersect_func
  }
}

// Run the job over the items collection with the functions and options above.
db.items.mapReduce( map_func, reduce_func, opts )

If you have N elements in your collection, then map_func will emit N*(N-1) elements for later reduction. reduce_func will then reduce them into N*(N-1)/2 new elements, one per pair of users.

I used scope to pass global variables (ids) and helper methods (union_func, intersect_func) into map_func and reduce_func. Otherwise MapReduce will fail with an error, because it evaluates map_func and reduce_func in a special environment where those names are not defined.

Result of calling MapReduce:

> db.redused_items.find()
{ "_id" : "u1_u2", "value" : { "intersect_count" : 1, "union_count" : 6 } }
{ "_id" : "u1_u3", "value" : { "intersect_count" : 2, "union_count" : 6 } }
{ "_id" : "u1_u4", "value" : { "intersect_count" : 1, "union_count" : 4 } }
{ "_id" : "u2_u3", "value" : { "intersect_count" : 0, "union_count" : 6 } }
{ "_id" : "u2_u4", "value" : { "intersect_count" : 0, "union_count" : 4 } }
{ "_id" : "u3_u4", "value" : { "intersect_count" : 1, "union_count" : 4 } }

I used the following helpers for my tests:

/**
 * Test helper: returns a new array holding the elements of `a1` followed by
 * the elements of `a2`.
 *
 * Duplicates are NOT removed, so for two arrays the result length is the sum
 * of both lengths (this is what produces the union_count values shown above).
 */
union_func = function(a1, a2) {
  var combined = [].concat(a1, a2);
  return combined;
};

/**
 * Test helper: returns the elements of `a1` that also occur in `a2`,
 * in `a1` order. Membership is checked with `indexOf`, and an element that
 * appears several times in `a1` is kept each time.
 */
intersect_func = function(a1, a2) {
  var shared = [];
  a1.forEach(function(candidate) {
    if (a2.indexOf(candidate) !== -1) {
      shared.push(candidate);
    }
  });
  return shared;
};

An alternative is to use a MongoDB cursor inside map_func instead of the global ids object:

/**
 * Map step (cursor variant): pairs the current document with every other
 * document found in `db.items`.
 *
 * Reads `_id` and `item` from `this`. For every other document it emits this
 * document's `item` array under a key made of the two ids sorted and joined
 * with "_", so both members of a pair emit to the same key.
 *
 * NOTE(review): this runs a query against db.items for every mapped document,
 * and current MongoDB documentation states that map functions should not
 * access the database - confirm this is supported on your server version, or
 * pass the ids through `scope` instead.
 */
map_func = function() {
  // Declared with `var` so the alias stays local to this call instead of
  // leaking an implicit global into the map/reduce environment.
  var self = this;
  // Only _id is needed, so request it with a projection document.
  db.items.find({}, { _id: 1 }).forEach(function(elem) {
    // Never pair a user with itself.
    if (elem._id === self._id) return;
    emit([elem._id, self._id].sort().join('_'), self.item);
  });
};

// Options for the cursor-based variant: no `ids` entry is needed in scope
// because this map_func reads the ids from db.items itself.
// NOTE: union_func and intersect_func are read by value here, so they must
// already be defined when this object literal is evaluated.
opts = {
  // Collection that receives the results.
  out: "redused_items",
  // Helper functions made available inside reduce_func.
  scope: {
    union_func: union_func,
    intersect_func: intersect_func
  }
}

// Run the job again with the cursor-based map_func and the same reduce_func.
db.items.mapReduce( map_func, reduce_func, opts )

The result will be the same.

于 2012-12-18T21:59:27.397 に答える