php - node-mongodb-native の挿入パフォーマンス

Question

MongoDB で Node.js のパフォーマンスをテストしています。これらのそれぞれが他とは無関係に問題ないことはわかっていますが、いくつかのテストを試して感触をつかもうとしています。この問題に遭遇しましたが、ソースを特定するのに苦労しています。

問題

1 つの Node.js プログラムから 1,000,000 件のレコードを挿入しようとしていますが、動作が極端に遅く、実行に 20 分ほどかかります。これは Mac と CentOS のどちらでも発生しますが、両者の挙動はわずかに異なります。処理自体は最終的には完了します。

効果はスワッピングに似ていますが、そうではありません (メモリが 2 GB を超えることはありません)。MongoDB に対して開かれている接続は 3 つだけで、ほとんどの場合、データが挿入されていません。多くのコンテキスト切り替えを行っているようで、Node.js の CPU コアが使い果たされています。

効果は、このスレッドで言及されているものと似ています。

PHP を使って同じことを試すと、2〜3 分で終了します。特に問題は起きません。

なんで？

考えられる原因

私は現在、これが Node.js ソケットの問題、舞台裏で libev で起こっていること、またはその他の node-mongodb-native の問題であると考えています。私は完全に間違っているかもしれないので、ここで少しガイダンスを探しています。

他の Node.js MongoDB アダプターについては、Mongolian を試してみましたが、ドキュメントをバッチ挿入するためにドキュメントをキューに入れているように見え、最終的にメモリ不足になります。それで終わりです。(補足: 16 GB のボックス制限にさえ近づいていないため、これについても理由はわかりませんが、それ以上の調査は行っていません。)

実際、(クアッドコアマシン上で) 4 つのワーカーを使用してマスター/ワーカークラスターをテストしたところ、2 ～ 3 分で完了したことを言及しておく必要があります。

コード

これが私の Node.js CoffeeScript プログラムです。

mongodb = require "mongodb"
microtime = require "microtime"
crypto = require "crypto"

# Benchmark script from the question: opens the "test" database and inserts
# `times` documents into the "foo" collection from a single process.
times = 1000000
server = new mongodb.Server "127.0.0.1", 27017
db = mongodb.Db "test", server
db.open (error, client) ->
  throw error if error?

  collection = mongodb.Collection client, "foo"

  # The loop issues every collection.insert call back-to-back; it never waits
  # for an insert callback before starting the next iteration.
  # NOTE(review): this loop is the last expression of the callback, so
  # CoffeeScript presumably collects each insert's return value into an array
  # as the implicit return value -- confirm against the compiled output.
  for i in [0...times]
    # Progress line every 100000 iterations (counts calls issued, not
    # inserts acknowledged).
    console.log "Inserting #{i}..." if i % 100000 == 0

    # Key = SHA-1 hex digest of the microtime.now() value concatenated (as a
    # string) with a random integer in 0..254.
    hash = crypto.createHash "sha1"
    hash.update "" + microtime.now() + (Math.random() * 255 | 0)
    key = hash.digest "hex"

    doc =
      key: key,
      foo1: 1000,
      foo2: 1000,
      foo3: 1000,
      bar1: 2000,
      bar2: 2000,
      bar3: 2000,
      baz1: 3000,
      baz2: 3000,
      baz3: 3000

    # Insert with the `safe: true` option; a failed insert is only logged.
    collection.insert doc, safe: true, (error, response) ->
      console.log error.message if error

これとほぼ同等の PHP プログラムを次に示します。

<?php
// PHP counterpart of the benchmark: inserts $times documents into the "foo"
// collection of the "test" database, one insert() call per loop iteration.
// NOTE(review): new Mongo() is called without arguments, so it presumably
// connects to the driver's default host/port -- confirm.
$mongo = new Mongo();
$collection = $mongo->test->foo;

$times = 1000000;
for ($i = 0; $i < $times; $i++) {
    // Progress line every 100000 iterations.
    if ($i % 100000 == 0) {
        print "Inserting $i...\n";
    }

    $doc = array(
        // Note: `+` here is numeric addition (timestamp float plus a random
        // integer in 0..255), not string concatenation; sha1() then hashes
        // the resulting number's string form.
        "key" => sha1(microtime(true) + rand(0, 255)),
        "foo1" => 1000,
        "foo2" => 1000,
        "foo3" => 1000,
        "bar1" => 2000,
        "bar2" => 2000,
        "bar3" => 2000,
        "baz1" => 3000,
        "baz2" => 3000,
        "baz3" => 3000
    );
    // Insert with the "safe" option; a MongoCursorException is reported and
    // the loop continues with the next document.
    try {
        $collection->insert($doc, array("safe" => true));
    } catch (MongoCursorException $e) {
        print $e->getMessage() . "\n";
    }
}

score 2 · Accepted Answer

V8 のデフォルトのヒープ制限に達しているようです。この制限の削除についてのブログ投稿を書きました。

ガベージコレクターは、1.4GB の制限を下回るまで常に実行されるため、おそらくおかしくなり、CPU を食い尽くしています。

score 1 · Accepted Answer

db.open コールバック関数の最後で明示的に値を返すとどうなりますか? 生成された JavaScript コードは、collection.insert のすべての戻り値を大きな「_results」配列にプッシュしているため、ますます遅くなると思います。

// JavaScript emitted by the CoffeeScript compiler for the question's db.open
// callback; the loop body that builds `doc` is elided as "...".
db.open(function(error, client) {
  var collection, doc, hash, i, key, _i, _results;
  if (error != null) {
    throw error;
  }
  collection = mongodb.Collection(client, "foo");
  // Accumulator for the loop's value: the loop is the callback's last
  // expression, so the compiler turns it into a returned array.
  _results = [];
  for (i = _i = 0; 0 <= times ? _i < times : _i > times; i = 0 <= times ? ++_i : --_i) {
    ...
    // The return value of every insert call is appended, so the array ends
    // up with one entry per iteration.
    _results.push(collection.insert(doc, {
      safe: true
    }, function(error, response) {
      if (error) {
        return console.log(error.message);
      }
    }));
  }
  return _results;
});

これを coffeescript の最後に追加してみてください:

    # Last statement of the loop body, unchanged from the question's code.
    collection.insert doc, safe: true, (error, response) ->
      console.log error.message if error

  # Bare return at the callback's indentation level: the loop is then no
  # longer the callback's final expression, which is intended to stop
  # CoffeeScript from collecting the insert results into an array.
  return

**更新:** 実際にあなたのプログラムを実行してみたところ、さらにいくつかの問題に気付きました:

最大の問題は、100 万回の挿入を同期的に生成しようとしていることです。これにより RAM が大量に消費され、最終的には挿入が止まってしまいます (少なくとも私の環境ではそうでした)。RAM 使用量が 800MB 前後になった時点でプロセスを強制終了しました。

collection.insert() を呼び出す方法を変更して、非同期で動作するようにする必要があります。

わかりやすくするためにいくつかの機能を分割して、次のように書き直しました。

mongodb = require "mongodb"
microtime = require "microtime"
crypto = require "crypto"

# Builds one test document: a SHA-1 hex key derived from the microtime.now()
# value concatenated with a random integer in 0..254, followed by nine fixed
# numeric fields.
gen = ->
  sha1 = crypto.createHash("sha1")
  sha1.update("" + microtime.now() + (Math.random() * 255 | 0))
  digest = sha1.digest("hex")

  return {
    key: digest
    foo1: 1000
    foo2: 1000
    foo3: 1000
    bar1: 2000
    bar2: 2000
    bar3: 2000
    baz1: 3000
    baz2: 3000
    baz3: 3000
  }

# Total number of documents to insert, and the count still remaining.
times = 1000000
i = times

# Inserts documents one after another: the next insert is issued only from
# the callback of the previous one, instead of queueing all of them up front.
#
# collection - the collection object whose insert() is called.
#
# Reads and decrements the module-level counter `i`; exits the process with
# status 0 once the counter reaches zero. Returns nothing.
insertDocs = (collection) ->
  collection.insert gen(), {safe:true}, (error) ->
    # Report a failed insert instead of silently counting it as done; the run
    # continues so that `times` attempts are still made.
    console.log error.message if error
    # Progress line every 100000 inserts (times - i = inserts completed so far).
    console.log "Inserting #{times-i}..." if i % 100000 == 0
    if --i > 0
      insertDocs(collection)
    else
      process.exit 0
  return

# Connect to mongod at 127.0.0.1:27017, open the "test" database, then start
# the sequential insert chain on the "foo" collection.
server = new mongodb.Server "127.0.0.1", 27017
db = mongodb.Db "test", server
db.open (error, db) ->
  throw error if error?
  db.collection "foo", (err, collection) ->
    # Fail fast when the collection cannot be obtained, mirroring the check
    # on the open error above, rather than calling insert on an undefined
    # collection.
    throw err if err?
    insertDocs(collection)
    return
  return

〜3分で終了しました：

wfreeman$ time coffee mongotest.coffee
Inserting 0...
Inserting 100000...
Inserting 200000...
Inserting 300000...
Inserting 400000...
Inserting 500000...
Inserting 600000...
Inserting 700000...
Inserting 800000...
Inserting 900000...

real    3m31.991s
user    1m55.211s
sys 0m23.420s

また、100MB 未満の RAM、ノードで 70% の CPU、mongod で 40% の CPU を使用するという副次的な利点もあります (2 コアボックスでは、CPU を使い果たしていないように見えます)。

php - node-mongodb-native の挿入パフォーマンス

回答 2 件

Related

Reference