c - Ruby C APIで2つのハッシュを効率的にマージするには?

Question

2 つのハッシュをマージする必要がある Ruby の C 拡張機能を作成していますが、Ruby 1.8.6 では rb_hash_merge() 関数が static になっており、外部から呼び出せません。そこで、代わりに次の呼び出しを使ってみました:

rb_funcall(hash1, rb_intern("merge"), 1, hash2);

しかし、これは遅すぎるため、このアプリケーションではパフォーマンスが非常に重要です。

効率とスピードを念頭に置いてこのマージを実行する方法を知っている人はいますか?

(rb_hash_merge() のソースを見てそのまま複製しようともしましたが、その中身は他の静的関数だらけで、それらの関数もさらに多くの静的関数に依存しているため、解きほぐすのはほとんど不可能に思えます...別の方法が必要です)

score 7 · Accepted Answer

わかりました。公開された API 内で最適化することはできないようです。

テストコード:

#extconf.rb
# Build script: uses mkmf to generate the Makefile for the "hello" extension.
require 'mkmf'

extension_name = "hello"

dir_config(extension_name)
create_makefile(extension_name)


// hello.c
#include "ruby.h"

static VALUE rb_mHello;
static VALUE rb_cMyCalc;

/* Mark callback handed to Data_Wrap_Struct; deliberately a no-op. */
static void
calc_mark(void *f)
{
  (void)f;
}
/* Free callback handed to Data_Wrap_Struct; deliberately a no-op. */
static void
calc_free(void *f)
{
  (void)f;
}
/*
 * Allocation function for the Calculator class: wraps a NULL data pointer
 * in a new object of `klass`, with calc_mark / calc_free as its callbacks.
 */
static VALUE
calc_alloc(VALUE klass)
{
  VALUE wrapped = Data_Wrap_Struct(klass, calc_mark, calc_free, NULL);

  return wrapped;
}

/* Calculator#initialize: takes no arguments, does nothing, returns nil. */
static VALUE
calc_init(VALUE obj)
{
  (void)obj;
  return Qnil;
}

/*
 * Baseline variant: dispatches to the Ruby-level `merge` method of h1 with
 * h2 as its single argument and returns whatever that call returns.
 */
static VALUE
calc_merge(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE merged = rb_funcall(h1, rb_intern("merge"), 1, h2);

  (void)obj;
  return merged;
}

/*
 * Hand-rolled merge into a fresh hash.
 *
 * Copies every entry of h1 and then every entry of h2 into a newly created
 * hash, so for duplicate keys the value from h2 wins. Neither argument is
 * written to by this function.
 *
 * obj: the receiver (unused).
 * h1, h2: the hashes to combine.
 * Returns the new hash.
 */
static VALUE
calc_merge2(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE h3 = rb_hash_new();
  VALUE keys;
  VALUE akey;
  long i;

  (void)obj;

  /*
   * rb_each() is not a cursor: it simply calls #each on its argument, which
   * requires a block. Walk the keys array by index instead.
   */
  keys = rb_funcall(h1, rb_intern("keys"), 0);
  for (i = 0; i < RARRAY_LEN(keys); i++) {
    akey = rb_ary_entry(keys, i);
    rb_hash_aset(h3, akey, rb_hash_aref(h1, akey));
  }

  /* Second pass: entries from h2 overwrite any same-key entries from h1. */
  keys = rb_funcall(h2, rb_intern("keys"), 0);
  for (i = 0; i < RARRAY_LEN(keys); i++) {
    akey = rb_ary_entry(keys, i);
    rb_hash_aset(h3, akey, rb_hash_aref(h2, akey));
  }

  return h3;
}

/*
 * Hand-rolled in-place merge.
 *
 * Copies every entry of h1 into h2 and returns h2. Note that h2 itself is
 * modified, and that for duplicate keys the value from h1 overwrites the one
 * already in h2.
 *
 * obj: the receiver (unused).
 * h1: hash whose entries are copied.
 * h2: hash that receives the entries (mutated).
 * Returns h2.
 */
static VALUE
calc_merge3(VALUE obj, VALUE h1, VALUE h2)
{
  VALUE keys;
  VALUE akey;
  long i;

  (void)obj;

  /*
   * rb_each() is not a cursor: it simply calls #each on its argument, which
   * requires a block. Walk the keys array by index instead.
   */
  keys = rb_funcall(h1, rb_intern("keys"), 0);
  for (i = 0; i < RARRAY_LEN(keys); i++) {
    akey = rb_ary_entry(keys, i);
    rb_hash_aset(h2, akey, rb_hash_aref(h1, akey));
  }

  return h2;
}

void
Init_hello()
{
  rb_mHello = rb_define_module("Hello");
  rb_cMyCalc = rb_define_class_under(rb_mHello, "Calculator", rb_cObject);
  rb_define_alloc_func(rb_cMyCalc, calc_alloc);
  rb_define_method(rb_cMyCalc, "initialize", calc_init, 0);
  rb_define_method(rb_cMyCalc, "merge", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge2", calc_merge, 2);
  rb_define_method(rb_cMyCalc, "merge3", calc_merge, 2);
}


# test.rb
# Benchmark driver: builds two 100000-entry hashes (integer keys in h1,
# string keys in h2) and, depending on the first command-line argument,
# prints the key count of the result of one of the Calculator merge methods.
# With no argument (or any other value) only the setup cost is measured.
require "hello"

h1 = {}
h2 = {}

(1..100000).each do |x|
  h1[x] = x + 1
end

(1..100000).each do |x|
  h2["#{x}-12"] = x + 1
end

c = Hello::Calculator.new

case ARGV[0]
when "1" then puts c.merge(h1, h2).keys.length
when "2" then puts c.merge2(h1, h2).keys.length
when "3" then puts c.merge3(h1, h2).keys.length
end

テスト結果は次のとおりです。

$ time ruby test.rb

real    0m1.021s
user    0m0.940s
sys     0m0.080s
$ time ruby test.rb 1
200000

real    0m1.224s
user    0m1.148s
sys     0m0.076s
$ time ruby test.rb 2
200000

real    0m1.219s
user    0m1.132s
sys     0m0.084s
$ time ruby test.rb 3
200000

real    0m1.220s
user    0m1.128s
sys     0m0.092s

したがって、0.2 秒の操作で最大 ~0.004 秒削れる可能性があるようです。

マージ処理は値を設定する以外にはおそらくほとんど何もしていないことを考えると、さらに最適化する余地はあまりないかもしれません。ruby のソース自体をハックしてみるという手もありますが、その時点でもはや「拡張機能」を開発しているのではなく、言語そのものを変更していることになるので、おそらくうまくいかないでしょう。

ハッシュの結合を C の部分で何度も行う必要がある場合は、C 側では内部データ構造を使用し、最終パスでのみそれらを Ruby ハッシュにエクスポートするのが、おそらく唯一の最適化方法でしょう。

P.S. コードの初期スケルトンは、この優れたチュートリアルから借用しました。

c - Ruby C APIで2つのハッシュを効率的にマージするには?

1 に答える 1

Related

Reference