ruby - 最も一般的なハッシュ値を見つける

Question

次のハッシュがあります: h = Hash["a","foo", "b","bar", "c","foo"]。最も多く出現する値（この場合は foo）を返したいと思います。これを行う最も効率的な方法は何ですか?

この質問に似ていますが、ハッシュに適応しています。

score 4 · Accepted Answer

値を配列として取得し、質問でリンクされている解決策にそのまま当てはめるだけです。

# Bucket identical values together, keep the biggest bucket, and return its member.
groups = h.values.group_by { |value| value }
groups.values.max_by(&:size).first
#=> "foo"

score 3 · Accepted Answer

できるよ：

# Sample data: "foo" and "bar" each occur twice, "foobar" once.
h = { "a" => "foo", "b" => "bar", "c" => "foo", "d" => "bar", 'e' => 'foobar' }
# Bucket the values by themselves and print the key of the largest bucket.
groups = h.values.group_by { |value| value }
p groups.max_by { |_value, members| members.size }.first
# >> "foo"

UPDATE（私の最初の解決策よりも遅い）

h = { "a" => "foo", "b" => "bar", "c" => "foo" }
# Group the [key, value] pairs by value directly, without calling Hash#values.
pairs_by_value = h.group_by { |_key, value| value }
pairs_by_value.max_by { |_value, pairs| pairs.size }.first
# >> "foo"

ベンチマーク

require 'benchmark'

# Returns the most frequent value of +h+ by grouping equal values and
# taking a member of the largest group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] a value belonging to the largest group
# @raise [NoMethodError] when +h+ is empty (there is no group to read from)
def seanny123(h)
  groups = h.values.group_by { |value| value }
  largest = groups.values.max_by(&:size)
  largest.first
end

# Returns the most frequent value of +h+ by counting occurrences in a
# Hash that defaults to 0 and picking the entry with the highest count.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the value with the highest count, or nil when +h+ is empty
def stefan(h)
  tallies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  most_common, _occurrences = tallies.max_by { |_value, occurrences| occurrences }
  most_common
end

# Returns the most frequent value of +h+ by grouping the [key, value]
# pairs by value, sorting the groups by size, and reading the last one.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the value whose group sorts last (i.e. is largest)
# @raise [NoMethodError] when +h+ is empty (there is no last group)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |(_key, value)| value }
  ascending = pairs_by_value.sort_by { |(_value, pairs)| pairs.size }
  biggest = ascending[-1]
  biggest[0]
end

# Returns every value of +h+ that is tied for the highest occurrence count.
# Array#count is called once per element in each of the two passes.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the distinct most frequent values (empty when +h+ is empty)
def acts_as_geek(h)
  values = h.values
  top_count = values.map { |value| values.count(value) }.max
  winners = values.select { |value| values.count(value) == top_count }
  winners.uniq
end

# Returns the most frequent value of +h+ by reducing over the values and
# keeping, at each step, whichever of the two candidates occurs more often.
# The newer candidate is kept when the counts are equal.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the surviving value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.reduce do |best, candidate|
    if values.count(best) > values.count(candidate)
      best
    else
      candidate
    end
  end
end

# Returns the most frequent value of +h+ by grouping the values and
# reading the key of the largest group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the key of the largest group
# @raise [NoMethodError] when +h+ is empty (there is no group to read from)
def babai1(h)
  groups = h.values.group_by { |value| value }
  largest = groups.max_by { |_value, members| members.size }
  largest.first
end

# Returns the most frequent value of +h+ by grouping the [key, value]
# pairs by value (no intermediate values array) and reading the key of
# the largest group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the key of the largest group
# @raise [NoMethodError] when +h+ is empty (there is no group to read from)
def babai2(h)
  pairs_by_value = h.group_by { |_key, value| value }
  largest = pairs_by_value.max_by { |_value, pairs| pairs.size }
  largest.first
end


# Runs every candidate implementation +n+ times against the same hash and
# prints one timing row per candidate (label column width 20).
#
# @param h [Hash] hash handed to each candidate
# @param n [Integer] number of invocations per candidate
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai1") do
      n.times { babai1(h) }
    end
    bm.report("Babai2") do
      n.times { babai2(h) }
    end
  end
end

n = 10
# 3000 entries in total: each iteration adds "foo" twice and "bar" once.
h = 1000.times.each_with_object({}) do |i, acc|
  acc["a#{i}"] = "foo"
  acc["b#{i}"] = "bar"
  acc["c#{i}"] = "foo"
end
benchmark(h, n)

結果：-

                           user     system      total        real
Seanny123              0.020000   0.000000   0.020000 (  0.015550)
Stefan                 0.040000   0.000000   0.040000 (  0.044666)
Yevgeniy Anfilofyev    0.020000   0.000000   0.020000 (  0.023162)
acts_as_geek          16.160000   0.000000  16.160000 ( 16.223582)
squiguy               15.740000   0.000000  15.740000 ( 15.768917)
Babai1                 0.020000   0.000000   0.020000 (  0.015430)
Babai2                 0.020000   0.000000   0.020000 (  0.025711)

score 1 · Accepted Answer

Enumerable#inject を使って、次のように出現頻度を計算できます。

# Fold over the pairs, bumping a counter (default 0) for each value seen.
# The accumulator has to be returned from the block on every step.
frequencies = h.inject(Hash.new(0)) do |counts, (_key, value)|
  counts[value] += 1
  counts
end
#=> {"foo"=>2, "bar"=>1}

または Enumerable#each_with_object を使って:

# Same tally as above; each_with_object hands back the counter hash itself,
# so the block does not need to return it.
frequencies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
  counts[value] += 1
end
#=> {"foo"=>2, "bar"=>1}

そして Enumerable#max_by で最大値を求めます:

# Pick the [value, count] entry with the highest count and unpack it.
value, count = frequencies.max_by { |_value, occurrences| occurrences }
#=> ["foo", 2]

# The first element of that pair is the most common value.
value
#=> "foo"

score 1 · Accepted Answer

ベンチマーク

小さなハッシュの場合:

# Many repetitions over a three-entry hash.
n = 100_000
h = { "a" => "foo", "b" => "bar", "c" => "foo" }
benchmark(h, n)

結果：

                           user     system      total        real
Seanny123              0.220000   0.000000   0.220000 (  0.222342)
Stefan                 0.260000   0.000000   0.260000 (  0.263583)
Yevgeniy Anfilofyev    0.350000   0.000000   0.350000 (  0.341685)
acts_as_geek           0.300000   0.000000   0.300000 (  0.306601)
squiguy                0.140000   0.000000   0.140000 (  0.139141)
Babai                  0.220000   0.000000   0.220000 (  0.218616)

大きなハッシュの場合:

n = 10
# 3000 entries in total: each iteration adds "foo" twice and "bar" once.
h = 1000.times.each_with_object({}) do |i, acc|
  acc["a#{i}"] = "foo"
  acc["b#{i}"] = "bar"
  acc["c#{i}"] = "foo"
end
benchmark(h, n)

結果：

                           user     system      total        real
Seanny123              0.060000   0.000000   0.060000 (  0.059068)
Stefan                 0.100000   0.000000   0.100000 (  0.100760)
Yevgeniy Anfilofyev    0.080000   0.000000   0.080000 (  0.080988)
acts_as_geek          97.020000   0.020000  97.040000 ( 97.072220)
squiguy               97.480000   0.020000  97.500000 ( 97.535130)
Babai                  0.050000   0.000000   0.050000 (  0.058653)

ベンチマークコード:

require 'benchmark'

# Returns the most frequent value of +h+ by grouping equal values and
# taking a member of the largest group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] a value belonging to the largest group
# @raise [NoMethodError] when +h+ is empty (there is no group to read from)
def seanny123(h)
  groups = h.values.group_by { |value| value }
  largest = groups.values.max_by(&:size)
  largest.first
end

# Returns the most frequent value of +h+ by counting occurrences in a
# Hash that defaults to 0 and picking the entry with the highest count.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the value with the highest count, or nil when +h+ is empty
def stefan(h)
  tallies = h.each_with_object(Hash.new(0)) do |(_key, value), counts|
    counts[value] += 1
  end
  most_common, _occurrences = tallies.max_by { |_value, occurrences| occurrences }
  most_common
end

# Returns the most frequent value of +h+ by grouping the [key, value]
# pairs by value, sorting the groups by size, and reading the last one.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the value whose group sorts last (i.e. is largest)
# @raise [NoMethodError] when +h+ is empty (there is no last group)
def yevgeniy_anfilofyev(h)
  pairs_by_value = h.group_by { |(_key, value)| value }
  ascending = pairs_by_value.sort_by { |(_value, pairs)| pairs.size }
  biggest = ascending[-1]
  biggest[0]
end

# Returns every value of +h+ that is tied for the highest occurrence count.
# Array#count is called once per element in each of the two passes.
#
# @param h [Hash] hash whose values are inspected
# @return [Array] the distinct most frequent values (empty when +h+ is empty)
def acts_as_geek(h)
  values = h.values
  top_count = values.map { |value| values.count(value) }.max
  winners = values.select { |value| values.count(value) == top_count }
  winners.uniq
end

# Returns the most frequent value of +h+ by reducing over the values and
# keeping, at each step, whichever of the two candidates occurs more often.
# The newer candidate is kept when the counts are equal.
#
# @param h [Hash] hash whose values are inspected
# @return [Object, nil] the surviving value, or nil when +h+ is empty
def squiguy(h)
  values = h.values
  values.reduce do |best, candidate|
    if values.count(best) > values.count(candidate)
      best
    else
      candidate
    end
  end
end

# Returns the most frequent value of +h+ by grouping the values and
# reading the key of the largest group.
#
# @param h [Hash] hash whose values are inspected
# @return [Object] the key of the largest group
# @raise [NoMethodError] when +h+ is empty (there is no group to read from)
def babai(h)
  groups = h.values.group_by { |value| value }
  largest = groups.max_by { |_value, members| members.size }
  largest.first
end


# Runs every candidate implementation +n+ times against the same hash and
# prints one timing row per candidate (label column width 20).
#
# @param h [Hash] hash handed to each candidate
# @param n [Integer] number of invocations per candidate
def benchmark(h, n)
  Benchmark.bm(20) do |bm|
    bm.report("Seanny123") do
      n.times { seanny123(h) }
    end
    bm.report("Stefan") do
      n.times { stefan(h) }
    end
    bm.report("Yevgeniy Anfilofyev") do
      n.times { yevgeniy_anfilofyev(h) }
    end
    bm.report("acts_as_geek") do
      n.times { acts_as_geek(h) }
    end
    bm.report("squiguy") do
      n.times { squiguy(h) }
    end
    bm.report("Babai") do
      n.times { babai(h) }
    end
  end
end

score 0 · Accepted Answer

group_by を使い、values は使わず、sort_by を使う方法:

# Group the [key, value] pairs by value, order the groups by size, and
# read the value of the last (largest) group.
pairs_by_value = h.group_by { |(_key, value)| value }
pairs_by_value.sort_by { |(_value, pairs)| pairs.size }[-1][0]

score 0 · Accepted Answer

更新: 私の解決策は使用しないでください。私のコンピューターでは、計算に非常に多くのクロックサイクルが必要でした。ここで関数型のアプローチがこれほど遅くなるとは知りませんでした。

Enumerable#reduce を使用してはどうですか？

h = { "a" => "foo", "b" => "bar", "c" => "foo" }
v = h.values
# Keep whichever of the two values occurs more often; the later one wins a tie.
most = v.reduce do |memo, val|
  if v.count(memo) > v.count(val)
    memo
  else
    val
  end
end
p most

注意点として、2 つ以上がハッシュで同じ「最大」カウント値を共有している場合、これは 1 つの値のみを返します。すべての「最大」値を気にする場合、これは使用するソリューションではありません。

これがベンチマークです。

#!/usr/bin/env ruby

# Benchmarks each answer's approach on a hash of 10,000 entries whose values
# are random single letters. Every report repeats its approach MULT times.

require 'benchmark'

MULT = 10
letters = ("a".."z").to_a
arr = Array.new(10000) { letters.sample }

# The hash has to exist before it is filled: without this assignment the
# fill loop below fails with NameError because `h` was never defined.
h = {}
arr.each_with_index { |letter, i| h[i] = letter }

Benchmark.bm do |rep|
  rep.report("Seanny123") {
    MULT.times do
      h.values.group_by { |e| e }.values.max_by(&:size).first
    end
  }

  rep.report("squiguy") {
    MULT.times do
      v = h.values
      v.reduce do |memo, val|
        v.count(memo) > v.count(val) ? memo : val
      end
    end
  }

  rep.report("acts_as_geek") {
    MULT.times do
      v = h.values
      max = v.map { |i| v.count(i) }.max
      v.select { |i| v.count(i) == max }.uniq
    end
  }

  rep.report("Yevgeniy Anfilofyev") {
    MULT.times do
      h.group_by { |(_, v)| v }
       .sort_by { |(_, v)| v.size }[-1][0]
    end
  }

  rep.report("Stefan") {
    MULT.times do
      # The accumulator is named `counts` so it no longer shadows the outer `h`.
      frequencies = h.inject(Hash.new(0)) { |counts, (_, v)| counts[v] += 1; counts }
      frequencies.max_by { |_, count| count }
    end
  }

  rep.report("Babai") {
    MULT.times do
      h.values.group_by { |e| e }.max_by { |_, v| v.size }.first
    end
  }

end

ruby - 最も一般的なハッシュ値を見つける

6 件の回答

ベンチマーク

Related

Reference