mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 00:28:26 -05:00
69 lines
1.9 KiB
Ruby
69 lines
1.9 KiB
Ruby
|
# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo
|
||
|
# BSD license, See the COPYING file for more information.
|
||
|
#
|
||
|
# This program is suited to output average and maximum errors of
|
||
|
# the Redis HyperLogLog implementation in a format suitable to print
|
||
|
# graphs using gnuplot.
|
||
|
|
||
|
require 'rubygems'
|
||
|
require 'redis'
|
||
|
require 'digest/sha1'
|
||
|
|
||
|
# Generate an array of [cardinality,relative_error] pairs
|
||
|
# in the 0 - max range with step of 1000*step.
|
||
|
#
|
||
|
# 'r' is the Redis object used to perform the queries.
|
||
|
# 'seed' must be different every time you want a test performed
|
||
|
# with a different set. The function guarantees that if 'seed' is the
|
||
|
# same, exactly the same dataset is used, and when it is different,
|
||
|
# a totally unrelated different data set is used (without any common
|
||
|
# element in practice).
|
||
|
def run_experiment(r,seed,max,step)
|
||
|
r.del('hll')
|
||
|
i = 0
|
||
|
samples = []
|
||
|
while i < max do
|
||
|
step.times {
|
||
|
elements = []
|
||
|
1000.times {
|
||
|
ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
|
||
|
elements << ele
|
||
|
i += 1
|
||
|
}
|
||
|
r.hlladd('hll',*elements)
|
||
|
}
|
||
|
approx = r.hllcount('hll')
|
||
|
err = approx-i
|
||
|
rel_err = 100.to_f*err/i
|
||
|
samples << [i,rel_err]
|
||
|
end
|
||
|
samples
|
||
|
end
|
||
|
|
||
|
def filter_samples(numsets,filter)
|
||
|
r = Redis.new
|
||
|
dataset = {}
|
||
|
(0...numsets).each{|i|
|
||
|
dataset[i] = run_experiment(r,i,100000,1)
|
||
|
}
|
||
|
dataset[0].each_with_index{|ele,index|
|
||
|
card,err=ele
|
||
|
if filter == :max
|
||
|
(1...numsets).each{|i|
|
||
|
err = dataset[i][index][1] if err < dataset[i][index][1]
|
||
|
}
|
||
|
elsif filter == :avg
|
||
|
(1...numsets).each{|i|
|
||
|
err += dataset[i][index][1]
|
||
|
}
|
||
|
err /= numsets
|
||
|
else
|
||
|
raise "Unknown filter #{filter}"
|
||
|
end
|
||
|
puts "#{card} #{err}"
|
||
|
}
|
||
|
end
|
||
|
|
||
|
filter_samples(100,:max)
|
||
|
#filter_samples(100,:avg)
|