-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathcas.lua
92 lines (75 loc) · 2.54 KB
/
cas.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
-- Check And Set
-- Check if the item is already present in one of the layers and
-- only add the item if it wasn't.
-- Returns 1 if the item was already present.
--
-- If only this script is used to add items to the filter the :count
-- key will accurately indicate the number of unique items added to
-- the filter.
local entries = ARGV[2]
local precision = ARGV[3]
local hash = redis.sha1hex(ARGV[4])
local countkey = ARGV[1] .. ':count'
local count = redis.call('GET', countkey)
if not count then
count = 1
else
count = count + 1
end
local factor = math.ceil((entries + count) / entries)
-- 0.69314718055995 = ln(2)
local index = math.ceil(math.log(factor) / 0.69314718055995)
local scale = math.pow(2, index - 1) * entries
-- This uses a variation on:
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
-- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
local h = { }
h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
h[1] = tonumber(string.sub(hash, 9 , 16), 16)
h[2] = tonumber(string.sub(hash, 17, 24), 16)
h[3] = tonumber(string.sub(hash, 25, 32), 16)
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
-- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
-- 0.4804530139182 = ln(2)^2
local maxbits = math.floor((scale * math.log(precision * math.pow(0.5, index))) / -0.4804530139182)
-- 0.69314718055995 = ln(2)
local maxk = math.floor(0.69314718055995 * maxbits / scale)
local b = { }
for i=1, maxk do
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
end
-- Only do this if we have data already.
if index > 1 then
-- The last fiter will be handled below.
for n=1, index-1 do
local key = ARGV[1] .. ':' .. n
local scale = math.pow(2, n - 1) * entries
-- 0.4804530139182 = ln(2)^2
local bits = math.floor((scale * math.log(precision * math.pow(0.5, n))) / -0.4804530139182)
-- 0.69314718055995 = ln(2)
local k = math.floor(0.69314718055995 * bits / scale)
local found = true
for i=1, k do
if redis.call('GETBIT', key, b[i] % bits) == 0 then
found = false
break
end
end
if found then
return 1
end
end
end
-- For the last filter we do a SETBIT where we check the result value.
local key = ARGV[1] .. ':' .. index
local found = 1
for i=1, maxk do
if redis.call('SETBIT', key, b[i] % maxbits, 1) == 0 then
found = 0
end
end
if found == 0 then
-- INCR is a little bit faster than SET.
redis.call('INCR', countkey)
end
return found