Working !
This commit is contained in:
parent
1f63d1788c
commit
acc396cdfe
BIN
data/filters
BIN
data/filters
Binary file not shown.
@ -19,6 +19,7 @@ This script has been written by Phyks and is in the public domain (or whatever
|
|||||||
is closer to public domain in your country).
|
is closer to public domain in your country).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import ctypes
|
||||||
import math
|
import math
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -55,7 +56,7 @@ class BloomFilter():
|
|||||||
self.m = m
|
self.m = m
|
||||||
self.k = k
|
self.k = k
|
||||||
|
|
||||||
kbytes = 1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2))
|
kbytes = ctypes.c_int(1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2))).value
|
||||||
self.buckets = np.zeros(n, dtype=np.int32)
|
self.buckets = np.zeros(n, dtype=np.int32)
|
||||||
if kbytes == 1:
|
if kbytes == 1:
|
||||||
loc_type = np.uint8
|
loc_type = np.uint8
|
||||||
@ -65,34 +66,44 @@ class BloomFilter():
|
|||||||
loc_type = np.int32
|
loc_type = np.int32
|
||||||
self._locations = np.zeros(k, dtype=loc_type)
|
self._locations = np.zeros(k, dtype=loc_type)
|
||||||
|
|
||||||
|
def mod(self, a, b):
    """
    Truncated remainder: make ``%`` behave like it does in C and in JS.

    Python's ``%`` returns a result with the sign of the divisor, whereas
    C and JS return a result with the sign of the dividend. Working on the
    absolute values and restoring the sign of ``a`` afterwards gives the
    C/JS result for every sign combination, including a negative ``b``.

    :param a: dividend.
    :param b: divisor, must be non-zero.
    :returns: remainder of ``a / b`` carrying the sign of ``a``.
    :raises ZeroDivisionError: if ``b`` is zero.
    """
    remainder = abs(a) % abs(b)
    return remainder if a >= 0 else -remainder
|
||||||
|
|
||||||
def locations(self, v):
    """
    Compute the ``k`` bit positions associated with the string ``v``.

    The first position is the FNV-1a hash of ``v`` reduced modulo ``m``;
    each following position is obtained by adding the second hash
    (``fnv_1a_b`` of the first one) and reducing modulo ``m`` again.

    The result is written into the shared ``self._locations`` buffer,
    which is overwritten by every call, and that same buffer is returned.

    :param v: string to hash.
    :returns: ``self._locations`` filled with ``k`` positions.
    """
    slots = self._locations
    first_hash = self.fnv_1a(v)
    step = self.fnv_1a_b(first_hash)
    position = self.mod(first_hash, self.m)
    for idx in range(self.k):
        # self.mod keeps the sign of the dividend, so shift negative
        # remainders back into the [0, m) range before storing them.
        slots[idx] = position if position >= 0 else position + self.m
        position = self.mod(position + step, self.m)
    return slots
|
||||||
|
|
||||||
def add(self, v):
    """
    Insert ``v`` in the filter by setting the bit at each of its locations.

    :param v: value to insert; it is converted with ``str()`` before being
        hashed.
    """
    positions = self.locations(str(v))
    buckets = self.buckets
    for i in range(self.k):
        # Work on plain Python ints: the locations buffer may hold narrow
        # numpy integers (uint8 / uint16), and shifting by such a scalar
        # can be evaluated in that narrow type and overflow.
        index, bit = divmod(int(positions[i]), 32)
        # Wrap to a signed 32-bit value so that bit 31 is stored as the
        # sign bit of the bucket, like a JS Int32Array would do.
        mask = ctypes.c_int(1 << bit).value
        buckets[index] = ctypes.c_int(int(buckets[index]) | mask).value
|
||||||
|
|
||||||
def test(self, v):
    """
    Tell whether ``v`` may have been added to the filter.

    :param v: value to look up; it is converted with ``str()`` before
        being hashed.
    :returns: ``False`` as soon as one of the bits associated with ``v``
        is not set, ``True`` if all of them are set.
    """
    positions = self.locations(str(v))
    buckets = self.buckets
    for i in range(self.k):
        # Work on plain Python ints: the locations buffer may hold narrow
        # numpy integers (uint8 / uint16), and shifting by such a scalar
        # can be evaluated in that narrow type and overflow.
        index, bit = divmod(int(positions[i]), 32)
        # Signed 32-bit mask, matching the way buckets are stored.
        mask = ctypes.c_int(1 << bit).value
        if int(buckets[index]) & mask == 0:
            return False
    return True
|
||||||
@ -111,51 +122,74 @@ class BloomFilter():
|
|||||||
"""
|
"""
|
||||||
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||||
"""
|
"""
|
||||||
v -= (v >> 1) & 0x55555555
|
v -= ctypes.c_int(ctypes.c_int(v >> 1).value & ctypes.c_int(0x55555555).value).value
|
||||||
v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
|
v = ctypes.c_int(v & 0x33333333).value + c_types.c_int(ctypes.c_int(v >> 2).value & 0x33333333).value
|
||||||
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24
|
return ctypes.c_int((ctypes.c_int((v + ctypes.c_int(v >> 4).value) & 0xF0F0F0F).value * 0x1010101) >> 24).value
|
||||||
|
|
||||||
|
|
||||||
|
def rshift(self, val, n):
    """
    Logical (zero-filling) right shift, i.e. the JS ``>>>`` operator.

    ``val`` is first reduced to its unsigned 32-bit representation, then
    shifted to the right by ``n`` bits.

    From https://stackoverflow.com/questions/5832982/how-to-get-the-logical-right-binary-shift-in-python
    """
    unsigned = val % 0x100000000
    return unsigned >> n
|
||||||
|
|
||||||
def fnv_1a(self, v):
    """
    Fowler/Noll/Vo (FNV-1a) hashing of a string, with a final mixing step.

    The arithmetic mimics JavaScript: numbers are 64-bit floats for
    arithmetic operations, but bitwise operators treat their operands as
    32-bit integers. Every intermediate value is therefore truncated to a
    signed 32-bit integer.

    Each character is hashed byte by byte, most significant byte first,
    skipping the upper bytes that are zero.

    NOTE(review): ``ord()`` yields full code points, so characters above
    U+FFFF go through the ``0xff0000`` step; a JS implementation based on
    UTF-16 code units would presumably hash them differently - confirm.

    :param v: string to hash.
    :returns: the hash, as a signed 32-bit integer.
    """
    def int32(x):
        # Truncate to a signed 32-bit integer (no overflow check is done).
        return ctypes.c_int32(x).value

    def fnv_multiply(h):
        # Multiply by the FNV prime 16777619, written as shifts and adds.
        return int32(h + (h << 1) + (h << 4) + (h << 7) + (h << 8) + (h << 24))

    a = 2166136261
    for char in v:
        c = ord(char)
        for shift in (24, 16, 8):
            d = c & (0xff << shift)
            if d:
                a = fnv_multiply(a ^ (d >> shift))
        # The low byte is always hashed, even when it is zero.
        a = fnv_multiply(a ^ (c & 0xff))
    # From http://home.comcast.net/~bretm/hash/6.html
    a = int32(a + (a << 13))
    # Masking to 32 bits before shifting gives a logical (JS ">>>") shift.
    a = int32(a ^ ((a & 0xffffffff) >> 7))
    a = int32(a + (a << 3))
    a = int32(a ^ ((a & 0xffffffff) >> 17))
    a = int32(a + (a << 5))
    return a
|
||||||
|
|
||||||
def fnv_1a_b(self, a):
    """
    Run one more FNV iteration on an existing hash.

    As in ``fnv_1a``, ctypes is used to truncate intermediate values to C
    ints, so that the bitwise operations behave like their JS
    counterparts.

    :param a: hash value to mix further.
    :returns: the new hash, as a signed C int.
    """
    def wrap(x):
        # Truncate to a C int, the way ctypes stores it.
        return ctypes.c_int(x).value

    a += sum(wrap(a << shift) for shift in (1, 4, 7, 8, 24))
    a += wrap(a << 13)
    # Reducing modulo 2**32 before shifting gives the JS ">>>" operator.
    a = wrap(a ^ wrap((a % 0x100000000) >> 7))
    a += wrap(a << 3)
    a = wrap(a ^ wrap((a % 0x100000000) >> 17))
    a += wrap(a << 5)
    return wrap(a & 0xffffffff)
|
||||||
|
@ -63,8 +63,6 @@ if __name__ == "__main__":
|
|||||||
tmp_filter.add(word)
|
tmp_filter.add(word)
|
||||||
|
|
||||||
filters.append(tmp_filter.buckets)
|
filters.append(tmp_filter.buckets)
|
||||||
print(tmp_filter.buckets)
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
# First Int32 is length
|
# First Int32 is length
|
||||||
filters_to_write = struct.pack("<i", len(filters))
|
filters_to_write = struct.pack("<i", len(filters))
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
<h1>Bloom.JS demo</h1>
|
<h1>Bloom.JS demo</h1>
|
||||||
<p>This page runs the <code>bloom.js</code> library unit tests. Look at your console output for <code>assert</code> errors and verbose debugging output.</p>
|
<p>This page runs the <code>bloom.js</code> library unit tests. Look at your console output for <code>assert</code> errors and verbose debugging output.</p>
|
||||||
<script type="text/javascript" src="js/bloom.js"></script>
|
<script type="text/javascript" src="js/bloom.js"></script>
|
||||||
|
<script type="text/javascript" src="js/test.js"></script>
|
||||||
<script type="text/javascript" src="js/test2.js"></script>
|
<script type="text/javascript" src="js/test2.js"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
Loading…
Reference in New Issue
Block a user