Working !
This commit is contained in:
parent
1f63d1788c
commit
acc396cdfe
BIN
data/filters
BIN
data/filters
Binary file not shown.
@ -19,6 +19,7 @@ This script has been written by Phyks and is in the public domain (or whatever
|
||||
is closer to public domain in your country).
|
||||
"""
|
||||
|
||||
import ctypes
|
||||
import math
|
||||
|
||||
try:
|
||||
@ -55,7 +56,7 @@ class BloomFilter():
|
||||
self.m = m
|
||||
self.k = k
|
||||
|
||||
kbytes = 1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2))
|
||||
kbytes = ctypes.c_int(1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2))).value
|
||||
self.buckets = np.zeros(n, dtype=np.int32)
|
||||
if kbytes == 1:
|
||||
loc_type = np.uint8
|
||||
@ -65,34 +66,44 @@ class BloomFilter():
|
||||
loc_type = np.int32
|
||||
self._locations = np.zeros(k, dtype=loc_type)
|
||||
|
||||
def mod(self, a, b):
    """
    Return the remainder of a / b with the sign of the dividend, the way
    the % operator behaves in C and in JS.

    Python's own % gives the result the sign of the divisor instead, so
    both operands are reduced to their magnitudes first and the sign of
    `a` is put back afterwards. This also covers a negative `b`, for
    which JS yields `7 % -3 == 1` whereas Python yields -2.

    Raises ZeroDivisionError when `b` is 0 (JS would return NaN).
    """
    remainder = abs(a) % abs(b)
    return -remainder if a < 0 else remainder
|
||||
|
||||
def locations(self, v):
    """
    Compute the `self.k` bit positions associated with the string `v`.

    Two hashes are derived from `v` (`fnv_1a`, then `fnv_1a_b` applied to
    the first hash); the positions are `a + i * b` reduced modulo
    `self.m`, stepping with the C/JS-style `self.mod` so that the sequence
    matches the JS implementation. Negative remainders are shifted back
    into `[0, self.m)` before being stored.

    Returns `self._locations`, the buffer that is reused by every call:
    copy it if the positions must survive the next call.
    """
    r = self._locations
    m = self.m
    a = self.fnv_1a(v)
    b = self.fnv_1a_b(a)
    x = self.mod(a, m)
    for i in range(self.k):
        r[i] = x + m if x < 0 else x
        # Advance exactly once per position.
        x = self.mod(x + b, m)
    return r
|
||||
|
||||
def add(self, v):
    """
    Add `v` to the filter by setting its `self.k` bits in `self.buckets`.

    `v` is converted with `str()` first, so non-string values are
    accepted. Each position is split into a 32-bit word index and a bit
    offset. The positions are converted to plain Python ints before any
    shifting, so that the shift is not carried out in the (possibly 8-bit)
    dtype of the locations buffer.

    The updated word is wrapped to a signed 32-bit value before being
    stored: setting bit 31 must give a negative number, as it does in JS,
    and must fit in an int32 bucket.
    """
    positions = self.locations(str(v))
    buckets = self.buckets
    for i in range(self.k):
        index, bit = divmod(int(positions[i]), 32)
        word = int(buckets[index]) | (1 << bit)
        buckets[index] = ctypes.c_int32(word).value
|
||||
|
||||
def test(self, v):
    """
    Tell whether `v` may have been added to the filter.

    `v` is converted with `str()` first, so non-string values are
    accepted. Returns False as soon as one of the `self.k` bits of `v` is
    found unset in `self.buckets` (then `v` was never added), True when
    all of them are set.
    """
    positions = self.locations(str(v))
    buckets = self.buckets
    for i in range(self.k):
        index, bit = divmod(int(positions[i]), 32)
        # Work on a plain Python int: its right shift is arithmetic, so
        # bit 31 of a negative (signed 32-bit) word is still read as 1.
        if not (int(buckets[index]) >> bit) & 1:
            return False
    return True
|
||||
@ -111,51 +122,74 @@ class BloomFilter():
|
||||
"""
|
||||
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
"""
|
||||
v -= (v >> 1) & 0x55555555
|
||||
v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
|
||||
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24
|
||||
v -= ctypes.c_int(ctypes.c_int(v >> 1).value & ctypes.c_int(0x55555555).value).value
|
||||
v = ctypes.c_int(v & 0x33333333).value + c_types.c_int(ctypes.c_int(v >> 2).value & 0x33333333).value
|
||||
return ctypes.c_int((ctypes.c_int((v + ctypes.c_int(v >> 4).value) & 0xF0F0F0F).value * 0x1010101) >> 24).value
|
||||
|
||||
|
||||
def rshift(self, val, n):
    """
    Implements the >>> JS operator (unsigned 32-bit right shift).

    `val` is first reduced to its unsigned 32-bit representation, then
    shifted. As in JS, only the five low bits of the shift count are used,
    so `n` = 32 shifts by 0 and a negative `n` does not raise.

    From https://stackoverflow.com/questions/5832982/how-to-get-the-logical-right-binary-shift-in-python
    """
    return (val % 0x100000000) >> (n & 31)
|
||||
|
||||
def fnv_1a(self, v):
    """
    Fowler/Noll/Vo hashing of the string `v`, returned as a signed 32-bit
    integer.

    Numbers in JS are 64-bit floats. Arithmetic is carried out on that
    representation, but the operands and results of bitwise operators are
    treated as 32-bit integers. Every shift and xor below is therefore
    wrapped to a signed 32-bit value, while the additions are left
    unwrapped until the next bitwise operation.

    The string is hashed one UTF-16 code unit at a time, which is what
    `charCodeAt` yields in JS: the high byte of the unit is mixed in only
    when it is non-zero, then the low byte. Characters outside the BMP are
    thus hashed as their surrogate pair.
    """
    def int32(x):
        # Wrap to a signed 32-bit value.
        return ctypes.c_int32(x).value

    def multiply(a):
        # Shift-and-add form of the multiplication by the FNV prime
        # (16777619), each shifted term being wrapped to 32 bits.
        a = int32(a)
        return (a + int32(a << 1) + int32(a << 4) + int32(a << 7)
                + int32(a << 8) + int32(a << 24))

    a = 2166136261
    # Little-endian UTF-16: bytes come as (low, high) pairs, one pair per
    # code unit. "surrogatepass" lets lone surrogates through.
    data = v.encode("utf-16-le", "surrogatepass")
    for low, high in zip(data[0::2], data[1::2]):
        if high:
            a = multiply(a ^ high)
        a = multiply(a ^ low)

    # Final mixing, from http://home.comcast.net/~bretm/hash/6.html
    # The masked shifts are the JS >>> operator.
    a += int32(a << 13)
    a = int32(a ^ ((a & 0xffffffff) >> 7))
    a += int32(a << 3)
    a = int32(a ^ ((a & 0xffffffff) >> 17))
    a += int32(a << 5)
    return int32(a)
|
||||
|
||||
def fnv_1a_b(self, a):
    """
    One additional iteration of FNV, given a hash.

    `a` is expected to be a hash returned by `fnv_1a`. It goes through one
    more shift-and-add multiplication step and through the final mixing,
    and the result is returned as a signed 32-bit integer.

    As in `fnv_1a`, every shift and xor is wrapped to a signed 32-bit
    value to reproduce the way JS handles bitwise operators, while the
    additions are left unwrapped until the next bitwise operation.
    """
    def int32(x):
        # Wrap to a signed 32-bit value.
        return ctypes.c_int32(x).value

    a += (int32(a << 1) + int32(a << 4) + int32(a << 7)
          + int32(a << 8) + int32(a << 24))

    # Final mixing; the masked shifts are the JS >>> operator.
    a += int32(a << 13)
    a = int32(a ^ ((a & 0xffffffff) >> 7))
    a += int32(a << 3)
    a = int32(a ^ ((a & 0xffffffff) >> 17))
    a += int32(a << 5)
    return int32(a)
|
||||
|
@ -63,8 +63,6 @@ if __name__ == "__main__":
|
||||
tmp_filter.add(word)
|
||||
|
||||
filters.append(tmp_filter.buckets)
|
||||
print(tmp_filter.buckets)
|
||||
sys.exit()
|
||||
|
||||
# First Int32 is length
|
||||
filters_to_write = struct.pack("<i", len(filters))
|
||||
|
@ -8,6 +8,7 @@
|
||||
<h1>Bloom.JS demo</h1>
|
||||
<p>This page runs the <code>bloom.js</code> library unit tests. Check your console output for <code>assert</code> errors and verbose debugging messages.</p>
|
||||
<script type="text/javascript" src="js/bloom.js"></script>
|
||||
<script type="text/javascript" src="js/test.js"></script>
|
||||
<script type="text/javascript" src="js/test2.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
Loading…
Reference in New Issue
Block a user