Working !

This commit is contained in:
Phyks 2014-10-28 20:08:47 +01:00
parent 1f63d1788c
commit acc396cdfe
4 changed files with 71 additions and 38 deletions

Binary file not shown.

View File

@ -19,6 +19,7 @@ This script has been written by Phyks and is in the public domain (or whatever
is closer to public domain in your country). is closer to public domain in your country).
""" """
import ctypes
import math import math
try: try:
@ -55,7 +56,7 @@ class BloomFilter():
self.m = m self.m = m
self.k = k self.k = k
kbytes = 1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2)) kbytes = ctypes.c_int(1 << math.ceil(math.log(math.ceil(math.log(m, 2) / 8), 2))).value
self.buckets = np.zeros(n, dtype=np.int32) self.buckets = np.zeros(n, dtype=np.int32)
if kbytes == 1: if kbytes == 1:
loc_type = np.uint8 loc_type = np.uint8
@ -65,34 +66,44 @@ class BloomFilter():
loc_type = np.int32 loc_type = np.int32
self._locations = np.zeros(k, dtype=loc_type) self._locations = np.zeros(k, dtype=loc_type)
def mod(self, a, b):
    """
    Remainder of ``a`` divided by ``b`` with the sign rule of C and JS.

    Python's ``%`` gives the result the sign of the divisor, whereas C and
    JavaScript give it the sign of the dividend. The remainder is therefore
    computed on magnitudes and the sign of ``a`` is applied afterwards, so
    the result is also correct when ``b`` is negative.

    Params:
        a: dividend.
        b: divisor (must be non-zero).

    Returns:
        The truncated-division remainder, e.g. ``mod(-7, 3) == -1`` and
        ``mod(7, -3) == 1``.
    """
    remainder = abs(a) % abs(b)
    # A zero dividend falls in the "negate" branch, which leaves 0 unchanged.
    return remainder if a > 0 else -remainder
def locations(self, v):
    """
    Compute the ``self.k`` bit positions associated with ``v``.

    The first position comes from ``self.fnv_1a(v)``; each following one is
    obtained by adding ``self.fnv_1a_b`` of that hash and reducing with
    ``self.mod`` by ``self.m``. Negative remainders are shifted up by
    ``self.m`` before being stored.

    Returns:
        ``self._locations``, overwritten in place (the same buffer is
        reused and returned on every call).
    """
    slots = self._locations
    first_hash = self.fnv_1a(v)
    step = self.fnv_1a_b(first_hash)
    cursor = self.mod(first_hash, self.m)
    for slot in range(self.k):
        # self.mod keeps the sign of its dividend, so bring negatives back
        # into the [0, m) range before storing them.
        slots[slot] = cursor if cursor >= 0 else cursor + self.m
        cursor = self.mod(cursor + step, self.m)
    return slots
def add(self, v):
    """
    Insert ``v`` into the filter.

    ``v`` is converted with ``str`` and passed to ``self.locations``; for
    each of the ``self.k`` returned positions the matching bit is set in
    ``self.buckets``, each bucket holding 32 bits.

    The position and the bucket are converted to plain Python ints before
    any shifting, so the mask width does not depend on how NumPy promotes
    its scalar types. The result is wrapped to a signed 32-bit value so
    that setting bit 31 yields a negative bucket, as a JS ``|=`` would.
    """
    positions = self.locations(str(v))
    buckets = self.buckets
    for i in range(self.k):
        # locations() stores only non-negative positions, so divmod gives
        # the bucket index and the bit offset inside that bucket.
        index, bit = divmod(int(positions[i]), 32)
        buckets[index] = ctypes.c_int32(int(buckets[index]) | (1 << bit)).value
def test(self, v): def test(self, v):
l = self.locations(v + "") l = self.locations(str(v))
i = 0 i = 0
buckets = self.buckets buckets = self.buckets
while i < self.k: while i < self.k:
b = l[i] b = l[i]
if buckets[math.floor(b / 32)] & (1 << int(b % 32)) == 0: if ctypes.c_int(buckets[math.floor(b / 32)] & ctypes.c_int(1 << (self.mod(b, 32))).value).value == 0:
return False return False
i += 1 i += 1
return True return True
@ -111,51 +122,74 @@ class BloomFilter():
""" """
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
""" """
v -= (v >> 1) & 0x55555555 v -= ctypes.c_int(ctypes.c_int(v >> 1).value & ctypes.c_int(0x55555555).value).value
v = (v & 0x33333333) + ((v >> 2) & 0x33333333) v = ctypes.c_int(v & 0x33333333).value + c_types.c_int(ctypes.c_int(v >> 2).value & 0x33333333).value
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24 return ctypes.c_int((ctypes.c_int((v + ctypes.c_int(v >> 4).value) & 0xF0F0F0F).value * 0x1010101) >> 24).value
def rshift(self,val, n):
    """
    Unsigned (logical) right shift, i.e. the JS >>> operator.

    ``val`` is first reduced modulo 2**32, which maps negative values onto
    their unsigned 32 bits representation, and is then shifted by ``n``.
    From https://stackoverflow.com/questions/5832982/how-to-get-the-logical-right-binary-shift-in-python
    """
    as_unsigned = val % 0x100000000
    return as_unsigned >> n
def fnv_1a(self, v):
    """
    Fowler/Noll/Vo hashing.

    Numbers in JS are 64 bits floats: arithmetic is done on that
    representation, but bitwise operations treat their operands as 32 bits
    integers. To reproduce this, the result of every bitwise operation is
    wrapped to a signed 32 bits integer, while additions are left unwrapped
    until the next bitwise operation.

    Params:
        v: the string to hash.

    Returns:
        The hash as a signed 32 bits integer.

    NOTE(review): ord() yields full code points whereas JavaScript's
    charCodeAt yields UTF-16 code units, so hashes presumably differ from
    the JS side for characters above U+FFFF -- confirm if that matters.
    """
    def to_int32(value):
        # Truncate to 32 bits and reinterpret as signed.
        return ctypes.c_int32(value).value

    def mix(h, chunk):
        # XOR one chunk into the hash, then multiply by the FNV prime,
        # written as a sum of shifts (the sum itself is not wrapped).
        h = to_int32(h ^ chunk)
        return h + (to_int32(h << 1) + to_int32(h << 4) + to_int32(h << 7)
                    + to_int32(h << 8) + to_int32(h << 24))

    a = 2166136261
    for char in v:
        c = ord(char)
        # The three upper bytes only take part in the hash when non-zero.
        for mask, shift in ((0xff000000, 24), (0xff0000, 16), (0xff00, 8)):
            d = to_int32(c & mask)
            if d:
                a = mix(a, to_int32(d >> shift))
        # The low byte is always mixed in.
        a = mix(a, to_int32(c & 0xff))
    # From http://home.comcast.net/~bretm/hash/6.html
    a += to_int32(a << 13)
    a = to_int32(a ^ to_int32(self.rshift(a, 7)))
    a += to_int32(a << 3)
    a = to_int32(a ^ to_int32(self.rshift(a, 17)))
    a += to_int32(a << 5)
    return to_int32(a & 0xffffffff)
def fnv_1a_b(self, a):
    """
    Run one more FNV round on an existing hash ``a``.

    The result of every bitwise operation goes through ctypes.c_int so
    that it is wrapped the way a bitwise operation on an integer is in JS;
    additions are left unwrapped until the next bitwise operation.

    Returns:
        The new hash, after a final pass through ctypes.c_int.
    """
    def wrap(value):
        return ctypes.c_int(value).value

    # Multiplication step, written as a sum of shifted copies of the hash.
    a += sum(wrap(a << shift) for shift in (1, 4, 7, 8, 24))
    # Two add-shift / xor-unsigned-shift rounds, then a last add-shift.
    for left, right in ((13, 7), (3, 17)):
        a += wrap(a << left)
        a = wrap(a ^ wrap(self.rshift(a, right)))
    a += wrap(a << 5)
    return wrap(a & 0xffffffff)

View File

@ -63,8 +63,6 @@ if __name__ == "__main__":
tmp_filter.add(word) tmp_filter.add(word)
filters.append(tmp_filter.buckets) filters.append(tmp_filter.buckets)
print(tmp_filter.buckets)
sys.exit()
# First Int32 is length # First Int32 is length
filters_to_write = struct.pack("<i", len(filters)) filters_to_write = struct.pack("<i", len(filters))

View File

@ -8,6 +8,7 @@
<h1>Bloom.JS demo</h1> <h1>Bloom.JS demo</h1>
<p>This page runs the <code>bloom.js</code> library unit-tests. Look at your console output for <code>assert</code> error and verbose debugging. <p>This page runs the <code>bloom.js</code> library unit-tests. Look at your console output for <code>assert</code> error and verbose debugging.
<script type="text/javascript" src="js/bloom.js"></script> <script type="text/javascript" src="js/bloom.js"></script>
<script type="text/javascript" src="js/test.js"></script>
<script type="text/javascript" src="js/test2.js"></script> <script type="text/javascript" src="js/test2.js"></script>
</body> </body>
</html> </html>