Refactor JS view, working JS code for Bloom filters
This commit is contained in:
parent
a1fea61f4f
commit
775a8ab930
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
*/__pycache__
|
*/__pycache__
|
||||||
*/search_index*
|
*/search_index*
|
||||||
*/pages_index.json
|
*/pages_index.json
|
||||||
|
*.pyc
|
||||||
|
@ -14,5 +14,6 @@
|
|||||||
</form>
|
</form>
|
||||||
<div id="results"></div>
|
<div id="results"></div>
|
||||||
<script type="text/javascript" src="js/bloom.js"></script>
|
<script type="text/javascript" src="js/bloom.js"></script>
|
||||||
|
<script type="text/javascript" src="js/app.js"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
241
js/bloom.js
241
js/bloom.js
@ -1,104 +1,153 @@
|
|||||||
var loading = false;
|
/*
|
||||||
var usable = false;
|
* BloomFilters as implemented by https://github.com/jasondavies/bloomfilter.js
|
||||||
var search_index = false;
|
*
|
||||||
|
* Original license kept
|
||||||
|
*
|
||||||
|
* Modified by Phyks to be constructed using the (capacity, error_rate) syntax rather
|
||||||
|
* than the explicit (number of bits, number of hash functions) syntax.
|
||||||
|
*/
|
||||||
|
|
||||||
// Check endianness to serve right file
|
(function(exports) {
|
||||||
function checkEndian(){
|
exports.BloomFilter = BloomFilter;
|
||||||
var a = new ArrayBuffer(4);
|
exports.fnv_1a = fnv_1a;
|
||||||
var b = new Uint8Array(a);
|
exports.fnv_1a_b = fnv_1a_b;
|
||||||
var c = new Uint32Array(a);
|
var typedArrays = typeof ArrayBuffer !== "undefined";
|
||||||
b[0] = 0xa1;
|
|
||||||
b[1] = 0xb2;
|
|
||||||
b[2] = 0xc3;
|
|
||||||
b[3] = 0xd4;
|
|
||||||
if(c[0] == 0xd4c3b2a1) return "little";
|
|
||||||
if(c[0] == 0xa1b2c3d4) return "big";
|
|
||||||
else return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
document.getElementById('search_form').addEventListener('submit', function(e) {
|
// Creates a new bloom filter given its minimal capacity and an error_rate.
|
||||||
e.preventDefault();
|
// Calculation taken from https://en.wikipedia.org/wiki/Bloom_filter.
|
||||||
});
|
// If *capacity* is an array-like object, with a length
|
||||||
|
// property, then the bloom filter is loaded with data from the array, where
|
||||||
document.getElementById('search').addEventListener('click', function() {
|
// each element is a 32-bit integer.
|
||||||
if(this.value == "Search for articles...") {
|
// *error_rate* is an estimation of the required error_rate.
|
||||||
this.value = "";
|
function BloomFilter(capacity, error_rate) {
|
||||||
|
// *m* is the number of bits. Note that *m* is rounded up to
|
||||||
|
// the nearest multiple of 32. *k* specifies the number of hashing functions.
|
||||||
|
var a, i = -1;
|
||||||
|
// Number of slices, k
|
||||||
|
var k = Math.ceil(- Math.log(error_rate) / Math.log(2));
|
||||||
|
// Total number of bits, m
|
||||||
|
// Size of the UInt32 table, n
|
||||||
|
var m, n;
|
||||||
|
if (typeof capacity !== "number") {
|
||||||
|
a = capacity;
|
||||||
|
// Total number of bits, m
|
||||||
|
m = a.length * 32;
|
||||||
|
// Size of the UInt32 table, n
|
||||||
|
n = a.length;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Total number of bits, m
|
||||||
|
m = Math.ceil(capacity * Math.abs(Math.log(error_rate)) / (k * Math.pow(Math.log(2), 2))) * k;
|
||||||
|
// Size of the UInt32 table, n
|
||||||
|
n = Math.ceil(m / 32);
|
||||||
|
// Round total number of bits up to the next multiple of 32
|
||||||
|
m = n * 32;
|
||||||
|
}
|
||||||
|
this.m = m;
|
||||||
|
this.k = k;
|
||||||
|
if (typedArrays) {
|
||||||
|
var kbytes = 1 << Math.ceil(Math.log(Math.ceil(Math.log(m) / Math.LN2 / 8)) / Math.LN2),
|
||||||
|
array = kbytes === 1 ? Uint8Array : kbytes === 2 ? Uint16Array : Uint32Array,
|
||||||
|
kbuffer = new ArrayBuffer(kbytes * k),
|
||||||
|
buckets = this.buckets = new Int32Array(n);
|
||||||
|
if (a) while (++i < n) buckets[i] = a[i];
|
||||||
|
this._locations = new array(kbuffer);
|
||||||
|
} else {
|
||||||
|
var buckets = this.buckets = [];
|
||||||
|
if (a) while (++i < n) buckets[i] = a[i];
|
||||||
|
else while (++i < n) buckets[i] = 0;
|
||||||
|
this._locations = [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// See http://willwhim.wordpress.com/2011/09/03/producing-n-hash-functions-by-hashing-only-once/
|
||||||
if(search_index === false) {
|
BloomFilter.prototype.locations = function(v) {
|
||||||
loading = true;
|
var k = this.k,
|
||||||
document.getElementById("loading").innerHTML = "Loading index file...";
|
m = this.m,
|
||||||
|
r = this._locations,
|
||||||
var oReq = new XMLHttpRequest();
|
a = fnv_1a(v),
|
||||||
oReq.open("GET", "data/search_index_"+checkEndian(), true);
|
b = fnv_1a_b(a),
|
||||||
oReq.responseType = "arraybuffer";
|
i = -1,
|
||||||
|
x = a % m;
|
||||||
oReq.onload = function (oEvent) {
|
while (++i < k) {
|
||||||
var arrayBuffer = oReq.response; // Note: not oReq.responseText
|
r[i] = x < 0 ? (x + m) : x;
|
||||||
|
x = (x + b) % m;
|
||||||
if (arrayBuffer) {
|
}
|
||||||
var tmp = new Uint8Array(arrayBuffer);
|
return r;
|
||||||
var nb_filters = tmp[0]*256+tmp[1];
|
};
|
||||||
|
BloomFilter.prototype.add = function(v) {
|
||||||
search_index = new Array(nb_filters);
|
var l = this.locations(v + ""),
|
||||||
|
i = -1,
|
||||||
// For each of the bitarrays, parse it
|
k = this.k,
|
||||||
var offset = 2;
|
buckets = this.buckets;
|
||||||
for (var i = 0; i < nb_filters; i++) {
|
while (++i < k) buckets[Math.floor(l[i] / 32)] |= 1 << (l[i] % 32);
|
||||||
// Size of the filter
|
};
|
||||||
var length = tmp[offset]*256+tmp[offset+1]; // length is a number of bytes
|
BloomFilter.prototype.test = function(v) {
|
||||||
|
var l = this.locations(v + ""),
|
||||||
var length_offset = Math.ceil(length/8);
|
i = -1,
|
||||||
|
k = this.k,
|
||||||
search_index[i] = new Uint8Array(length_offset);
|
b,
|
||||||
|
buckets = this.buckets;
|
||||||
// Parse filter
|
while (++i < k) {
|
||||||
for (var j = 2; j < 2 + length_offset; j++) {
|
b = l[i];
|
||||||
search_index[i][j] = tmp[offset + j];
|
if ((buckets[Math.floor(b / 32)] & (1 << (b % 32))) === 0) {
|
||||||
}
|
return false;
|
||||||
offset += 2 + length_offset;
|
|
||||||
}
|
|
||||||
console.log(search_index);
|
|
||||||
document.getElementById("loading").innerHTML = "";
|
|
||||||
loading = false;
|
|
||||||
usable = true;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
document.getElementById("loading").innerHTML = "Error while loading search index.";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
oReq.send(null);
|
|
||||||
|
|
||||||
var oReq2 = new XMLHttpRequest();
|
|
||||||
oReq2.open("GET", "data/pages_index.json", true);
|
|
||||||
oReq2.onreadystatechange = function() {
|
|
||||||
if (this.readyState == 4) {
|
|
||||||
if (this.status == 200) {
|
|
||||||
pages_index = window.JSON ? JSON.parse(this.responseText) : eval("("+this.responseText+")");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
document.getElementById("loading").innerHTML = "Error while loading pages index : HTTP error " + this.status + " " + this.statusText;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
oReq2.send();
|
return true;
|
||||||
|
};
|
||||||
|
// Estimated cardinality.
|
||||||
|
BloomFilter.prototype.size = function() {
|
||||||
|
var buckets = this.buckets,
|
||||||
|
bits = 0;
|
||||||
|
for (var i = 0, n = buckets.length; i < n; ++i) bits += popcnt(buckets[i]);
|
||||||
|
return -this.m * Math.log(1 - bits / this.m) / this.k;
|
||||||
|
};
|
||||||
|
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||||
|
function popcnt(v) {
|
||||||
|
v -= (v >> 1) & 0x55555555;
|
||||||
|
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
|
||||||
|
return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
|
||||||
}
|
}
|
||||||
});
|
// Fowler/Noll/Vo hashing.
|
||||||
|
function fnv_1a(v) {
|
||||||
/*function callback_change() {
|
var n = v.length,
|
||||||
if(!usable) {
|
a = 2166136261,
|
||||||
return;
|
c,
|
||||||
}
|
d,
|
||||||
var search = document.getElementById("search").value;
|
i = -1;
|
||||||
document.getElementById("results").innerHTML = "<h2>Results :</h2>";
|
while (++i < n) {
|
||||||
//* for(var key in index) {
|
c = v.charCodeAt(i);
|
||||||
if(index[key].test(search)) {
|
if (d = c & 0xff000000) {
|
||||||
document.getElementById("results").innerHTML += "<p>"+key+"</p>";
|
a ^= d >> 24;
|
||||||
|
a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24);
|
||||||
|
}
|
||||||
|
if (d = c & 0xff0000) {
|
||||||
|
a ^= d >> 16;
|
||||||
|
a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24);
|
||||||
|
}
|
||||||
|
if (d = c & 0xff00) {
|
||||||
|
a ^= d >> 8;
|
||||||
|
a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24);
|
||||||
|
}
|
||||||
|
a ^= c & 0xff;
|
||||||
|
a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24);
|
||||||
}
|
}
|
||||||
}* //
|
// From http://home.comcast.net/~bretm/hash/6.html
|
||||||
if(!document.querySelectorAll("#results p").length) {
|
a += a << 13;
|
||||||
document.getElementById("results").innerHTML += "<p>No results...</p>";
|
a ^= a >> 7;
|
||||||
|
a += a << 3;
|
||||||
|
a ^= a >> 17;
|
||||||
|
a += a << 5;
|
||||||
|
return a & 0xffffffff;
|
||||||
}
|
}
|
||||||
}
|
// One additional iteration of FNV, given a hash.
|
||||||
|
function fnv_1a_b(a) {
|
||||||
document.getElementById("search").addEventListener('input', callback_change);*/
|
a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24);
|
||||||
|
a += a << 13;
|
||||||
|
a ^= a >> 7;
|
||||||
|
a += a << 3;
|
||||||
|
a ^= a >> 17;
|
||||||
|
a += a << 5;
|
||||||
|
return a & 0xffffffff;
|
||||||
|
}
|
||||||
|
})(typeof exports !== "undefined" ? exports : this);
|
||||||
|
36
js/test.js
Normal file
36
js/test.js
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/* These are some basic unit-tests for the bloom.js module */
|
||||||
|
|
||||||
|
var bloom = new BloomFilter(4, 0.1);
|
||||||
|
console.log(bloom);
|
||||||
|
|
||||||
|
// Add some elements to the filter.
|
||||||
|
bloom.add("foo");
|
||||||
|
bloom.add("bar");
|
||||||
|
|
||||||
|
// Test if an item is in our filter.
|
||||||
|
// Returns true if an item is probably in the set,
|
||||||
|
// or false if an item is definitely not in the set.
|
||||||
|
console.assert(bloom.test("foo") === true);
|
||||||
|
console.assert(bloom.test("bar") === true);
|
||||||
|
console.assert(bloom.test("blah") === false);
|
||||||
|
console.assert(bloom.test("blahahvhzfeh") === false);
|
||||||
|
console.assert(bloom.test("blahahvhzfehgfgahafgfa") === false);
|
||||||
|
|
||||||
|
// Serialisation. Note that bloom.buckets may be a typed array,
|
||||||
|
// so we convert to a normal array first.
|
||||||
|
var array = [].slice.call(bloom.buckets),
|
||||||
|
json = JSON.stringify(array);
|
||||||
|
|
||||||
|
console.log(array);
|
||||||
|
|
||||||
|
// Deserialisation. Note that any array-like object is supported, but
|
||||||
|
// this will be used directly, so you may wish to use a typed array for
|
||||||
|
// performance.
|
||||||
|
var bloom = new BloomFilter(array, 0.1);
|
||||||
|
console.log(bloom);
|
||||||
|
|
||||||
|
console.assert(bloom.test("foo") === true);
|
||||||
|
console.assert(bloom.test("bar") === true);
|
||||||
|
console.assert(bloom.test("blah") === false);
|
||||||
|
console.assert(bloom.test("blahahvhzfeh") === false);
|
||||||
|
console.assert(bloom.test("blahahvhzfehgfgahafgfa") === false);
|
Loading…
Reference in New Issue
Block a user