diff --git a/index_generation/generate_index.py b/index_generation/generate_index.py
index e4d7aca..3e5ae55 100755
--- a/index_generation/generate_index.py
+++ b/index_generation/generate_index.py
@@ -25,7 +25,7 @@ def remove_common_words(words):
 
 def padding_16(x):
     if x < 256:
-        return bytes([0,len(samples)])
+        return bytes([0,x])
     else:
         return bytes([int(x/256), x%256])
 
@@ -68,10 +68,20 @@ for sample in samples:
                  "will have to change the way data is stored in the binary "
                  "file to handle such amount of text.")
 
-    #write_little.extend(bitfield(filters[sample].bitarray.length(), 16))
-    #write_little.extend(filters[sample].bitarray)
-    #write_big.extend(bitfield(filters[sample].bitarray.length(), 16))
-    #write_big.extend(filters[sample].bitarray)
+    tmp = bitarray(endian="little")
+    tmp.frombytes(padding_16(filters[sample].bitarray.length()))
+    write_little.extend(tmp)
+    write_little.extend(filters[sample].bitarray)
+    # Zero-pad to the next byte boundary: the reader consumes ceil(bits / 8) bytes.
+    write_little.extend([0 for i in range(-filters[sample].bitarray.length() %
+                                          8)])
+    tmp = bitarray(endian="big")
+    tmp.frombytes(padding_16(filters[sample].bitarray.length()))
+    write_big.extend(tmp)
+    write_big.extend(filters[sample].bitarray)
+    # Zero-pad to the next byte boundary: the reader consumes ceil(bits / 8) bytes.
+    write_big.extend([0 for i in range(-filters[sample].bitarray.length() %
+                                       8)])
 
 with open('../data/search_index_little', 'wb') as index_fh:
     print(write_little)
diff --git a/js/bloom.js b/js/bloom.js
index fc08d16..ec89ac7 100644
--- a/js/bloom.js
+++ b/js/bloom.js
@@ -38,33 +38,27 @@ document.getElementById('search').addEventListener('click', function() {
 
         if (arrayBuffer) {
             var tmp = new Uint8Array(arrayBuffer);
-            var nb_filters = 0;
-            console.log(tmp);
-            return;
+            var nb_filters = tmp[0]*256+tmp[1];
 
-            // First 16 bits == number of bitarrays
-            for (var i = 0; i < 16; i++) {
-                nb_filters += tmp[i] << i;
-            }
             search_index = new Array(nb_filters);
 
             // For each of the bitarrays, parse it
-            var offset = 0;
+            var offset = 2;
             for (var i = 0; i < nb_filters; i++) {
                 // Size of the filter
-                var length = 0;
-                for (var j = offset; j < offset + 16; j++) {
-                    length += tmp[j] << j;
-                }
-                search_index[i] = new Uint8Array(length);
+                var length = tmp[offset]*256+tmp[offset+1];  // length is a number of bits
+
+                var length_offset = Math.ceil(length/8);  // filter size in bytes
+
+                search_index[i] = new Uint8Array(length_offset);
 
                 // Parse filter
-                for (var j = 16; j < 16 + length; j++) {
-                    search_index[i][j] = tmp[j + offset];
+                for (var j = 0; j < length_offset; j++) {
+                    search_index[i][j] = tmp[offset + 2 + j];
                 }
-
-                offset += 16 + length;
+                offset += 2 + length_offset;
             }
+            console.log(search_index);
             document.getElementById("loading").innerHTML = "";
             loading = false;
             usable = true;