Addition of a Bloom Filter object (#2176)
* Initial checkin * Add bloom.md into mkdocs * Added reset and improved info * Update bloom.c * Update bloom.md * Add Wikipedia link
This commit is contained in:
parent
87a6a9bdb9
commit
ef91580c7b
|
@ -23,6 +23,7 @@
|
|||
//#define LUA_USE_MODULES_AM2320
|
||||
//#define LUA_USE_MODULES_APA102
|
||||
#define LUA_USE_MODULES_BIT
|
||||
//#define LUA_USE_MODULES_BLOOM
|
||||
//#define LUA_USE_MODULES_BMP085
|
||||
//#define LUA_USE_MODULES_BME280
|
||||
//#define LUA_USE_MODULES_BME680
|
||||
|
|
|
@ -0,0 +1,192 @@
|
|||
/*
|
||||
* Module for bloom filters
|
||||
*
|
||||
* Philip Gladstone, N1DQ
|
||||
*/
|
||||
|
||||
#include "module.h"
|
||||
#include "lauxlib.h"
|
||||
#include "c_types.h"
|
||||
#include "../crypto/sha2.h"
|
||||
|
||||
/* add_or_check() below hashes with SHA-256, so stop the build early when the module is enabled without SHA2 support. */
#if defined(LUA_USE_MODULES_BLOOM) && !defined(SHA2_ENABLE)
|
||||
#error Must have SHA2_ENABLE set for BLOOM module
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint8 fns;
|
||||
uint16 size;
|
||||
uint32 occupancy;
|
||||
uint32 buf[];
|
||||
} bloom_t;
|
||||
|
||||
/*
 * Hash a string and either test or set the corresponding filter bits.
 *
 * The SHA-256 digest of buf[0..len) is read as filter->fns consecutive
 * 24-bit big-endian values (they overlap when the step is 2 bytes). Each
 * value, reduced modulo the number of bits in the filter, selects one bit.
 *
 *   buf, len  bytes to hash
 *   filter    filter to consult, and to update when add is true
 *   add       true:  set every selected bit, incrementing occupancy for
 *                    each bit that was previously clear
 *             false: leave the filter untouched and stop at the first
 *                    clear bit
 *
 * Returns true when every selected bit was already set before the call,
 * false otherwise. A filter with zero words always reports false.
 */
static bool add_or_check(const uint8 *buf, size_t len, bloom_t *filter, bool add) {
  const uint32 bits = (uint32) filter->size << 5;   /* 32 bits per word */

  if (bits == 0) {
    /* No bits to index, and "val % bits" below would divide by zero. */
    return false;
  }

  SHA256_CTX ctx;
  SHA256_Init(&ctx);
  SHA256_Update(&ctx, buf, len);

  /* Unsigned storage: the digest is consumed as raw octets below. */
  uint8 hash[32];
  SHA256_Final(hash, &ctx);

  /*
   * Advance 3 bytes per function, or 2 when there are more than 10
   * functions, so that the final 3-byte read still ends inside the
   * 32-byte digest (bloom_create caps fns at 15: 14 * 2 + 3 = 31).
   */
  const int hstep = filter->fns > 10 ? 2 : 3;
  const uint8 *h = hash;
  bool prev = true;
  int i;

  for (i = 0; i < filter->fns; i++) {
    uint32 val = ((uint32) h[0] << 16) | ((uint32) h[1] << 8) | h[2];
    h += hstep;
    val %= bits;

    uint32 offset = val >> 5;
    /* Shift an unsigned 1: "1 << 31" on a signed int is undefined. */
    uint32 bit = (uint32) 1 << (val & 31);

    if (!(filter->buf[offset] & bit)) {
      prev = false;
      if (!add) {
        /* A single clear bit already proves the string was never added. */
        break;
      }
      filter->buf[offset] |= bit;
      filter->occupancy++;
    }
  }

  return prev;
}
|
||||
|
||||
/*
 * Lua: present = filter:check(string)
 *
 * Tests the string against the filter without modifying it. Pushes one
 * boolean: true when every bit selected for the string is already set.
 */
static int bloom_filter_check(lua_State *L) {
  bloom_t *self = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  size_t nbytes;
  const char *text = luaL_checklstring(L, 2, &nbytes);

  lua_pushboolean(L, add_or_check((const uint8 *) text, nbytes, self, false));
  return 1;
}
|
||||
|
||||
/*
 * Lua: seen = filter:add(string)
 *
 * Sets every bit selected for the string. Pushes one boolean: true when
 * all of those bits were already set before this call, false otherwise.
 */
static int bloom_filter_add(lua_State *L) {
  bloom_t *self = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  size_t nbytes;
  const char *text = luaL_checklstring(L, 2, &nbytes);

  lua_pushboolean(L, add_or_check((const uint8 *) text, nbytes, self, true));
  return 1;
}
|
||||
|
||||
/*
 * Lua: filter:reset()
 *
 * Clears every word of the bit array and zeroes the occupancy counter.
 * Pushes no results.
 */
static int bloom_filter_reset(lua_State *L) {
  bloom_t *self = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  self->occupancy = 0;
  memset(self->buf, 0, self->size * sizeof(self->buf[0]));

  return 0;
}
|
||||
|
||||
/*
 * Lua: bits, fns, occupancy, fprate = filter:info()
 *
 * Pushes four integers:
 *   bits       total number of bits in the filter
 *   fns        number of bit positions used per string
 *   occupancy  number of bits currently set
 *   fprate     n such that the estimated false-positive chance is 1 in n,
 *              computed as (bits / occupancy) ^ fns and capped at 1000000
 */
static int bloom_filter_info(lua_State *L) {
  enum { FPRATE_LIMIT = 1000000 };

  bloom_t *self = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  lua_pushinteger(L, self->size << 5);
  lua_pushinteger(L, self->fns);
  lua_pushinteger(L, self->occupancy);

  /* An empty filter cannot produce a false positive: report the cap. */
  uint64 fprate = FPRATE_LIMIT;

  if (self->occupancy > 0) {
    /* First pass with a whole-number ratio; stop once the cap is reached. */
    unsigned int ratio = (self->size << 5) / self->occupancy;
    int left = self->fns - 1;

    fprate = ratio;
    while (left > 0 && fprate < FPRATE_LIMIT) {
      fprate = fprate * ratio;
      left--;
    }

    if (fprate < FPRATE_LIMIT) {
      /*
       * The integer ratio discards its fractional part, which matters for
       * small results. Redo the power with the ratio scaled by 256 (eight
       * fractional bits), rescaling after every multiplication.
       */
      unsigned int ratio_q8 = (self->size << 13) / self->occupancy;
      uint64 power_q8 = ratio_q8;

      left = self->fns - 1;
      while (left > 0 && power_q8 < 256000000) {
        power_q8 = (power_q8 * ratio_q8) >> 8;
        left--;
      }

      fprate = power_q8 >> 8;
    }
  }

  lua_pushinteger(L, fprate > FPRATE_LIMIT ? FPRATE_LIMIT : (int) fprate);

  return 4;
}
|
||||
|
||||
/*
 * Lua: filter = bloom.create(elements, errorrate)
 *
 *   elements   expected maximum number of strings; must be at least 1
 *   errorrate  n, where the target false-positive rate is 1 in n; values
 *              of 0 or less yield the minimum-size filter
 *
 * Sizing: the filter gets 1.5 bits per element for every significant bit
 * of errorrate, rounded up to a whole number of 32-bit words, with a floor
 * of 256 bits. The number of hash functions is about 2/3 of the bits per
 * element, clamped to the range 2..15.
 *
 * Pushes a new, zeroed userdata carrying the "bloom.filter" metatable.
 * Raises a Lua error when elements is not positive or when the filter
 * would need more words than the 16-bit size field can describe.
 */
static int bloom_create(lua_State *L) {
  /* bloom_t.size is a uint16 count of 32-bit words. */
  enum { MAX_FILTER_BITS = 0xFFFF * 32 };

  int items = luaL_checkinteger(L, 1);
  int error = luaL_checkinteger(L, 2);

  /* items is used as a divisor below. */
  luaL_argcheck(L, items > 0, 1, "must be positive");

  /* Number of significant bits in error: floor(log2(error)) + 1. */
  int log2err = 0;
  int n;
  for (n = error; n > 0; n >>= 1) {
    log2err++;
  }

  /* Checked by division so that items * log2err cannot overflow an int. */
  if (log2err > 0 && items > MAX_FILTER_BITS / log2err) {
    return luaL_error(L, "filter too large");
  }

  int bits = items * log2err;
  bits += bits >> 1;               /* scale by 1.5 */
  bits = (bits + 31) & ~31;        /* whole 32-bit words */

  if (bits < 256) {
    bits = 256;
  }

  /*
   * Without this check the word count would be silently truncated when
   * stored in the uint16 size field (possibly to zero).
   */
  if (bits > MAX_FILTER_BITS) {
    return luaL_error(L, "filter too large");
  }

  int size = bits >> 3;            /* bytes of bit array */

  int fns = bits / items;
  fns = (fns >> 1) + fns / 6;      /* roughly 2/3 of the bits per element */

  if (fns < 2) {
    fns = 2;
  }
  if (fns > 15) {
    /* add_or_check can take at most 15 values from a 32-byte digest. */
    fns = 15;
  }

  bloom_t *filter = (bloom_t *) lua_newuserdata(L, sizeof(bloom_t) + size);

  // Associate its metatable
  luaL_getmetatable(L, "bloom.filter");
  lua_setmetatable(L, -2);

  memset(filter, 0, sizeof(bloom_t) + size);
  filter->size = size >> 2;        /* bytes -> 32-bit words */
  filter->fns = fns;

  return 1;
}
|
||||
|
||||
static const LUA_REG_TYPE bloom_filter_map[] = {
|
||||
{ LSTRKEY( "add" ), LFUNCVAL( bloom_filter_add ) },
|
||||
{ LSTRKEY( "check" ), LFUNCVAL( bloom_filter_check ) },
|
||||
{ LSTRKEY( "reset" ), LFUNCVAL( bloom_filter_reset ) },
|
||||
{ LSTRKEY( "info" ), LFUNCVAL( bloom_filter_info ) },
|
||||
{ LSTRKEY( "__index" ), LROVAL( bloom_filter_map ) },
|
||||
{ LNILKEY, LNILVAL }
|
||||
};
|
||||
|
||||
// Module function map
|
||||
static const LUA_REG_TYPE bloom_map[] = {
|
||||
{ LSTRKEY( "create" ), LFUNCVAL( bloom_create ) },
|
||||
{ LNILKEY, LNILVAL }
|
||||
};
|
||||
|
||||
/*
 * Module open hook. Registers bloom_filter_map as the metatable named
 * "bloom.filter", which bloom_create looks up and attaches to every filter
 * it creates.
 */
LUALIB_API int bloom_open(lua_State *L) {
  luaL_rometatable(L, "bloom.filter", (void *)bloom_filter_map);

  return 1;
}
|
||||
|
||||
/* Declare the module: Lua name "bloom", function table bloom_map, open hook bloom_open. NOTE(review): exact registration semantics live in module.h -- confirm there. */
NODEMCU_MODULE(BLOOM, "bloom", bloom_map, bloom_open);
|
|
@ -0,0 +1,103 @@
|
|||
# Bloom Module
|
||||
| Since | Origin / Contributor | Maintainer | Source |
|
||||
| :----- | :-------------------- | :---------- | :------ |
|
||||
| 2017-11-13 | [Philip Gladstone](https://github.com/pjsg) | [Philip Gladstone](https://github.com/pjsg) | [bloom.c](../../../app/modules/bloom.c)|
|
||||
|
||||
|
||||
This module implements a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter). This is a probabilistic data structure that is used to test for set membership. There are two operations -- `add` and `check` -- that allow
|
||||
arbitrary strings to be added to the set or tested for set membership. Since this is a probabilistic data structure, `check` can return a false positive, i.e. report a string as present when it was never added. However,
|
||||
if the string *is* a member of the set, then the `check` operation will always return `true`.
|
||||
|
||||
## bloom.create()
|
||||
Create a filter object.
|
||||
|
||||
#### Syntax
|
||||
`bloom.create(elements, errorrate)`
|
||||
|
||||
#### Parameters
|
||||
- `elements` The largest number of elements to be added to the filter.
|
||||
- `errorrate` The error rate (the false positive rate). This is represented as `n` where the false positive rate is `1 / n`. This is the maximum rate of `check` returning true when the string is *not* in the set.
|
||||
|
||||
#### Returns
|
||||
A `filter` object.
|
||||
|
||||
#### Example
|
||||
|
||||
```
|
||||
filter = bloom.create(10000, 100) -- this will use around 13kB of memory
|
||||
```
|
||||
|
||||
## filter:add()
|
||||
Adds a string to the set and returns an indication of whether the string was already present.
|
||||
|
||||
#### Syntax
|
||||
`filter:add(string)`
|
||||
|
||||
#### Parameters
|
||||
- `string` The string to be added to the filter set.
|
||||
|
||||
#### Returns
|
||||
`true` if the string appeared to be already present in the filter (like `check`, this is subject to false positives). `false` otherwise.
|
||||
|
||||
#### Example
|
||||
|
||||
```
|
||||
if filter:add("apple") then
|
||||
print ("Seen an apple before!")
|
||||
else
|
||||
print ("Noted that the first apple has been seen")
|
||||
end
|
||||
```
|
||||
|
||||
## filter:check()
|
||||
Checks to see if a string is present in the filter set.
|
||||
|
||||
#### Syntax
|
||||
`present = filter:check(string)`
|
||||
|
||||
#### Parameters
|
||||
- `string` The string to be checked for membership in the set.
|
||||
|
||||
#### Returns
|
||||
`true` if the string was already present in the filter. `false` otherwise.
|
||||
|
||||
#### Example
|
||||
|
||||
```
|
||||
if filter:check("apple") then
|
||||
print ("Seen an apple before!")
|
||||
end
|
||||
```
|
||||
|
||||
|
||||
## filter:reset()
|
||||
Empties the filter.
|
||||
|
||||
#### Syntax
|
||||
`filter:reset()`
|
||||
|
||||
#### Returns
|
||||
Nothing
|
||||
|
||||
#### Example
|
||||
```
|
||||
filter:reset()
|
||||
```
|
||||
|
||||
## filter:info()
|
||||
Get some status information on the filter.
|
||||
|
||||
#### Syntax
|
||||
`bits, fns, occupancy, fprate = filter:info()`
|
||||
|
||||
#### Returns
|
||||
- `bits` The number of bits in the filter.
|
||||
- `fns` The number of hash functions in use.
|
||||
- `occupancy` The number of bits set in the filter.
|
||||
- `fprate` The approximate chance that the next `check` will return `true` when it should return `false`. This is represented as the inverse of the probability -- i.e. as the n in 1-in-n chance. This value is limited to 1,000,000.
|
||||
|
||||
#### Example
|
||||
```
|
||||
bits, fns, occupancy, fprate = filter:info()
|
||||
```
|
||||
|
|
@ -41,6 +41,7 @@ pages:
|
|||
- 'am2320': 'en/modules/am2320.md'
|
||||
- 'apa102': 'en/modules/apa102.md'
|
||||
- 'bit': 'en/modules/bit.md'
|
||||
- 'bloom' : 'en/modules/bloom.md'
|
||||
- 'bme280': 'en/modules/bme280.md'
|
||||
- 'bmp085': 'en/modules/bmp085.md'
|
||||
- 'cjson': 'en/modules/cjson.md'
|
||||
|
|
Loading…
Reference in New Issue