Addition of a Bloom Filter object (#2176)
* Initial checkin * Add bloom.md into mkdocs * Added reset and improved info * Update bloom.c * Update bloom.md * Add Wikipedia link
This commit is contained in:
parent
87a6a9bdb9
commit
ef91580c7b
|
@ -23,6 +23,7 @@
|
||||||
//#define LUA_USE_MODULES_AM2320
|
//#define LUA_USE_MODULES_AM2320
|
||||||
//#define LUA_USE_MODULES_APA102
|
//#define LUA_USE_MODULES_APA102
|
||||||
#define LUA_USE_MODULES_BIT
|
#define LUA_USE_MODULES_BIT
|
||||||
|
//#define LUA_USE_MODULES_BLOOM
|
||||||
//#define LUA_USE_MODULES_BMP085
|
//#define LUA_USE_MODULES_BMP085
|
||||||
//#define LUA_USE_MODULES_BME280
|
//#define LUA_USE_MODULES_BME280
|
||||||
//#define LUA_USE_MODULES_BME680
|
//#define LUA_USE_MODULES_BME680
|
||||||
|
|
|
@ -0,0 +1,192 @@
|
||||||
|
/*
|
||||||
|
* Module for bloom filters
|
||||||
|
*
|
||||||
|
* Philip Gladstone, N1DQ
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "module.h"
|
||||||
|
#include "lauxlib.h"
|
||||||
|
#include "c_types.h"
|
||||||
|
#include "../crypto/sha2.h"
|
||||||
|
|
||||||
|
#if defined(LUA_USE_MODULES_BLOOM) && !defined(SHA2_ENABLE)
|
||||||
|
#error Must have SHA2_ENABLE set for BLOOM module
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint8 fns;
|
||||||
|
uint16 size;
|
||||||
|
uint32 occupancy;
|
||||||
|
uint32 buf[];
|
||||||
|
} bloom_t;
|
||||||
|
|
||||||
|
/*
 * Hash a string with SHA-256 and either test or set its bits in the filter.
 *
 *   buf, len  the bytes of the string
 *   filter    the filter to probe or update
 *   add       true:  set every missing bit, counting each in occupancy
 *             false: only test, stopping at the first missing bit
 *
 * Returns true when every probed bit was already set (the string has
 * probably been added before), false when at least one bit was clear.
 */
static bool add_or_check(const uint8 *buf, size_t len, bloom_t *filter, bool add) {
  SHA256_CTX ctx;
  SHA256_Init(&ctx);
  SHA256_Update(&ctx, buf, len);

  /* Digest bytes are consumed as unsigned values below. */
  uint8 hash[32];
  SHA256_Final(hash, &ctx);

  uint32 bits = (uint32) filter->size << 5;   /* 32 bits per word */
  if (bits == 0) {
    /* An empty bit array cannot contain anything; also avoids "% 0". */
    return false;
  }

  const uint8 *h = hash;
  bool prev = true;

  /*
   * Every probe reads 3 digest bytes.  With more than 10 probes the stride
   * drops to 2 (overlapping reads) so that up to 15 probes still fit inside
   * the 32-byte digest.
   */
  int hstep = filter->fns > 10 ? 2 : 3;

  int i;
  for (i = 0; i < filter->fns; i++) {
    uint32 val = (((uint32) h[0] << 16) | ((uint32) h[1] << 8) | h[2]) % bits;
    h += hstep;

    uint32 offset = val >> 5;
    /* Unsigned shift: "1 << 31" on a signed int is undefined behaviour. */
    uint32 bit = (uint32) 1 << (val & 31);

    if (!(filter->buf[offset] & bit)) {
      prev = false;
      if (!add) {
        break;                 /* a single clear bit settles a check */
      }
      filter->buf[offset] |= bit;
      filter->occupancy++;
    }
  }

  return prev;
}
|
||||||
|
|
||||||
|
/*
 * Lua: present = filter:check(string)
 *
 * Tests the string against the filter without modifying it and pushes the
 * boolean result of add_or_check().
 */
static int bloom_filter_check(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  size_t len;
  const uint8 *str = (uint8 *) luaL_checklstring(L, 2, &len);

  lua_pushboolean(L, add_or_check(str, len, self, false));
  return 1;
}
|
||||||
|
|
||||||
|
/*
 * Lua: seen = filter:add(string)
 *
 * Sets the string's bits in the filter and pushes the boolean result of
 * add_or_check(): true if all of them were already set beforehand.
 */
static int bloom_filter_add(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  size_t len;
  const uint8 *str = (uint8 *) luaL_checklstring(L, 2, &len);

  lua_pushboolean(L, add_or_check(str, len, self, true));
  return 1;
}
|
||||||
|
|
||||||
|
/*
 * Lua: filter:reset()
 *
 * Clears every bit of the filter and zeroes the occupancy counter.
 * Returns nothing to Lua.
 */
static int bloom_filter_reset(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  self->occupancy = 0;
  /* size counts 32-bit words, so the byte length is four times that */
  memset(self->buf, 0, self->size << 2);

  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Lua: bits, fns, occupancy, fprate = filter:info()
 *
 * Pushes the bit count, the number of probes per string, the number of bits
 * set, and an estimate of the false-positive rate expressed as n in
 * "1 in n", capped at 1000000.  The estimate is (bits / occupancy) ^ fns in
 * integer arithmetic.
 */
static int bloom_filter_info(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  lua_pushinteger(L, self->size << 5);
  lua_pushinteger(L, self->fns);
  lua_pushinteger(L, self->occupancy);

  /* An empty filter reports the cap. */
  uint64 inverse_fp = 1000000;

  if (self->occupancy > 0) {
    int left;

    /* First pass: whole-number ratio, stopping once the cap is reached. */
    unsigned int whole = (self->size << 5) / self->occupancy;
    inverse_fp = whole;
    for (left = self->fns - 1; left > 0 && inverse_fp < 1000000; left--) {
      inverse_fp = inverse_fp * whole;
    }

    if (inverse_fp < 1000000) {
      /*
       * The truncated ratio did not reach the cap, so redo the power with
       * 8 fractional bits (everything scaled by 256) for a finer result.
       */
      unsigned int scaled = (self->size << 13) / self->occupancy;
      uint64 acc = scaled;
      for (left = self->fns - 1; left > 0 && acc < 256000000; left--) {
        acc = (acc * scaled) >> 8;
      }
      inverse_fp = acc >> 8;
    }
  }

  if (inverse_fp > 1000000) {
    lua_pushinteger(L, 1000000);
  } else {
    lua_pushinteger(L, (int) inverse_fp);
  }

  return 4;
}
|
||||||
|
|
||||||
|
/*
 * Lua: filter = bloom.create(elements, errorrate)
 *
 *   elements   largest number of strings expected to be added (must be > 0)
 *   errorrate  n, for a target false-positive rate of 1 in n (must be > 0)
 *
 * Pushes a zeroed "bloom.filter" userdata sized for those parameters.
 * Raises a Lua error when an argument is not positive or when the filter
 * would need more 32-bit words than the uint16 size field can describe.
 */
static int bloom_create(lua_State *L) {
  int items = luaL_checkinteger(L, 1);
  int error = luaL_checkinteger(L, 2);

  /* items == 0 would divide by zero below; negatives are meaningless. */
  luaL_argcheck(L, items > 0, 1, "must be positive");
  luaL_argcheck(L, error > 0, 2, "must be positive");

  /* After the loop -logp is the bit length of error (floor(log2) + 1). */
  int n = error;
  int logp = 0;
  while (n > 0) {
    n = n >> 1;
    logp--;
  }

  /*
   * bits = items * bitlen(error) * 1.5, rounded up to whole 32-bit words
   * with a floor of 256.  Done in 64 bits so large inputs cannot overflow.
   */
  uint64 bits = (uint64) items * (uint64) (-logp);
  bits += bits >> 1;
  bits = (bits + 31) & ~(uint64) 31;

  if (bits < 256) {
    bits = 256;
  }

  /* The word count is stored in a uint16; refuse anything that won't fit. */
  if (bits > ((uint64) 0xFFFF << 5)) {
    return luaL_error(L, "filter too large");
  }

  int size = (int) (bits >> 3);   /* byte length of the bit array */

  /*
   * Probes per string: roughly 2/3 of the bits-per-item figure (1/2 + 1/6),
   * presumably approximating the textbook optimum of ln 2 * bits/items.
   * Clamped to 2..15; add_or_check() cannot take more than 15 probes from a
   * 32-byte digest.
   */
  int fns = (int) (bits / (uint64) items);
  fns = (fns >> 1) + fns / 6;

  if (fns < 2) {
    fns = 2;
  }
  if (fns > 15) {
    fns = 15;
  }

  bloom_t *filter = (bloom_t *) lua_newuserdata(L, sizeof(bloom_t) + size);

  // Associate its metatable
  luaL_getmetatable(L, "bloom.filter");
  lua_setmetatable(L, -2);

  memset(filter, 0, sizeof(bloom_t) + size);
  filter->size = size >> 2;       /* bytes -> 32-bit words */
  filter->fns = fns;

  return 1;
}
|
||||||
|
|
||||||
|
static const LUA_REG_TYPE bloom_filter_map[] = {
|
||||||
|
{ LSTRKEY( "add" ), LFUNCVAL( bloom_filter_add ) },
|
||||||
|
{ LSTRKEY( "check" ), LFUNCVAL( bloom_filter_check ) },
|
||||||
|
{ LSTRKEY( "reset" ), LFUNCVAL( bloom_filter_reset ) },
|
||||||
|
{ LSTRKEY( "info" ), LFUNCVAL( bloom_filter_info ) },
|
||||||
|
{ LSTRKEY( "__index" ), LROVAL( bloom_filter_map ) },
|
||||||
|
{ LNILKEY, LNILVAL }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Module function map
|
||||||
|
static const LUA_REG_TYPE bloom_map[] = {
|
||||||
|
{ LSTRKEY( "create" ), LFUNCVAL( bloom_create ) },
|
||||||
|
{ LNILKEY, LNILVAL }
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Module open function: registers bloom_filter_map under the metatable name
 * "bloom.filter", the name that bloom_create() and the filter methods use.
 */
LUALIB_API int bloom_open(lua_State *L) {
  luaL_rometatable(L, "bloom.filter", (void *) bloom_filter_map);

  return 1;
}
|
||||||
|
|
||||||
|
// Hand the module's Lua name, function table and open function to the
// NodeMCU module registration macro.
NODEMCU_MODULE(BLOOM, "bloom", bloom_map, bloom_open);
|
|
@ -0,0 +1,103 @@
|
||||||
|
# Bloom Module
|
||||||
|
| Since | Origin / Contributor | Maintainer | Source |
|
||||||
|
| :----- | :-------------------- | :---------- | :------ |
|
||||||
|
| 2017-11-13 | [Philip Gladstone](https://github.com/pjsg) | [Philip Gladstone](https://github.com/pjsg) | [bloom.c](../../../app/modules/bloom.c)|
|
||||||
|
|
||||||
|
|
||||||
|
This module implements a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter). This is a probabilistic data structure that is used to test for set membership. There are two operations -- `add` and `check` -- that allow
|
||||||
|
arbitrary strings to be added to the set or tested for set membership. Since this is a probabilistic data structure, the answer returned can be incorrect. However,
|
||||||
|
if the string *is* a member of the set, then the `check` operation will always return `true`.
|
||||||
|
|
||||||
|
## bloom.create()
|
||||||
|
Create a filter object.
|
||||||
|
|
||||||
|
#### Syntax
|
||||||
|
`bloom.create(elements, errorrate)`
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
- `elements` The largest number of elements to be added to the filter.
|
||||||
|
- `errorrate` The error rate (the false positive rate). This is represented as `n` where the false positive rate is `1 / n`. This is the maximum rate of `check` returning true when the string is *not* in the set.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
A `filter` object.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```
|
||||||
|
filter = bloom.create(10000, 100) -- this will use around 13kB of memory
|
||||||
|
```
|
||||||
|
|
||||||
|
## filter:add()
|
||||||
|
Adds a string to the set and returns an indication of whether the string was already present.
|
||||||
|
|
||||||
|
#### Syntax
|
||||||
|
`filter:add(string)`
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
- `string` The string to be added to the filter set.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
`true` if the string was already present in the filter. `false` otherwise.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```
|
||||||
|
if filter:add("apple") then
|
||||||
|
print ("Seen an apple before!")
|
||||||
|
else
|
||||||
|
print ("Noted that the first apple has been seen")
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
## filter:check()
|
||||||
|
Checks to see if a string is present in the filter set.
|
||||||
|
|
||||||
|
#### Syntax
|
||||||
|
`present = filter:check(string)`
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
- `string` The string to be checked for membership in the set.
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
`true` if the string was already present in the filter. `false` otherwise.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
|
||||||
|
```
|
||||||
|
if filter:check("apple") then
|
||||||
|
print ("Seen an apple before!")
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## filter:reset()
|
||||||
|
Empties the filter.
|
||||||
|
|
||||||
|
#### Syntax
|
||||||
|
`filter:reset()`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
Nothing
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
```
|
||||||
|
filter:reset()
|
||||||
|
```
|
||||||
|
|
||||||
|
## filter:info()
|
||||||
|
Get some status information on the filter.
|
||||||
|
|
||||||
|
#### Syntax
|
||||||
|
`bits, fns, occupancy, fprate = filter:info()`
|
||||||
|
|
||||||
|
#### Returns
|
||||||
|
- `bits` The number of bits in the filter.
|
||||||
|
- `fns` The number of hash functions in use.
|
||||||
|
- `occupancy` The number of bits set in the filter.
|
||||||
|
- `fprate` The approximate chance that the next `check` will return `true` when it should return `false`. This is represented as the inverse of the probability -- i.e. as the n in 1-in-n chance. This value is limited to 1,000,000.
|
||||||
|
|
||||||
|
#### Example
|
||||||
|
```
|
||||||
|
bits, fns, occupancy, fprate = filter:info()
|
||||||
|
```
|
||||||
|
|
|
@ -41,6 +41,7 @@ pages:
|
||||||
- 'am2320': 'en/modules/am2320.md'
|
- 'am2320': 'en/modules/am2320.md'
|
||||||
- 'apa102': 'en/modules/apa102.md'
|
- 'apa102': 'en/modules/apa102.md'
|
||||||
- 'bit': 'en/modules/bit.md'
|
- 'bit': 'en/modules/bit.md'
|
||||||
|
- 'bloom' : 'en/modules/bloom.md'
|
||||||
- 'bme280': 'en/modules/bme280.md'
|
- 'bme280': 'en/modules/bme280.md'
|
||||||
- 'bmp085': 'en/modules/bmp085.md'
|
- 'bmp085': 'en/modules/bmp085.md'
|
||||||
- 'cjson': 'en/modules/cjson.md'
|
- 'cjson': 'en/modules/cjson.md'
|
||||||
|
|
Loading…
Reference in New Issue