2017-03-27 09:26:56 -04:00
|
|
|
#ifndef RAX_H
|
|
|
|
#define RAX_H
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
|
|
/* Representation of a radix tree as implemented in this file, that contains
|
|
|
|
* the strings "foo", "foobar" and "footer" after the insertion of each
|
|
|
|
* word. When the node represents a key inside the radix tree, we write it
|
|
|
|
* between [], otherwise it is written between ().
|
|
|
|
*
|
|
|
|
* This is the vanilla representation:
|
|
|
|
*
|
|
|
|
* (f) ""
|
|
|
|
* \
|
|
|
|
* (o) "f"
|
|
|
|
* \
|
|
|
|
* (o) "fo"
|
|
|
|
* \
|
|
|
|
* [t b] "foo"
|
|
|
|
* / \
|
|
|
|
* "foot" (e) (a) "foob"
|
|
|
|
* / \
|
|
|
|
* "foote" (r) (r) "fooba"
|
|
|
|
* / \
|
|
|
|
* "footer" [] [] "foobar"
|
|
|
|
*
|
|
|
|
* However, this implementation implements a very common optimization where
|
|
|
|
* successive nodes having a single child are "compressed" into the node
|
|
|
|
* itself as a string of characters, each representing a next-level child,
|
|
|
|
* and only the link to the node representing the last character node is
|
|
|
|
* provided inside the representation. So the above representation is turend
|
|
|
|
* into:
|
|
|
|
*
|
|
|
|
* ["foo"] ""
|
|
|
|
* |
|
|
|
|
* [t b] "foo"
|
|
|
|
* / \
|
|
|
|
* "foot" ("er") ("ar") "foob"
|
|
|
|
* / \
|
|
|
|
* "footer" [] [] "foobar"
|
|
|
|
*
|
|
|
|
* However this optimization makes the implementation a bit more complex.
|
|
|
|
* For instance if a key "first" is added in the above radix tree, a
|
|
|
|
* "node splitting" operation is needed, since the "foo" prefix is no longer
|
|
|
|
* composed of nodes having a single child one after the other. This is the
|
|
|
|
* above tree and the resulting node splitting after this event happens:
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* (f) ""
|
|
|
|
* /
|
|
|
|
* (i o) "f"
|
|
|
|
* / \
|
|
|
|
* "firs" ("rst") (o) "fo"
|
|
|
|
* / \
|
|
|
|
* "first" [] [t b] "foo"
|
|
|
|
* / \
|
|
|
|
* "foot" ("er") ("ar") "foob"
|
|
|
|
* / \
|
|
|
|
* "footer" [] [] "foobar"
|
|
|
|
*
|
|
|
|
* Similarly after deletion, if a new chain of nodes having a single child
|
|
|
|
* is created (the chain must also not include nodes that represent keys),
|
|
|
|
* it must be compressed back into a single node.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define RAX_NODE_MAX_SIZE ((1<<29)-1)
|
|
|
|
typedef struct raxNode {
|
|
|
|
uint32_t iskey:1; /* Does this node contain a key? */
|
|
|
|
uint32_t isnull:1; /* Associated value is NULL (don't store it). */
|
|
|
|
uint32_t iscompr:1; /* Node is compressed. */
|
|
|
|
uint32_t size:29; /* Number of children, or compressed string len. */
|
|
|
|
/* Data layout is as follows:
|
|
|
|
*
|
|
|
|
* If node is not compressed we have 'size' bytes, one for each children
|
|
|
|
* character, and 'size' raxNode pointers, point to each child node.
|
|
|
|
* Note how the character is not stored in the children but in the
|
|
|
|
* edge of the parents:
|
|
|
|
*
|
|
|
|
* [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
|
|
|
|
*
|
|
|
|
* if node is compressed (strlen != 0) the node has 1 children.
|
|
|
|
* In that case the 'size' bytes of the string stored immediately at
|
|
|
|
* the start of the data section, represent a sequence of successive
|
|
|
|
* nodes linked one after the other, for which only the last one in
|
|
|
|
* the sequence is actually represented as a node, and pointed to by
|
|
|
|
* the current compressed node.
|
|
|
|
*
|
|
|
|
* [header strlen=3][xyz][z-ptr](value-ptr?)
|
|
|
|
*
|
|
|
|
* Both compressed and not compressed nodes can represent a key
|
|
|
|
* with associated data in the radix tree at any level (not just terminal
|
|
|
|
* nodes).
|
|
|
|
*
|
|
|
|
* If the node has an associated key (iskey=1) and is not NULL
|
|
|
|
* (isnull=0), then after the raxNode pointers poiting to the
|
|
|
|
* childen, an additional value pointer is present (as you can see
|
|
|
|
* in the representation above as "value-ptr" field).
|
|
|
|
*/
|
|
|
|
unsigned char data[];
|
|
|
|
} raxNode;
|
|
|
|
|
|
|
|
typedef struct rax {
|
|
|
|
raxNode *head;
|
|
|
|
uint64_t numele;
|
|
|
|
uint64_t numnodes;
|
|
|
|
} rax;
|
|
|
|
|
|
|
|
/* Stack data structure used by raxLowWalk() in order to, optionally, return
|
|
|
|
* a list of parent nodes to the caller. The nodes do not have a "parent"
|
|
|
|
* field for space concerns, so we use the auxiliary stack when needed. */
|
|
|
|
#define RAX_STACK_STATIC_ITEMS 32
|
|
|
|
typedef struct raxStack {
|
|
|
|
void **stack; /* Points to static_items or an heap allocated array. */
|
|
|
|
size_t items, maxitems; /* Number of items contained and total space. */
|
|
|
|
/* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
|
|
|
|
* and use this static array of pointers instead. */
|
|
|
|
void *static_items[RAX_STACK_STATIC_ITEMS];
|
|
|
|
int oom; /* True if pushing into this stack failed for OOM at some point. */
|
|
|
|
} raxStack;
|
|
|
|
|
|
|
|
/* Radix tree iterator state is encapsulated into this data structure. */
|
|
|
|
#define RAX_ITER_STATIC_LEN 128
|
|
|
|
#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
|
|
|
|
element for the first iteration and
|
|
|
|
clear the flag. */
|
|
|
|
#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
|
|
|
|
#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
|
|
|
|
iterating. But it is slower. */
|
|
|
|
typedef struct raxIterator {
|
|
|
|
int flags;
|
|
|
|
rax *rt; /* Radix tree we are iterating. */
|
|
|
|
unsigned char *key; /* The current string. */
|
|
|
|
void *data; /* Data associated to this key. */
|
|
|
|
size_t key_len; /* Current key length. */
|
|
|
|
size_t key_max; /* Max key len the current key buffer can hold. */
|
|
|
|
unsigned char key_static_string[RAX_ITER_STATIC_LEN];
|
|
|
|
raxNode *node; /* Current node. Only for unsafe iteration. */
|
|
|
|
raxStack stack; /* Stack used for unsafe iteration. */
|
|
|
|
} raxIterator;
|
|
|
|
|
|
|
|
/* A special pointer returned for not found items. */
|
|
|
|
extern void *raxNotFound;
|
|
|
|
|
|
|
|
/* Exported API. */
|
|
|
|
rax *raxNew(void);
|
2017-04-07 02:46:39 -04:00
|
|
|
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
|
|
|
|
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
|
2017-03-27 09:26:56 -04:00
|
|
|
void *raxFind(rax *rax, unsigned char *s, size_t len);
|
|
|
|
void raxFree(rax *rax);
|
|
|
|
void raxStart(raxIterator *it, rax *rt);
|
2017-04-07 02:46:39 -04:00
|
|
|
int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
|
|
|
|
int raxNext(raxIterator *it);
|
|
|
|
int raxPrev(raxIterator *it);
|
|
|
|
int raxRandomWalk(raxIterator *it, size_t steps);
|
|
|
|
int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
|
2017-03-27 09:26:56 -04:00
|
|
|
void raxStop(raxIterator *it);
|
2017-08-30 06:40:27 -04:00
|
|
|
int raxEOF(raxIterator *it);
|
2017-03-27 09:26:56 -04:00
|
|
|
void raxShow(rax *rax);
|
2017-09-05 07:14:13 -04:00
|
|
|
uint64_t raxSize(rax *rax);
|
2017-03-27 09:26:56 -04:00
|
|
|
|
|
|
|
#endif
|