#ifndef RAX_H #define RAX_H #include /* Representation of a radix tree as implemented in this file, that contains * the strings "foo", "foobar" and "footer" after the insertion of each * word. When the node represents a key inside the radix tree, we write it * between [], otherwise it is written between (). * * This is the vanilla representation: * * (f) "" * \ * (o) "f" * \ * (o) "fo" * \ * [t b] "foo" * / \ * "foot" (e) (a) "foob" * / \ * "foote" (r) (r) "fooba" * / \ * "footer" [] [] "foobar" * * However, this implementation implements a very common optimization where * successive nodes having a single child are "compressed" into the node * itself as a string of characters, each representing a next-level child, * and only the link to the node representing the last character node is * provided inside the representation. So the above representation is turend * into: * * ["foo"] "" * | * [t b] "foo" * / \ * "foot" ("er") ("ar") "foob" * / \ * "footer" [] [] "foobar" * * However this optimization makes the implementation a bit more complex. * For instance if a key "first" is added in the above radix tree, a * "node splitting" operation is needed, since the "foo" prefix is no longer * composed of nodes having a single child one after the other. This is the * above tree and the resulting node splitting after this event happens: * * * (f) "" * / * (i o) "f" * / \ * "firs" ("rst") (o) "fo" * / \ * "first" [] [t b] "foo" * / \ * "foot" ("er") ("ar") "foob" * / \ * "footer" [] [] "foobar" * * Similarly after deletion, if a new chain of nodes having a single child * is created (the chain must also not include nodes that represent keys), * it must be compressed back into a single node. * */ #define RAX_NODE_MAX_SIZE ((1<<29)-1) typedef struct raxNode { uint32_t iskey:1; /* Does this node contain a key? */ uint32_t isnull:1; /* Associated value is NULL (don't store it). */ uint32_t iscompr:1; /* Node is compressed. */ uint32_t size:29; /* Number of children, or compressed string len. */ /* Data layout is as follows: * * If node is not compressed we have 'size' bytes, one for each children * character, and 'size' raxNode pointers, point to each child node. * Note how the character is not stored in the children but in the * edge of the parents: * * [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?) * * if node is compressed (strlen != 0) the node has 1 children. * In that case the 'size' bytes of the string stored immediately at * the start of the data section, represent a sequence of successive * nodes linked one after the other, for which only the last one in * the sequence is actually represented as a node, and pointed to by * the current compressed node. * * [header strlen=3][xyz][z-ptr](value-ptr?) * * Both compressed and not compressed nodes can represent a key * with associated data in the radix tree at any level (not just terminal * nodes). * * If the node has an associated key (iskey=1) and is not NULL * (isnull=0), then after the raxNode pointers poiting to the * childen, an additional value pointer is present (as you can see * in the representation above as "value-ptr" field). */ unsigned char data[]; } raxNode; typedef struct rax { raxNode *head; uint64_t numele; uint64_t numnodes; } rax; /* Stack data structure used by raxLowWalk() in order to, optionally, return * a list of parent nodes to the caller. The nodes do not have a "parent" * field for space concerns, so we use the auxiliary stack when needed. */ #define RAX_STACK_STATIC_ITEMS 32 typedef struct raxStack { void **stack; /* Points to static_items or an heap allocated array. */ size_t items, maxitems; /* Number of items contained and total space. */ /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap * and use this static array of pointers instead. */ void *static_items[RAX_STACK_STATIC_ITEMS]; int oom; /* True if pushing into this stack failed for OOM at some point. */ } raxStack; /* Radix tree iterator state is encapsulated into this data structure. */ #define RAX_ITER_STATIC_LEN 128 #define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current element for the first iteration and clear the flag. */ #define RAX_ITER_EOF (1<<1) /* End of iteration reached. */ #define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while iterating. But it is slower. */ typedef struct raxIterator { int flags; rax *rt; /* Radix tree we are iterating. */ unsigned char *key; /* The current string. */ void *data; /* Data associated to this key. */ size_t key_len; /* Current key length. */ size_t key_max; /* Max key len the current key buffer can hold. */ unsigned char key_static_string[RAX_ITER_STATIC_LEN]; raxNode *node; /* Current node. Only for unsafe iteration. */ raxStack stack; /* Stack used for unsafe iteration. */ } raxIterator; /* A special pointer returned for not found items. */ extern void *raxNotFound; /* Exported API. */ rax *raxNew(void); int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); void *raxFind(rax *rax, unsigned char *s, size_t len); void raxFree(rax *rax); void raxStart(raxIterator *it, rax *rt); int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); int raxNext(raxIterator *it); int raxPrev(raxIterator *it); int raxRandomWalk(raxIterator *it, size_t steps); int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len); void raxStop(raxIterator *it); void raxShow(rax *rax); #endif