mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 16:48:27 -05:00
1846 lines
68 KiB
C
1846 lines
68 KiB
C
|
/* Rax -- A radix tree implementation.
|
||
|
*
|
||
|
* Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
|
||
|
* All rights reserved.
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions are met:
|
||
|
*
|
||
|
* * Redistributions of source code must retain the above copyright notice,
|
||
|
* this list of conditions and the following disclaimer.
|
||
|
* * Redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution.
|
||
|
* * Neither the name of Redis nor the names of its contributors may be used
|
||
|
* to endorse or promote products derived from this software without
|
||
|
* specific prior written permission.
|
||
|
*
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||
|
*/
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <assert.h>
|
||
|
#include <stdio.h>
|
||
|
#include <errno.h>
|
||
|
#include "rax.h"
|
||
|
#include "rax_malloc.h"
|
||
|
|
||
|
/* This is a special pointer that is guaranteed to never have the same value
|
||
|
* of a radix tree node. It's used in order to report "not found" error without
|
||
|
* requiring the function to have multiple return values. */
|
||
|
void *raxNotFound = (void*)"rax-not-found-pointer";
|
||
|
|
||
|
/* -------------------------------- Debugging ------------------------------ */
|
||
|
|
||
|
void raxDebugShowNode(const char *msg, raxNode *n);
|
||
|
|
||
|
/* Turn debugging messages on/off. */
|
||
|
#if 0
|
||
|
#define debugf(...) \
|
||
|
do { \
|
||
|
printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
|
||
|
printf(__VA_ARGS__); \
|
||
|
fflush(stdout); \
|
||
|
} while (0);
|
||
|
|
||
|
#define debugnode(msg,n) raxDebugShowNode(msg,n)
|
||
|
#else
|
||
|
#define debugf(...)
|
||
|
#define debugnode(msg,n)
|
||
|
#endif
|
||
|
|
||
|
/* ------------------------- raxStack functions --------------------------
|
||
|
* The raxStack is a simple stack of pointers that is capable of switching
|
||
|
* from using a stack-allocated array to dynamic heap once a given number of
|
||
|
* items are reached. It is used in order to retain the list of parent nodes
|
||
|
* while walking the radix tree in order to implement certain operations that
|
||
|
* need to navigate the tree upward.
|
||
|
* ------------------------------------------------------------------------- */
|
||
|
|
||
|
/* Initialize the stack. */
|
||
|
static inline void raxStackInit(raxStack *ts) {
|
||
|
ts->stack = ts->static_items;
|
||
|
ts->items = 0;
|
||
|
ts->maxitems = RAX_STACK_STATIC_ITEMS;
|
||
|
ts->oom = 0;
|
||
|
}
|
||
|
|
||
|
/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
|
||
|
static inline int raxStackPush(raxStack *ts, void *ptr) {
|
||
|
if (ts->items == ts->maxitems) {
|
||
|
if (ts->stack == ts->static_items) {
|
||
|
ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
|
||
|
if (ts->stack == NULL) {
|
||
|
ts->stack = ts->static_items;
|
||
|
ts->oom = 1;
|
||
|
return 0;
|
||
|
}
|
||
|
memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
|
||
|
} else {
|
||
|
void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
|
||
|
if (newalloc == NULL) {
|
||
|
ts->oom = 1;
|
||
|
return 0;
|
||
|
}
|
||
|
ts->stack = newalloc;
|
||
|
}
|
||
|
ts->maxitems *= 2;
|
||
|
}
|
||
|
ts->stack[ts->items] = ptr;
|
||
|
ts->items++;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Pop an item from the stack, the function returns NULL if there are no
|
||
|
* items to pop. */
|
||
|
static inline void *raxStackPop(raxStack *ts) {
|
||
|
if (ts->items == 0) return NULL;
|
||
|
ts->items--;
|
||
|
return ts->stack[ts->items];
|
||
|
}
|
||
|
|
||
|
/* Return the stack item at the top of the stack without actually consuming
|
||
|
* it. */
|
||
|
static inline void *raxStackPeek(raxStack *ts) {
|
||
|
if (ts->items == 0) return NULL;
|
||
|
return ts->stack[ts->items-1];
|
||
|
}
|
||
|
|
||
|
/* Free the stack in case we used heap allocation. */
|
||
|
static inline void raxStackFree(raxStack *ts) {
|
||
|
if (ts->stack != ts->static_items) rax_free(ts->stack);
|
||
|
}
|
||
|
|
||
|
/* ----------------------------------------------------------------------------
|
||
|
* Radis tree implementation
|
||
|
* --------------------------------------------------------------------------*/
|
||
|
|
||
|
/* Allocate a new non compressed node with the specified number of children.
|
||
|
* If datafiled is true, the allocation is made large enough to hold the
|
||
|
* associated data pointer.
|
||
|
* Returns the new node pointer. On out of memory NULL is returned. */
|
||
|
raxNode *raxNewNode(size_t children, int datafield) {
|
||
|
size_t nodesize = sizeof(raxNode)+children+
|
||
|
sizeof(raxNode*)*children;
|
||
|
if (datafield) nodesize += sizeof(void*);
|
||
|
raxNode *node = rax_malloc(nodesize);
|
||
|
if (node == NULL) return NULL;
|
||
|
node->iskey = 0;
|
||
|
node->isnull = 0;
|
||
|
node->iscompr = 0;
|
||
|
node->size = children;
|
||
|
return node;
|
||
|
}
|
||
|
|
||
|
/* Allocate a new rax and return its pointer. On out of memory the function
|
||
|
* returns NULL. */
|
||
|
rax *raxNew(void) {
|
||
|
rax *rax = rax_malloc(sizeof(*rax));
|
||
|
if (rax == NULL) return NULL;
|
||
|
rax->numele = 0;
|
||
|
rax->numnodes = 1;
|
||
|
rax->head = raxNewNode(0,0);
|
||
|
if (rax->head == NULL) {
|
||
|
rax_free(rax);
|
||
|
return NULL;
|
||
|
} else {
|
||
|
return rax;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Return the current total size of the node. */
|
||
|
#define raxNodeCurrentLength(n) ( \
|
||
|
sizeof(raxNode)+(n)->size+ \
|
||
|
((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
|
||
|
(((n)->iskey && !(n)->isnull)*sizeof(void*)) \
|
||
|
)
|
||
|
|
||
|
/* realloc the node to make room for auxiliary data in order
|
||
|
* to store an item in that node. On out of memory NULL is returned. */
|
||
|
raxNode *raxReallocForData(raxNode *n, void *data) {
|
||
|
if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
|
||
|
size_t curlen = raxNodeCurrentLength(n);
|
||
|
return rax_realloc(n,curlen+sizeof(void*));
|
||
|
}
|
||
|
|
||
|
/* Set the node auxiliary data to the specified pointer. */
|
||
|
void raxSetData(raxNode *n, void *data) {
|
||
|
n->iskey = 1;
|
||
|
if (data != NULL) {
|
||
|
void **ndata = (void**)
|
||
|
((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
|
||
|
memcpy(ndata,&data,sizeof(data));
|
||
|
n->isnull = 0;
|
||
|
} else {
|
||
|
n->isnull = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Get the node auxiliary data. */
|
||
|
void *raxGetData(raxNode *n) {
|
||
|
if (n->isnull) return NULL;
|
||
|
void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
|
||
|
void *data;
|
||
|
memcpy(&data,ndata,sizeof(data));
|
||
|
return data;
|
||
|
}
|
||
|
|
||
|
/* Add a new child to the node 'n' representing the character 'c' and return
|
||
|
* its new pointer, as well as the child pointer by reference. Additionally
|
||
|
* '***parentlink' is populated with the raxNode pointer-to-pointer of where
|
||
|
* the new child was stored, which is useful for the caller to replace the
|
||
|
* child pointer if it gets reallocated.
|
||
|
*
|
||
|
* On success the new parent node pointer is returned (it may change because
|
||
|
* of the realloc, so the caller should discard 'n' and use the new value).
|
||
|
* On out of memory NULL is returned, and the old node is still valid. */
|
||
|
raxNode *raxAddChild(raxNode *n, char c, raxNode **childptr, raxNode ***parentlink) {
|
||
|
assert(n->iscompr == 0);
|
||
|
|
||
|
size_t curlen = sizeof(raxNode)+
|
||
|
n->size+
|
||
|
sizeof(raxNode*)*n->size;
|
||
|
size_t newlen;
|
||
|
|
||
|
/* Alloc the new child we will link to 'n'. */
|
||
|
raxNode *child = raxNewNode(0,0);
|
||
|
if (child == NULL) return NULL;
|
||
|
|
||
|
/* Make space in the original node. */
|
||
|
if (n->iskey) curlen += sizeof(void*);
|
||
|
newlen = curlen+sizeof(raxNode*)+1; /* Add 1 char and 1 pointer. */
|
||
|
raxNode *newn = rax_realloc(n,newlen);
|
||
|
if (newn == NULL) {
|
||
|
rax_free(child);
|
||
|
return NULL;
|
||
|
}
|
||
|
n = newn;
|
||
|
|
||
|
/* After the reallocation, we have 5/9 (depending on the system
|
||
|
* pointer size) bytes at the end, that is, the additional char
|
||
|
* in the 'data' section, plus one pointer to the new child:
|
||
|
*
|
||
|
* [numc][abx][ap][bp][xp]|auxp|.....
|
||
|
*
|
||
|
* Let's find where to insert the new child in order to make sure
|
||
|
* it is inserted in-place lexicographically. */
|
||
|
int pos;
|
||
|
for (pos = 0; pos < n->size; pos++) {
|
||
|
if (n->data[pos] > c) break;
|
||
|
}
|
||
|
|
||
|
/* Now, if present, move auxiliary data pointer at the end
|
||
|
* so that we can mess with the other data without overwriting it.
|
||
|
* We will obtain something like that:
|
||
|
*
|
||
|
* [numc][abx][ap][bp][xp].....|auxp| */
|
||
|
unsigned char *src;
|
||
|
if (n->iskey && !n->isnull) {
|
||
|
src = n->data+n->size+sizeof(raxNode*)*n->size;
|
||
|
memmove(src+1+sizeof(raxNode*),src,sizeof(void*));
|
||
|
}
|
||
|
|
||
|
/* Now imagine we are adding a node with edge 'c'. The insertion
|
||
|
* point is between 'b' and 'x', so the 'pos' variable value is
|
||
|
* To start, move all the child pointers after the insertion point
|
||
|
* of 1+sizeof(pointer) bytes on the right, to obtain:
|
||
|
*
|
||
|
* [numc][abx][ap][bp].....[xp]|auxp| */
|
||
|
src = n->data+n->size+sizeof(raxNode*)*pos;
|
||
|
memmove(src+1+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
|
||
|
|
||
|
/* Now make the space for the additional char in the data section,
|
||
|
* but also move the pointers before the insertion point in the right
|
||
|
* by 1 byte, in order to obtain the following:
|
||
|
*
|
||
|
* [numc][ab.x][ap][bp]....[xp]|auxp| */
|
||
|
src = n->data+pos;
|
||
|
memmove(src+1,src,n->size-pos+sizeof(raxNode*)*pos);
|
||
|
|
||
|
/* We can now set the character and its child node pointer to get:
|
||
|
*
|
||
|
* [numc][abcx][ap][bp][cp]....|auxp|
|
||
|
* [numc][abcx][ap][bp][cp][xp]|auxp| */
|
||
|
n->data[pos] = c;
|
||
|
n->size++;
|
||
|
raxNode **childfield = (raxNode**)(n->data+n->size+sizeof(raxNode*)*pos);
|
||
|
memcpy(childfield,&child,sizeof(child));
|
||
|
*childptr = child;
|
||
|
*parentlink = childfield;
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
/* Return the pointer to the last child pointer in a node. For the compressed
|
||
|
* nodes this is the only child pointer. */
|
||
|
#define raxNodeLastChildPtr(n) ((raxNode**) ( \
|
||
|
((char*)(n)) + \
|
||
|
raxNodeCurrentLength(n) - \
|
||
|
sizeof(raxNode*) - \
|
||
|
(((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
|
||
|
))
|
||
|
|
||
|
/* Return the pointer to the first child pointer. */
|
||
|
#define raxNodeFirstChildPtr(n) ((raxNode**)((n)->data+(n)->size))
|
||
|
|
||
|
/* Turn the node 'n', that must be a node without any children, into a
|
||
|
* compressed node representing a set of nodes linked one after the other
|
||
|
* and having exactly one child each. The node can be a key or not: this
|
||
|
* property and the associated value if any will be preserved.
|
||
|
*
|
||
|
* The function also returns a child node, since the last node of the
|
||
|
* compressed chain cannot be part of the chain: it has zero children while
|
||
|
* we can only compress inner nodes with exactly one child each. */
|
||
|
raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
|
||
|
assert(n->size == 0 && n->iscompr == 0);
|
||
|
void *data = NULL; /* Initialized only to avoid warnings. */
|
||
|
size_t newsize;
|
||
|
|
||
|
debugf("Compress node: %.*s\n", (int)len,s);
|
||
|
|
||
|
/* Allocate the child to link to this node. */
|
||
|
*child = raxNewNode(0,0);
|
||
|
if (*child == NULL) return NULL;
|
||
|
|
||
|
/* Make space in the parent node. */
|
||
|
newsize = sizeof(raxNode)+len+sizeof(raxNode*);
|
||
|
if (n->iskey) {
|
||
|
data = raxGetData(n); /* To restore it later. */
|
||
|
if (!n->isnull) newsize += sizeof(void*);
|
||
|
}
|
||
|
raxNode *newn = rax_realloc(n,newsize);
|
||
|
if (newn == NULL) {
|
||
|
rax_free(*child);
|
||
|
return NULL;
|
||
|
}
|
||
|
n = newn;
|
||
|
|
||
|
n->iscompr = 1;
|
||
|
n->size = len;
|
||
|
memcpy(n->data,s,len);
|
||
|
if (n->iskey) raxSetData(n,data);
|
||
|
raxNode **childfield = raxNodeLastChildPtr(n);
|
||
|
memcpy(childfield,&child,sizeof(child));
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
/* Low level function that walks the tree looking for the string
|
||
|
* 's' of 'len' bytes. The function returns the number of characters
|
||
|
* of the key that was possible to process: if the returned integer
|
||
|
* is the same as 'len', then it means that the node corresponding to the
|
||
|
* string was found (however it may not be a key in case the node->iskey is
|
||
|
* zero or if simply we stopped in the middle of a compressed node, so that
|
||
|
* 'splitpos' is non zero).
|
||
|
*
|
||
|
* Otherwise if the returned integer is not the same as 'len', there was an
|
||
|
* early stop during the tree walk because of a character mismatch.
|
||
|
*
|
||
|
* The node where the search ended (because the full string was processed
|
||
|
* or because there was an early stop) is returned by reference as
|
||
|
* '*stopnode' if the passed pointer is not NULL. This node link in the
|
||
|
* parent's node is returned as '*plink' if not NULL. Finally, if the
|
||
|
* search stopped in a compressed node, '*splitpos' returns the index
|
||
|
* inside the compressed node where the search ended. This is useful to
|
||
|
* know where to split the node for insertion. */
|
||
|
static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
|
||
|
raxNode *h = rax->head;
|
||
|
raxNode **parentlink = &rax->head;
|
||
|
|
||
|
size_t i = 0; /* Position in the string. */
|
||
|
size_t j = 0; /* Position in the node children (or bytes if compressed).*/
|
||
|
while(h->size && i < len) {
|
||
|
debugnode("Lookup current node",h);
|
||
|
unsigned char *v = h->data;
|
||
|
|
||
|
if (h->iscompr) {
|
||
|
for (j = 0; j < h->size && i < len; j++, i++) {
|
||
|
if (v[j] != s[i]) break;
|
||
|
}
|
||
|
if (j != h->size) break;
|
||
|
} else {
|
||
|
/* Even when h->size is large, linear scan provides good
|
||
|
* performances compared to other approaches that are in theory
|
||
|
* more sounding, like performing a binary search. */
|
||
|
for (j = 0; j < h->size; j++) {
|
||
|
if (v[j] == s[i]) break;
|
||
|
}
|
||
|
if (j == h->size) break;
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
|
||
|
raxNode **children = raxNodeFirstChildPtr(h);
|
||
|
if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
|
||
|
memcpy(&h,children+j,sizeof(h));
|
||
|
parentlink = children+j;
|
||
|
j = 0; /* If the new node is compressed and we do not
|
||
|
iterate again (since i == l) set the split
|
||
|
position to 0 to signal this node represents
|
||
|
the searched key. */
|
||
|
}
|
||
|
if (stopnode) *stopnode = h;
|
||
|
if (plink) *plink = parentlink;
|
||
|
if (splitpos && h->iscompr) *splitpos = j;
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
/* Insert the element 's' of size 'len', setting as auxiliary data
|
||
|
* the pointer 'data'. If the element is already present, the associated
|
||
|
* data is updated, and 0 is returned, otherwise the element is inserted
|
||
|
* and 1 is returned. On out of memory the function returns 0 as well but
|
||
|
* sets errno to ENOMEM, otherwise errno will be set to 0. */
|
||
|
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data) {
|
||
|
size_t i;
|
||
|
int j = 0; /* Split position. If raxLowWalk() stops in a compressed
|
||
|
node, the index 'j' represents the char we stopped within the
|
||
|
compressed node, that is, the position where to split the
|
||
|
node for insertion. */
|
||
|
raxNode *h, **parentlink;
|
||
|
|
||
|
debugf("### Insert %.*s with value %p\n", (int)len, s, data);
|
||
|
i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
|
||
|
|
||
|
/* If i == len we walked following the whole string. If we are not
|
||
|
* in the middle of a compressed node, the string is either already
|
||
|
* inserted or this middle node is currently not a key, but can represent
|
||
|
* our key. We have just to reallocate the node and make space for the
|
||
|
* data pointer. */
|
||
|
if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
|
||
|
if (h->iskey) {
|
||
|
raxSetData(h,data);
|
||
|
errno = 0;
|
||
|
return 0; /* Element already exists. */
|
||
|
}
|
||
|
h = raxReallocForData(h,data);
|
||
|
if (h == NULL) {
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
memcpy(parentlink,&h,sizeof(h));
|
||
|
raxSetData(h,data);
|
||
|
rax->numele++;
|
||
|
return 1; /* Element inserted. */
|
||
|
}
|
||
|
|
||
|
/* If the node we stopped at is a compressed node, we need to
|
||
|
* split it before to continue.
|
||
|
*
|
||
|
* Splitting a compressed node have a few possibile cases.
|
||
|
* Imagine that the node 'h' we are currently at is a compressed
|
||
|
* node contaning the string "ANNIBALE" (it means that it represents
|
||
|
* nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
|
||
|
* pointer of this node pointing at the 'E' node, because remember that
|
||
|
* we have characters at the edges of the graph, not inside the nodes
|
||
|
* themselves.
|
||
|
*
|
||
|
* In order to show a real case imagine our node to also point to
|
||
|
* another compressed node, that finally points at the node without
|
||
|
* children, representing 'O':
|
||
|
*
|
||
|
* "ANNIBALE" -> "SCO" -> []
|
||
|
*
|
||
|
* When inserting we may face the following cases. Note that all the cases
|
||
|
* require the insertion of a non compressed node with exactly two
|
||
|
* children, except for the last case which just requires splitting a
|
||
|
* compressed node.
|
||
|
*
|
||
|
* 1) Inserting "ANNIENTARE"
|
||
|
*
|
||
|
* |B| -> "ALE" -> "SCO" -> []
|
||
|
* "ANNI" -> |-|
|
||
|
* |E| -> (... continue algo ...) "NTARE" -> []
|
||
|
*
|
||
|
* 2) Inserting "ANNIBALI"
|
||
|
*
|
||
|
* |E| -> "SCO" -> []
|
||
|
* "ANNIBAL" -> |-|
|
||
|
* |I| -> (... continue algo ...) []
|
||
|
*
|
||
|
* 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
|
||
|
*
|
||
|
* |N| -> "NIBALE" -> "SCO" -> []
|
||
|
* |A| -> |-|
|
||
|
* |G| -> (... continue algo ...) |O| -> []
|
||
|
*
|
||
|
* 4) Inserting "CIAO"
|
||
|
*
|
||
|
* |A| -> "NNIBALE" -> "SCO" -> []
|
||
|
* |-|
|
||
|
* |C| -> (... continue algo ...) "IAO" -> []
|
||
|
*
|
||
|
* 5) Inserting "ANNI"
|
||
|
*
|
||
|
* "ANNI" -> "BALE" -> "SCO" -> []
|
||
|
*
|
||
|
* The final algorithm for insertion covering all the above cases is as
|
||
|
* follows.
|
||
|
*
|
||
|
* ============================= ALGO 1 =============================
|
||
|
*
|
||
|
* For the above cases 1 to 4, that is, all cases where we stopped in
|
||
|
* the middle of a compressed node for a character mismatch, do:
|
||
|
*
|
||
|
* Let $SPLITPOS be the zero-based index at which, in the
|
||
|
* compressed node array of characters, we found the mismatching
|
||
|
* character. For example if the node contains "ANNIBALE" and we add
|
||
|
* "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
|
||
|
* mismatching character is found.
|
||
|
*
|
||
|
* 1. Save the current compressed node $NEXT pointer (the pointer to the
|
||
|
* child element, that is always present in compressed nodes).
|
||
|
*
|
||
|
* 2. Create "split node" having as child the non common letter
|
||
|
* at the compressed node. The other non common letter (at the key)
|
||
|
* will be added later as we continue the normal insertion algorithm
|
||
|
* at step "6".
|
||
|
*
|
||
|
* 3a. IF $SPLITPOS == 0:
|
||
|
* Replace the old node with the split node, by copying the auxiliary
|
||
|
* data if any. Fix parent's reference. Free old node eventually
|
||
|
* (we still need its data for the next steps of the algorithm).
|
||
|
*
|
||
|
* 3b. IF $SPLITPOS != 0:
|
||
|
* Trim the compressed node (reallocating it as well) in order to
|
||
|
* contain $splitpos characters. Change chilid pointer in order to link
|
||
|
* to the split node. If new compressed node len is just 1, set
|
||
|
* iscompr to 0 (layout is the same). Fix parent's reference.
|
||
|
*
|
||
|
* 4a. IF the postfix len (the length of the remaining string of the
|
||
|
* original compressed node after the split character) is non zero,
|
||
|
* create a "postfix node". If the postfix node has just one character
|
||
|
* set iscompr to 0, otherwise iscompr to 1. Set the postfix node
|
||
|
* child pointer to $NEXT.
|
||
|
*
|
||
|
* 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
|
||
|
*
|
||
|
* 5. Set child[0] of split node to postfix node.
|
||
|
*
|
||
|
* 6. Set the split node as the current node, set current index at child[1]
|
||
|
* and continue insertion algorithm as usually.
|
||
|
*
|
||
|
* ============================= ALGO 2 =============================
|
||
|
*
|
||
|
* For case 5, that is, if we stopped in the middle of a compressed
|
||
|
* node but no mismatch was found, do:
|
||
|
*
|
||
|
* Let $SPLITPOS be the zero-based index at which, in the
|
||
|
* compressed node array of characters, we stopped iterating because
|
||
|
* there were no more keys character to match. So in the example of
|
||
|
* the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4.
|
||
|
*
|
||
|
* 1. Save the current compressed node $NEXT pointer (the pointer to the
|
||
|
* child element, that is always present in compressed nodes).
|
||
|
*
|
||
|
* 2. Create a "postfix node" containing all the characters from $SPLITPOS
|
||
|
* to the end. Use $NEXT as the postfix node child pointer.
|
||
|
* If the postfix node length is 1, set iscompr to 0.
|
||
|
* Set the node as a key with the associated value of the new
|
||
|
* inserted key.
|
||
|
*
|
||
|
* 3. Trim the current node to contain the first $SPLITPOS characters.
|
||
|
* As usually if the new node length is just 1, set iscompr to 0.
|
||
|
* Take the iskey / associated value as it was in the orignal node.
|
||
|
* Fix the parent's reference.
|
||
|
*
|
||
|
* 4. Set the postfix node as the only child pointer of the trimmed
|
||
|
* node created at step 1.
|
||
|
*/
|
||
|
|
||
|
/* ------------------------- ALGORITHM 1 --------------------------- */
|
||
|
if (h->iscompr && i != len) {
|
||
|
debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
|
||
|
h->size, h->data, (void*)h);
|
||
|
debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
|
||
|
debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
|
||
|
debugf("Other (key) letter is '%c'\n", s[i]);
|
||
|
|
||
|
/* 1: Save next pointer. */
|
||
|
raxNode **childfield = raxNodeLastChildPtr(h);
|
||
|
raxNode *next;
|
||
|
memcpy(&next,childfield,sizeof(next));
|
||
|
debugf("Next is %p\n", (void*)next);
|
||
|
debugf("iskey %d\n", h->iskey);
|
||
|
if (h->iskey) {
|
||
|
debugf("key value is %p\n", raxGetData(h));
|
||
|
}
|
||
|
|
||
|
/* Set the length of the additional nodes we will need. */
|
||
|
size_t trimmedlen = j;
|
||
|
size_t postfixlen = h->size - j - 1;
|
||
|
int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
|
||
|
size_t nodesize;
|
||
|
|
||
|
/* 2: Create the split node. Also allocate the other nodes we'll need
|
||
|
* ASAP, so that it will be simpler to handle OOM. */
|
||
|
raxNode *splitnode = raxNewNode(1, split_node_is_key);
|
||
|
raxNode *trimmed = NULL;
|
||
|
raxNode *postfix = NULL;
|
||
|
|
||
|
if (trimmedlen) {
|
||
|
nodesize = sizeof(raxNode)+trimmedlen+sizeof(raxNode*);
|
||
|
if (h->iskey && !h->isnull) nodesize += sizeof(void*);
|
||
|
trimmed = rax_malloc(nodesize);
|
||
|
}
|
||
|
|
||
|
if (postfixlen) {
|
||
|
nodesize = sizeof(raxNode)+postfixlen+
|
||
|
sizeof(raxNode*);
|
||
|
postfix = rax_malloc(nodesize);
|
||
|
}
|
||
|
|
||
|
/* OOM? Abort now that the tree is untouched. */
|
||
|
if (splitnode == NULL ||
|
||
|
(trimmedlen && trimmed == NULL) ||
|
||
|
(postfixlen && postfix == NULL))
|
||
|
{
|
||
|
rax_free(splitnode);
|
||
|
rax_free(trimmed);
|
||
|
rax_free(postfix);
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
splitnode->data[0] = h->data[j];
|
||
|
|
||
|
if (j == 0) {
|
||
|
/* 3a: Replace the old node with the split node. */
|
||
|
if (h->iskey) {
|
||
|
void *ndata = raxGetData(h);
|
||
|
raxSetData(splitnode,ndata);
|
||
|
}
|
||
|
memcpy(parentlink,&splitnode,sizeof(splitnode));
|
||
|
} else {
|
||
|
/* 3b: Trim the compressed node. */
|
||
|
trimmed->size = j;
|
||
|
memcpy(trimmed->data,h->data,j);
|
||
|
trimmed->iscompr = j > 1 ? 1 : 0;
|
||
|
trimmed->iskey = h->iskey;
|
||
|
trimmed->isnull = h->isnull;
|
||
|
if (h->iskey && !h->isnull) {
|
||
|
void *ndata = raxGetData(h);
|
||
|
raxSetData(trimmed,ndata);
|
||
|
}
|
||
|
raxNode **cp = raxNodeLastChildPtr(trimmed);
|
||
|
memcpy(cp,&splitnode,sizeof(splitnode));
|
||
|
memcpy(parentlink,&trimmed,sizeof(trimmed));
|
||
|
parentlink = cp; /* Set parentlink to splitnode parent. */
|
||
|
rax->numnodes++;
|
||
|
}
|
||
|
|
||
|
/* 4: Create the postfix node: what remains of the original
|
||
|
* compressed node after the split. */
|
||
|
if (postfixlen) {
|
||
|
/* 4a: create a postfix node. */
|
||
|
postfix->iskey = 0;
|
||
|
postfix->isnull = 0;
|
||
|
postfix->size = postfixlen;
|
||
|
postfix->iscompr = postfixlen > 1;
|
||
|
memcpy(postfix->data,h->data+j+1,postfixlen);
|
||
|
raxNode **cp = raxNodeLastChildPtr(postfix);
|
||
|
memcpy(cp,&next,sizeof(next));
|
||
|
rax->numnodes++;
|
||
|
} else {
|
||
|
/* 4b: just use next as postfix node. */
|
||
|
postfix = next;
|
||
|
}
|
||
|
|
||
|
/* 5: Set splitnode first child as the postfix node. */
|
||
|
raxNode **splitchild = raxNodeLastChildPtr(splitnode);
|
||
|
memcpy(splitchild,&postfix,sizeof(postfix));
|
||
|
|
||
|
/* 6. Continue insertion: this will cause the splitnode to
|
||
|
* get a new child (the non common character at the currently
|
||
|
* inserted key). */
|
||
|
rax_free(h);
|
||
|
h = splitnode;
|
||
|
} else if (h->iscompr && i == len) {
|
||
|
/* ------------------------- ALGORITHM 2 --------------------------- */
|
||
|
debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
|
||
|
h->size, h->data, (void*)h, j);
|
||
|
|
||
|
/* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
|
||
|
size_t postfixlen = h->size - j;
|
||
|
size_t nodesize = sizeof(raxNode)+postfixlen+sizeof(raxNode*);
|
||
|
if (data != NULL) nodesize += sizeof(void*);
|
||
|
raxNode *postfix = rax_malloc(nodesize);
|
||
|
|
||
|
nodesize = sizeof(raxNode)+j+sizeof(raxNode*);
|
||
|
if (h->iskey && !h->isnull) nodesize += sizeof(void*);
|
||
|
raxNode *trimmed = rax_malloc(nodesize);
|
||
|
|
||
|
if (postfix == NULL || trimmed == NULL) {
|
||
|
rax_free(postfix);
|
||
|
rax_free(trimmed);
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* 1: Save next pointer. */
|
||
|
raxNode **childfield = raxNodeLastChildPtr(h);
|
||
|
raxNode *next;
|
||
|
memcpy(&next,childfield,sizeof(next));
|
||
|
|
||
|
/* 2: Create the postfix node. */
|
||
|
postfix->size = postfixlen;
|
||
|
postfix->iscompr = postfixlen > 1;
|
||
|
postfix->iskey = 1;
|
||
|
postfix->isnull = 0;
|
||
|
memcpy(postfix->data,h->data+j,postfixlen);
|
||
|
raxSetData(postfix,data);
|
||
|
raxNode **cp = raxNodeLastChildPtr(postfix);
|
||
|
memcpy(cp,&next,sizeof(next));
|
||
|
rax->numnodes++;
|
||
|
|
||
|
/* 3: Trim the compressed node. */
|
||
|
trimmed->size = j;
|
||
|
trimmed->iscompr = j > 1;
|
||
|
trimmed->iskey = 0;
|
||
|
trimmed->isnull = 0;
|
||
|
memcpy(trimmed->data,h->data,j);
|
||
|
memcpy(parentlink,&trimmed,sizeof(trimmed));
|
||
|
if (h->iskey) {
|
||
|
void *aux = raxGetData(h);
|
||
|
raxSetData(trimmed,aux);
|
||
|
}
|
||
|
|
||
|
/* Fix the trimmed node child pointer to point to
|
||
|
* the postfix node. */
|
||
|
cp = raxNodeLastChildPtr(trimmed);
|
||
|
memcpy(cp,&postfix,sizeof(postfix));
|
||
|
|
||
|
/* Finish! We don't need to contine with the insertion
|
||
|
* algorithm for ALGO 2. The key is already inserted. */
|
||
|
rax->numele++;
|
||
|
return 1; /* Key inserted. */
|
||
|
}
|
||
|
|
||
|
/* We walked the radix tree as far as we could, but still there are left
|
||
|
* chars in our string. We need to insert the missing nodes.
|
||
|
* Note: while loop never entered if the node was split by ALGO2,
|
||
|
* since i == len. */
|
||
|
while(i < len) {
|
||
|
raxNode *child;
|
||
|
rax->numnodes++;
|
||
|
|
||
|
/* If this node is going to have a single child, and there
|
||
|
* are other characters, so that that would result in a chain
|
||
|
* of single-childed nodes, turn it into a compressed node. */
|
||
|
if (h->size == 0 && len-i > 1) {
|
||
|
debugf("Inserting compressed node\n");
|
||
|
size_t comprsize = len-i;
|
||
|
if (comprsize > RAX_NODE_MAX_SIZE)
|
||
|
comprsize = RAX_NODE_MAX_SIZE;
|
||
|
raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
|
||
|
if (newh == NULL) goto oom;
|
||
|
h = newh;
|
||
|
memcpy(parentlink,&h,sizeof(h));
|
||
|
parentlink = raxNodeLastChildPtr(h);
|
||
|
i += comprsize;
|
||
|
} else {
|
||
|
debugf("Inserting normal node\n");
|
||
|
raxNode **new_parentlink;
|
||
|
raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
|
||
|
if (newh == NULL) goto oom;
|
||
|
h = newh;
|
||
|
memcpy(parentlink,&h,sizeof(h));
|
||
|
parentlink = new_parentlink;
|
||
|
i++;
|
||
|
}
|
||
|
h = child;
|
||
|
}
|
||
|
raxNode *newh = raxReallocForData(h,data);
|
||
|
if (newh == NULL) goto oom;
|
||
|
h = newh;
|
||
|
if (!h->iskey) rax->numele++;
|
||
|
raxSetData(h,data);
|
||
|
memcpy(parentlink,&h,sizeof(h));
|
||
|
return 1; /* Element inserted. */
|
||
|
|
||
|
oom:
|
||
|
/* This code path handles out of memory after part of the sub-tree was
|
||
|
* already added. Set the node as a key, and then remove it. */
|
||
|
h->isnull = 1;
|
||
|
h->iskey = 1;
|
||
|
raxRemove(rax,s,i);
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* Find a key in the rax, returns raxNotFound special void pointer value
|
||
|
* if the item was not found, otherwise the value associated with the
|
||
|
* item is returned. */
|
||
|
void *raxFind(rax *rax, unsigned char *s, size_t len) {
|
||
|
raxNode *h;
|
||
|
|
||
|
debugf("### Lookup: %.*s\n", (int)len, s);
|
||
|
int splitpos = 0;
|
||
|
size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
|
||
|
if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
|
||
|
return raxNotFound;
|
||
|
return raxGetData(h);
|
||
|
}
|
||
|
|
||
|
/* Return the memory address where the 'parent' node stores the specified
|
||
|
* 'child' pointer, so that the caller can update the pointer with another
|
||
|
* one if needed. The function assumes it will find a match, otherwise the
|
||
|
* operation is an undefined behavior (it will continue scanning the
|
||
|
* memory without any bound checking). */
|
||
|
raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
|
||
|
raxNode **cp = raxNodeFirstChildPtr(parent);
|
||
|
raxNode *c;
|
||
|
while(1) {
|
||
|
memcpy(&c,cp,sizeof(c));
|
||
|
if (c == child) break;
|
||
|
cp++;
|
||
|
}
|
||
|
return cp;
|
||
|
}
|
||
|
|
||
|
/* Low level child removal from node. The new node pointer (after the child
|
||
|
* removal) is returned. Note that this function does not fix the pointer
|
||
|
* of the parent node in its parent, so this task is up to the caller.
|
||
|
* The function never fails for out of memory. */
|
||
|
raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
|
||
|
debugnode("raxRemoveChild before", parent);
|
||
|
/* If parent is a compressed node (having a single child, as for definition
|
||
|
* of the data structure), the removal of the child consists into turning
|
||
|
* it into a normal node without children. */
|
||
|
if (parent->iscompr) {
|
||
|
void *data = NULL;
|
||
|
if (parent->iskey) data = raxGetData(parent);
|
||
|
parent->isnull = 0;
|
||
|
parent->iscompr = 0;
|
||
|
parent->size = 0;
|
||
|
if (parent->iskey) raxSetData(parent,data);
|
||
|
debugnode("raxRemoveChild after", parent);
|
||
|
return parent;
|
||
|
}
|
||
|
|
||
|
/* Otherwise we need to scan for the children pointer and memmove()
|
||
|
* accordingly.
|
||
|
*
|
||
|
* 1. To start we seek the first element in both the children
|
||
|
* pointers and edge bytes in the node. */
|
||
|
raxNode **cp = raxNodeLastChildPtr(parent) - (parent->size-1);
|
||
|
raxNode **c = cp;
|
||
|
unsigned char *e = parent->data;
|
||
|
|
||
|
/* 2. Search the child pointer to remove inside the array of children
|
||
|
* pointers. */
|
||
|
while(1) {
|
||
|
raxNode *aux;
|
||
|
memcpy(&aux,c,sizeof(aux));
|
||
|
if (aux == child) break;
|
||
|
c++;
|
||
|
e++;
|
||
|
}
|
||
|
|
||
|
/* 3. Remove the edge and the pointer by memmoving the remaining children
|
||
|
* pointer and edge bytes one position before. */
|
||
|
int taillen = parent->size - (e - parent->data) - 1;
|
||
|
debugf("raxRemoveChild tail len: %d\n", taillen);
|
||
|
memmove(e,e+1,taillen);
|
||
|
|
||
|
/* Since we have one data byte less, also child pointers start one byte
|
||
|
* before now. */
|
||
|
memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**));
|
||
|
|
||
|
/* Move the remaining "tail" pointer at the right position as well. */
|
||
|
memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**));
|
||
|
|
||
|
/* 4. Update size. */
|
||
|
parent->size--;
|
||
|
|
||
|
/* realloc the node according to the theoretical memory usage, to free
|
||
|
* data if we are over-allocating right now. */
|
||
|
raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
|
||
|
debugnode("raxRemoveChild after", newnode);
|
||
|
/* Note: if rax_realloc() fails we just return the old address, which
|
||
|
* is valid. */
|
||
|
return newnode ? newnode : parent;
|
||
|
}
|
||
|
|
||
|
/* Remove the specified item. Returns 1 if the item was found and
|
||
|
* deleted, 0 otherwise. */
|
||
|
int raxRemove(rax *rax, unsigned char *s, size_t len) {
|
||
|
raxNode *h;
|
||
|
raxStack ts;
|
||
|
|
||
|
debugf("### Delete: %.*s\n", (int)len, s);
|
||
|
raxStackInit(&ts);
|
||
|
int splitpos = 0;
|
||
|
size_t i = raxLowWalk(rax,s,len,&h,NULL,NULL,&ts);
|
||
|
if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
|
||
|
raxStackFree(&ts);
|
||
|
return 0;
|
||
|
}
|
||
|
h->iskey = 0;
|
||
|
rax->numele--;
|
||
|
|
||
|
/* If this node has no children, the deletion needs to reclaim the
|
||
|
* no longer used nodes. This is an iterative process that needs to
|
||
|
* walk the three upward, deleting all the nodes with just one child
|
||
|
* that are not keys, until the head of the rax is reached or the first
|
||
|
* node with more than one child is found. */
|
||
|
|
||
|
int trycompress = 0; /* Will be set to 1 if we should try to optimize the
|
||
|
tree resulting from the deletion. */
|
||
|
|
||
|
if (h->size == 0) {
|
||
|
debugf("Key deleted in node without children. Cleanup needed.\n");
|
||
|
raxNode *child = NULL;
|
||
|
while(h != rax->head) {
|
||
|
child = h;
|
||
|
debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
|
||
|
(int)child->size, (char*)child->data, child->iskey);
|
||
|
rax_free(child);
|
||
|
rax->numnodes--;
|
||
|
h = raxStackPop(&ts);
|
||
|
/* If this node has more then one child, or actually holds
|
||
|
* a key, stop here. */
|
||
|
if (h->iskey || (!h->iscompr && h->size != 1)) break;
|
||
|
}
|
||
|
if (child) {
|
||
|
debugf("Unlinking child %p from parent %p\n",
|
||
|
(void*)child, (void*)h);
|
||
|
raxNode *new = raxRemoveChild(h,child);
|
||
|
if (new != h) {
|
||
|
raxNode *parent = raxStackPeek(&ts);
|
||
|
raxNode **parentlink;
|
||
|
if (parent == NULL) {
|
||
|
parentlink = &rax->head;
|
||
|
} else {
|
||
|
parentlink = raxFindParentLink(parent,h);
|
||
|
}
|
||
|
memcpy(parentlink,&new,sizeof(new));
|
||
|
}
|
||
|
|
||
|
/* If after the removal the node has just a single child
|
||
|
* and is not a key, we need to try to compress it. */
|
||
|
if (new->size == 1 && new->iskey == 0) {
|
||
|
trycompress = 1;
|
||
|
h = new;
|
||
|
}
|
||
|
}
|
||
|
} else if (h->size == 1) {
|
||
|
/* If the node had just one child, after the removal of the key
|
||
|
* further compression with adjacent nodes is pontentially possible. */
|
||
|
trycompress = 1;
|
||
|
}
|
||
|
|
||
|
/* Don't try node compression if our nodes pointers stack is not
|
||
|
* complete because of OOM while executing raxLowWalk() */
|
||
|
if (trycompress && ts.oom) trycompress = 0;
|
||
|
|
||
|
/* Recompression: if trycompress is true, 'h' points to a radix tree node
|
||
|
* that changed in a way that could allow to compress nodes in this
|
||
|
* sub-branch. Compressed nodes represent chains of nodes that are not
|
||
|
* keys and have a single child, so there are two deletion events that
|
||
|
* may alter the tree so that further compression is needed:
|
||
|
*
|
||
|
* 1) A node with a single child was a key and now no longer is a key.
|
||
|
* 2) A node with two children now has just one child.
|
||
|
*
|
||
|
* We try to navigate upward till there are other nodes that can be
|
||
|
* compressed, when we reach the upper node which is not a key and has
|
||
|
* a single child, we scan the chain of children to collect the
|
||
|
* compressable part of the tree, and replace the current node with the
|
||
|
* new one, fixing the child pointer to reference the first non
|
||
|
* compressable node.
|
||
|
*
|
||
|
* Example of case "1". A tree stores the keys "FOO" = 1 and
|
||
|
* "FOOBAR" = 2:
|
||
|
*
|
||
|
*
|
||
|
* "FOO" -> "BAR" -> [] (2)
|
||
|
* (1)
|
||
|
*
|
||
|
* After the removal of "FOO" the tree can be compressed as:
|
||
|
*
|
||
|
* "FOOBAR" -> [] (2)
|
||
|
*
|
||
|
*
|
||
|
* Example of case "2". A tree stores the keys "FOOBAR" = 1 and
|
||
|
* "FOOTER" = 2:
|
||
|
*
|
||
|
* |B| -> "AR" -> [] (1)
|
||
|
* "FOO" -> |-|
|
||
|
* |T| -> "ER" -> [] (2)
|
||
|
*
|
||
|
* After the removal of "FOOTER" the resulting tree is:
|
||
|
*
|
||
|
* "FOO" -> |B| -> "AR" -> [] (1)
|
||
|
*
|
||
|
* That can be compressed into:
|
||
|
*
|
||
|
* "FOOBAR" -> [] (1)
|
||
|
*/
|
||
|
if (trycompress) {
|
||
|
debugf("After removing %.*s:\n", (int)len, s);
|
||
|
debugnode("Compression may be needed",h);
|
||
|
debugf("Seek start node\n");
|
||
|
|
||
|
/* Try to reach the upper node that is compressible.
|
||
|
* At the end of the loop 'h' will point to the first node we
|
||
|
* can try to compress and 'parent' to its parent. */
|
||
|
raxNode *parent;
|
||
|
while(1) {
|
||
|
parent = raxStackPop(&ts);
|
||
|
if (!parent || parent->iskey ||
|
||
|
(!parent->iscompr && parent->size != 1)) break;
|
||
|
h = parent;
|
||
|
debugnode("Going up to",h);
|
||
|
}
|
||
|
raxNode *start = h; /* Compression starting node. */
|
||
|
|
||
|
/* Scan chain of nodes we can compress. */
|
||
|
size_t comprsize = h->size;
|
||
|
int nodes = 1;
|
||
|
while(h->size != 0) {
|
||
|
raxNode **cp = raxNodeLastChildPtr(h);
|
||
|
memcpy(&h,cp,sizeof(h));
|
||
|
if (h->iskey || (!h->iscompr && h->size != 1)) break;
|
||
|
nodes++;
|
||
|
comprsize += h->size;
|
||
|
}
|
||
|
if (nodes > 1) {
|
||
|
/* If we can compress, create the new node and populate it. */
|
||
|
size_t nodesize =
|
||
|
sizeof(raxNode)+comprsize+sizeof(raxNode*);
|
||
|
raxNode *new = rax_malloc(nodesize);
|
||
|
/* An out of memory here just means we cannot optimize this
|
||
|
* node, but the tree is left in a consistent state. */
|
||
|
if (new == NULL) {
|
||
|
raxStackFree(&ts);
|
||
|
return 1;
|
||
|
}
|
||
|
new->iskey = 0;
|
||
|
new->isnull = 0;
|
||
|
new->iscompr = 1;
|
||
|
new->size = comprsize;
|
||
|
rax->numnodes++;
|
||
|
|
||
|
/* Scan again, this time to populate the new node content and
|
||
|
* to fix the new node child pointer. At the same time we free
|
||
|
* all the nodes that we'll no longer use. */
|
||
|
comprsize = 0;
|
||
|
h = start;
|
||
|
while(h->size != 0) {
|
||
|
memcpy(new->data+comprsize,h->data,h->size);
|
||
|
comprsize += h->size;
|
||
|
raxNode **cp = raxNodeLastChildPtr(h);
|
||
|
raxNode *tofree = h;
|
||
|
memcpy(&h,cp,sizeof(h));
|
||
|
rax_free(tofree); rax->numnodes--;
|
||
|
if (h->iskey || (!h->iscompr && h->size != 1)) break;
|
||
|
}
|
||
|
debugnode("New node",new);
|
||
|
|
||
|
/* Now 'h' points to the first node that we still need to use,
|
||
|
* so our new node child pointer will point to it. */
|
||
|
raxNode **cp = raxNodeLastChildPtr(new);
|
||
|
memcpy(cp,&h,sizeof(h));
|
||
|
|
||
|
/* Fix parent link. */
|
||
|
if (parent) {
|
||
|
raxNode **parentlink = raxFindParentLink(parent,start);
|
||
|
memcpy(parentlink,&new,sizeof(new));
|
||
|
} else {
|
||
|
rax->head = new;
|
||
|
}
|
||
|
|
||
|
debugf("Compressed %d nodes, %d total bytes\n",
|
||
|
nodes, (int)comprsize);
|
||
|
}
|
||
|
}
|
||
|
raxStackFree(&ts);
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* This is the core of raxFree(): performs a depth-first scan of the
|
||
|
* tree and releases all the nodes found. */
|
||
|
void raxRecursiveFree(rax *rax, raxNode *n) {
|
||
|
int numchildren = n->iscompr ? 1 : n->size;
|
||
|
raxNode **cp = raxNodeLastChildPtr(n);
|
||
|
while(numchildren--) {
|
||
|
raxNode *child;
|
||
|
memcpy(&child,cp,sizeof(child));
|
||
|
raxRecursiveFree(rax,child);
|
||
|
cp--;
|
||
|
}
|
||
|
debugnode("free depth-first",n);
|
||
|
rax_free(n);
|
||
|
rax->numnodes--;
|
||
|
}
|
||
|
|
||
|
/* Free a whole radix tree. */
|
||
|
void raxFree(rax *rax) {
|
||
|
raxRecursiveFree(rax,rax->head);
|
||
|
assert(rax->numnodes == 0);
|
||
|
rax_free(rax);
|
||
|
}
|
||
|
|
||
|
/* ------------------------------- Iterator --------------------------------- */
|
||
|
|
||
|
/* Initialize a Rax iterator. This call should be performed a single time
|
||
|
* to initialize the iterator, and must be followed by a raxSeek() call,
|
||
|
* otherwise the raxPrev()/raxNext() functions will just return EOF. */
|
||
|
void raxStart(raxIterator *it, rax *rt) {
|
||
|
it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
|
||
|
it->rt = rt;
|
||
|
it->key_len = 0;
|
||
|
it->key = it->key_static_string;
|
||
|
it->key_max = RAX_ITER_STATIC_LEN;
|
||
|
it->data = NULL;
|
||
|
raxStackInit(&it->stack);
|
||
|
}
|
||
|
|
||
|
/* Append characters at the current key string of the iterator 'it'. This
|
||
|
* is a low level function used to implement the iterator, not callable by
|
||
|
* the user. Returns 0 on out of memory, otherwise 1 is returned. */
|
||
|
int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
|
||
|
if (it->key_max < it->key_len+len) {
|
||
|
unsigned char *old = (it->key == it->key_static_string) ? NULL :
|
||
|
it->key;
|
||
|
size_t new_max = (it->key_len+len)*2;
|
||
|
it->key = rax_realloc(old,new_max);
|
||
|
if (it->key == NULL) {
|
||
|
it->key = (!old) ? it->key_static_string : old;
|
||
|
return 0;
|
||
|
}
|
||
|
if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
|
||
|
it->key_max = new_max;
|
||
|
}
|
||
|
/* Use memmove since there could be an overlap between 's' and
|
||
|
* it->key when we use the current key in order to re-seek. */
|
||
|
memmove(it->key+it->key_len,s,len);
|
||
|
it->key_len += len;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Remove the specified number of chars from the right of the current
|
||
|
* iterator key. */
|
||
|
void raxIteratorDelChars(raxIterator *it, size_t count) {
|
||
|
it->key_len -= count;
|
||
|
}
|
||
|
|
||
|
/* Do an iteration step towards the next element. At the end of the step the
|
||
|
* iterator key will represent the (new) current key. If it is not possible
|
||
|
* to step in the specified direction since there are no longer elements, the
|
||
|
* iterator is flagged with RAX_ITER_EOF.
|
||
|
*
|
||
|
* If 'noup' is true the function starts directly scanning for the next
|
||
|
* lexicographically smaller children, and the current node is already assumed
|
||
|
* to be the parent of the last key node, so the first operation to go back to
|
||
|
* the parent will be skipped. This option is used by raxSeek() when
|
||
|
* implementing seeking a non existing element with the ">" or "<" options:
|
||
|
* the starting node is not a key in that particular case, so we start the scan
|
||
|
* from a node that does not represent the key set.
|
||
|
*
|
||
|
* The function returns 1 on success or 0 on out of memory. */
|
||
|
int raxIteratorNextStep(raxIterator *it, int noup) {
|
||
|
if (it->flags & RAX_ITER_EOF) {
|
||
|
return 0;
|
||
|
} else if (it->flags & RAX_ITER_JUST_SEEKED) {
|
||
|
it->flags &= ~RAX_ITER_JUST_SEEKED;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Save key len, stack items and the node where we are currently
|
||
|
* so that on iterator EOF we can restore the current key and state. */
|
||
|
size_t orig_key_len = it->key_len;
|
||
|
size_t orig_stack_items = it->stack.items;
|
||
|
raxNode *orig_node = it->node;
|
||
|
|
||
|
/* Clear the EOF flag: it will be set again if the EOF condition
|
||
|
* is still valid. */
|
||
|
it->flags &= ~RAX_ITER_EOF;
|
||
|
|
||
|
while(1) {
|
||
|
int children = it->node->iscompr ? 1 : it->node->size;
|
||
|
if (!noup && children) {
|
||
|
debugf("GO DEEPER\n");
|
||
|
/* Seek the lexicographically smaller key in this subtree, which
|
||
|
* is the first one found always going torwards the first child
|
||
|
* of every successive node. */
|
||
|
if (!raxStackPush(&it->stack,it->node)) return 0;
|
||
|
raxNode **cp = raxNodeFirstChildPtr(it->node);
|
||
|
if (!raxIteratorAddChars(it,it->node->data,
|
||
|
it->node->iscompr ? it->node->size : 1)) return 0;
|
||
|
memcpy(&it->node,cp,sizeof(it->node));
|
||
|
/* For "next" step, stop every time we find a key along the
|
||
|
* way, since the key is lexicograhically smaller compared to
|
||
|
* what follows in the sub-children. */
|
||
|
if (it->node->iskey) {
|
||
|
it->data = raxGetData(it->node);
|
||
|
return 1;
|
||
|
}
|
||
|
} else {
|
||
|
/* If we finished exporing the previous sub-tree, switch to the
|
||
|
* new one: go upper until a node is found where there are
|
||
|
* children representing keys lexicographically greater than the
|
||
|
* current key. */
|
||
|
while(1) {
|
||
|
/* Already on head? Can't go up, iteration finished. */
|
||
|
if (!noup && it->node == it->rt->head) {
|
||
|
it->flags |= RAX_ITER_EOF;
|
||
|
it->stack.items = orig_stack_items;
|
||
|
it->key_len = orig_key_len;
|
||
|
it->node = orig_node;
|
||
|
return 1;
|
||
|
}
|
||
|
/* If there are no children at the current node, try parent's
|
||
|
* next child. */
|
||
|
unsigned char prevchild = it->key[it->key_len-1];
|
||
|
if (!noup) {
|
||
|
it->node = raxStackPop(&it->stack);
|
||
|
} else {
|
||
|
noup = 0;
|
||
|
}
|
||
|
/* Adjust the current key to represent the node we are
|
||
|
* at. */
|
||
|
int todel = it->node->iscompr ? it->node->size : 1;
|
||
|
raxIteratorDelChars(it,todel);
|
||
|
|
||
|
/* Try visitng the next child if there was at least one
|
||
|
* additional child. */
|
||
|
if (!it->node->iscompr && it->node->size > 1) {
|
||
|
raxNode **cp = raxNodeFirstChildPtr(it->node);
|
||
|
int i = 0;
|
||
|
while (i < it->node->size) {
|
||
|
debugf("SCAN NEXT %c\n", it->node->data[i]);
|
||
|
if (it->node->data[i] > prevchild) break;
|
||
|
i++;
|
||
|
cp++;
|
||
|
}
|
||
|
if (i != it->node->size) {
|
||
|
debugf("SCAN found a new node\n");
|
||
|
raxIteratorAddChars(it,it->node->data+i,1);
|
||
|
if (!raxStackPush(&it->stack,it->node)) return 0;
|
||
|
memcpy(&it->node,cp,sizeof(it->node));
|
||
|
if (it->node->iskey) {
|
||
|
it->data = raxGetData(it->node);
|
||
|
return 1;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Seek the grestest key in the subtree at the current node. Return 0 on
|
||
|
* out of memory, otherwise 1. This is an helper function for different
|
||
|
* iteration functions below. */
|
||
|
int raxSeekGreatest(raxIterator *it) {
|
||
|
while(it->node->size) {
|
||
|
if (it->node->iscompr) {
|
||
|
if (!raxIteratorAddChars(it,it->node->data,
|
||
|
it->node->size)) return 0;
|
||
|
} else {
|
||
|
if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
|
||
|
return 0;
|
||
|
}
|
||
|
raxNode **cp = raxNodeLastChildPtr(it->node);
|
||
|
if (!raxStackPush(&it->stack,it->node)) return 0;
|
||
|
memcpy(&it->node,cp,sizeof(it->node));
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Like raxIteratorNextStep() but implements an iteration step moving
|
||
|
* to the lexicographically previous element. The 'noup' option has a similar
|
||
|
* effect to the one of raxIteratorPrevSte(). */
|
||
|
int raxIteratorPrevStep(raxIterator *it, int noup) {
|
||
|
if (it->flags & RAX_ITER_EOF) {
|
||
|
return 0;
|
||
|
} else if (it->flags & RAX_ITER_JUST_SEEKED) {
|
||
|
it->flags &= ~RAX_ITER_JUST_SEEKED;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Save key len, stack items and the node where we are currently
|
||
|
* so that on iterator EOF we can restore the current key and state. */
|
||
|
size_t orig_key_len = it->key_len;
|
||
|
size_t orig_stack_items = it->stack.items;
|
||
|
raxNode *orig_node = it->node;
|
||
|
|
||
|
while(1) {
|
||
|
/* Already on head? Can't go up, iteration finished. */
|
||
|
if (!noup && it->node == it->rt->head) {
|
||
|
it->flags |= RAX_ITER_EOF;
|
||
|
it->stack.items = orig_stack_items;
|
||
|
it->key_len = orig_key_len;
|
||
|
it->node = orig_node;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
unsigned char prevchild = it->key[it->key_len-1];
|
||
|
if (!noup) {
|
||
|
it->node = raxStackPop(&it->stack);
|
||
|
} else {
|
||
|
noup = 0;
|
||
|
}
|
||
|
|
||
|
/* Adjust the current key to represent the node we are
|
||
|
* at. */
|
||
|
int todel = it->node->iscompr ? it->node->size : 1;
|
||
|
raxIteratorDelChars(it,todel);
|
||
|
|
||
|
/* Try visiting the prev child if there was at least one
|
||
|
* additional child. */
|
||
|
if (!it->node->iscompr && it->node->size > 1) {
|
||
|
raxNode **cp = raxNodeLastChildPtr(it->node);
|
||
|
int i = it->node->size-1;
|
||
|
while (i >= 0) {
|
||
|
debugf("SCAN PREV %c\n", it->node->data[i]);
|
||
|
if (it->node->data[i] < prevchild) break;
|
||
|
i--;
|
||
|
cp--;
|
||
|
}
|
||
|
/* If we found a new subtree to explore in this node,
|
||
|
* go deeper following all the last children in order to
|
||
|
* find the key lexicographically greater. */
|
||
|
if (i != -1) {
|
||
|
debugf("SCAN found a new node\n");
|
||
|
/* Enter the node we just found. */
|
||
|
if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
|
||
|
if (!raxStackPush(&it->stack,it->node)) return 0;
|
||
|
memcpy(&it->node,cp,sizeof(it->node));
|
||
|
/* Seek sub-tree max. */
|
||
|
if (!raxSeekGreatest(it)) return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Return the key: this could be the key we found scanning a new
|
||
|
* subtree, or if we did not find a new subtree to explore here,
|
||
|
* before giving up with this node, check if it's a key itself. */
|
||
|
if (it->node->iskey) {
|
||
|
it->data = raxGetData(it->node);
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Seek an iterator at the specified element.
|
||
|
* Return 0 if the seek failed for syntax error or out of memory. Otherwise
|
||
|
* 1 is returned. */
|
||
|
int raxSeek(raxIterator *it, unsigned char *ele, size_t len, const char *op) {
|
||
|
int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
|
||
|
|
||
|
it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */
|
||
|
it->flags |= RAX_ITER_JUST_SEEKED;
|
||
|
it->flags &= ~RAX_ITER_EOF;
|
||
|
it->key_len = 0;
|
||
|
it->node = NULL;
|
||
|
|
||
|
/* Set flags according to the operator used to perform the seek. */
|
||
|
if (op[0] == '>') {
|
||
|
gt = 1;
|
||
|
if (op[1] == '=') eq = 1;
|
||
|
} else if (op[0] == '<') {
|
||
|
lt = 1;
|
||
|
if (op[1] == '=') eq = 1;
|
||
|
} else if (op[0] == '=') {
|
||
|
eq = 1;
|
||
|
} else if (op[0] == '^') {
|
||
|
first = 1;
|
||
|
} else if (op[0] == '$') {
|
||
|
last = 1;
|
||
|
} else {
|
||
|
return 0; /* Error. */
|
||
|
}
|
||
|
|
||
|
/* If there are no elements, set the EOF condition immediately and
|
||
|
* return. */
|
||
|
if (it->rt->numele == 0) {
|
||
|
it->flags |= RAX_ITER_EOF;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
if (first) {
|
||
|
/* Seeking the first key greater or equal to the empty string
|
||
|
* is equivalent to seeking the smaller key available. */
|
||
|
return raxSeek(it,NULL,0,">=");
|
||
|
}
|
||
|
|
||
|
if (last) {
|
||
|
/* Find the greatest key taking always the last child till a
|
||
|
* final node is found. */
|
||
|
it->node = it->rt->head;
|
||
|
if (!raxSeekGreatest(it)) return 0;
|
||
|
assert(it->node->iskey);
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* We need to seek the specified key. What we do here is to actually
|
||
|
* perform a lookup, and later invoke the prev/next key code that
|
||
|
* we already use for iteration. */
|
||
|
int splitpos = 0;
|
||
|
size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
|
||
|
|
||
|
/* Return OOM on incomplete stack info. */
|
||
|
if (it->stack.oom) return 0;
|
||
|
|
||
|
if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
|
||
|
it->node->iskey)
|
||
|
{
|
||
|
/* We found our node, since the key matches and we have an
|
||
|
* "equal" condition. */
|
||
|
if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
|
||
|
} else {
|
||
|
/* Exact key not found or eq flag not set. We have to set as current
|
||
|
* key the one represented by the node we stopped at, and perform
|
||
|
* a next/prev operation to seek. To reconstruct the key at this node
|
||
|
* we start from the parent and go to the current node, accumulating
|
||
|
* the characters found along the way. */
|
||
|
if (!raxStackPush(&it->stack,it->node)) return 0;
|
||
|
for (size_t j = 1; j < it->stack.items; j++) {
|
||
|
raxNode *parent = it->stack.stack[j-1];
|
||
|
raxNode *child = it->stack.stack[j];
|
||
|
if (parent->iscompr) {
|
||
|
if (!raxIteratorAddChars(it,parent->data,parent->size))
|
||
|
return 0;
|
||
|
} else {
|
||
|
raxNode **cp = raxNodeFirstChildPtr(parent);
|
||
|
unsigned char *p = parent->data;
|
||
|
while(1) {
|
||
|
raxNode *aux;
|
||
|
memcpy(&aux,cp,sizeof(aux));
|
||
|
if (aux == child) break;
|
||
|
cp++;
|
||
|
p++;
|
||
|
}
|
||
|
if (!raxIteratorAddChars(it,p,1)) return 0;
|
||
|
}
|
||
|
}
|
||
|
raxStackPop(&it->stack);
|
||
|
|
||
|
/* We need to set the iterator in the correct state to call next/prev
|
||
|
* step in order to seek the desired element. */
|
||
|
debugf("After initial seek: i=%d len=%d key=%.*s\n",
|
||
|
(int)i, (int)len, (int)it->key_len, it->key);
|
||
|
if (i != len && !it->node->iscompr) {
|
||
|
/* If we stopped in the middle of a normal node because of a
|
||
|
* mismatch, add the mismatching character to the current key
|
||
|
* and call the iterator with the 'noup' flag so that it will try
|
||
|
* to seek the next/prev child in the current node directly based
|
||
|
* on the mismatching character. */
|
||
|
if (!raxIteratorAddChars(it,ele+i,1)) return 0;
|
||
|
debugf("Seek normal node on mismatch: %.*s\n",
|
||
|
(int)it->key_len, (char*)it->key);
|
||
|
|
||
|
it->flags &= ~RAX_ITER_JUST_SEEKED;
|
||
|
if (lt && !raxIteratorPrevStep(it,1)) return 0;
|
||
|
if (gt && !raxIteratorNextStep(it,1)) return 0;
|
||
|
it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
|
||
|
} else if (i != len && it->node->iscompr) {
|
||
|
debugf("Compressed mismatch: %.*s\n",
|
||
|
(int)it->key_len, (char*)it->key);
|
||
|
/* In case of a mismatch within a compressed node. */
|
||
|
int nodechar = it->node->data[splitpos];
|
||
|
int keychar = ele[i];
|
||
|
it->flags &= ~RAX_ITER_JUST_SEEKED;
|
||
|
if (gt) {
|
||
|
/* If the key the compressed node represents is greater
|
||
|
* than our seek element, continue forward, otherwise set the
|
||
|
* state in order to go back to the next sub-tree. */
|
||
|
if (nodechar > keychar) {
|
||
|
if (!raxIteratorNextStep(it,0)) return 0;
|
||
|
} else {
|
||
|
if (!raxIteratorAddChars(it,it->node->data,it->node->size))
|
||
|
return 0;
|
||
|
if (!raxIteratorNextStep(it,1)) return 0;
|
||
|
}
|
||
|
}
|
||
|
if (lt) {
|
||
|
/* If the key the compressed node represents is smaller
|
||
|
* than our seek element, seek the greater key in this
|
||
|
* subtree, otherwise set the state in order to go back to
|
||
|
* the previous sub-tree. */
|
||
|
if (nodechar < keychar) {
|
||
|
if (!raxSeekGreatest(it)) return 0;
|
||
|
} else {
|
||
|
if (!raxIteratorAddChars(it,it->node->data,it->node->size))
|
||
|
return 0;
|
||
|
if (!raxIteratorPrevStep(it,1)) return 0;
|
||
|
}
|
||
|
}
|
||
|
it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
|
||
|
} else {
|
||
|
debugf("No mismatch: %.*s\n",
|
||
|
(int)it->key_len, (char*)it->key);
|
||
|
/* If there was no mismatch we are into a node representing the
|
||
|
* key, (but which is not a key or the seek operator does not
|
||
|
* include 'eq'), or we stopped in the middle of a compressed node
|
||
|
* after processing all the key. Cotinue iterating as this was
|
||
|
* a legitimate key we stopped at. */
|
||
|
it->flags &= ~RAX_ITER_JUST_SEEKED;
|
||
|
if (gt && !raxIteratorNextStep(it,0)) return 0;
|
||
|
if (lt && !raxIteratorPrevStep(it,0)) return 0;
|
||
|
it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
|
||
|
}
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Go to the next element in the scope of the iterator 'it'.
|
||
|
* If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
|
||
|
* returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
|
||
|
int raxNext(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) {
|
||
|
if (!raxIteratorNextStep(it,0)) {
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
if (it->flags & RAX_ITER_EOF) {
|
||
|
errno = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Go to the previous element in the scope of the iterator 'it'.
|
||
|
* If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
|
||
|
* returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
|
||
|
int raxPrev(raxIterator *it, unsigned char *stop, size_t stoplen, char *op) {
|
||
|
if (!raxIteratorPrevStep(it,0)) {
|
||
|
errno = ENOMEM;
|
||
|
return 0;
|
||
|
}
|
||
|
if (it->flags & RAX_ITER_EOF) {
|
||
|
errno = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Free the iterator. */
|
||
|
void raxStop(raxIterator *it) {
|
||
|
if (it->key != it->key_static_string) rax_free(it->key);
|
||
|
raxStackFree(&it->stack);
|
||
|
}
|
||
|
|
||
|
/* ----------------------------- Introspection ------------------------------ */
|
||
|
|
||
|
/* This function is mostly used for debugging and learning purposes.
|
||
|
* It shows an ASCII representation of a tree on standard output, outling
|
||
|
* all the nodes and the contained keys.
|
||
|
*
|
||
|
* The representation is as follow:
|
||
|
*
|
||
|
* "foobar" (compressed node)
|
||
|
* [abc] (normal node with three children)
|
||
|
* [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
|
||
|
* [] (a normal empty node)
|
||
|
*
|
||
|
* Children are represented in new idented lines, each children prefixed by
|
||
|
* the "`-(x)" string, where "x" is the edge byte.
|
||
|
*
|
||
|
* [abc]
|
||
|
* `-(a) "ladin"
|
||
|
* `-(b) [kj]
|
||
|
* `-(c) []
|
||
|
*
|
||
|
* However when a node has a single child the following representation
|
||
|
* is used instead:
|
||
|
*
|
||
|
* [abc] -> "ladin" -> []
|
||
|
*/
|
||
|
|
||
|
/* The actual implementation of raxShow(). */
|
||
|
void raxRecursiveShow(int level, int lpad, raxNode *n) {
|
||
|
char s = n->iscompr ? '"' : '[';
|
||
|
char e = n->iscompr ? '"' : ']';
|
||
|
|
||
|
int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
|
||
|
if (n->iskey) {
|
||
|
numchars += printf("=%p",raxGetData(n));
|
||
|
}
|
||
|
|
||
|
int numchildren = n->iscompr ? 1 : n->size;
|
||
|
/* Note that 7 and 4 magic constants are the string length
|
||
|
* of " `-(x) " and " -> " respectively. */
|
||
|
if (level) {
|
||
|
lpad += (numchildren > 1) ? 7 : 4;
|
||
|
if (numchildren == 1) lpad += numchars;
|
||
|
}
|
||
|
raxNode **cp = raxNodeFirstChildPtr(n);
|
||
|
for (int i = 0; i < numchildren; i++) {
|
||
|
char *branch = " `-(%c) ";
|
||
|
if (numchildren > 1) {
|
||
|
printf("\n");
|
||
|
for (int j = 0; j < lpad; j++) putchar(' ');
|
||
|
printf(branch,n->data[i]);
|
||
|
} else {
|
||
|
printf(" -> ");
|
||
|
}
|
||
|
raxNode *child;
|
||
|
memcpy(&child,cp,sizeof(child));
|
||
|
raxRecursiveShow(level+1,lpad,child);
|
||
|
cp++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Show a tree, as outlined in the comment above. */
|
||
|
void raxShow(rax *rax) {
|
||
|
raxRecursiveShow(0,0,rax->head);
|
||
|
putchar('\n');
|
||
|
}
|
||
|
|
||
|
/* Used by debugnode() macro to show info about a given node. */
|
||
|
void raxDebugShowNode(const char *msg, raxNode *n) {
|
||
|
printf("%s: %p [%.*s] key:%d size:%d children:",
|
||
|
msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
|
||
|
int numcld = n->iscompr ? 1 : n->size;
|
||
|
raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
|
||
|
while(numcld--) {
|
||
|
raxNode *child;
|
||
|
memcpy(&child,cldptr,sizeof(child));
|
||
|
cldptr++;
|
||
|
printf("%p ", (void*)child);
|
||
|
}
|
||
|
printf("\n");
|
||
|
fflush(stdout);
|
||
|
}
|
||
|
|
||
|
#ifdef BENCHMARK_MAIN
|
||
|
#include <stdio.h>
|
||
|
#include <sys/time.h>
|
||
|
|
||
|
/* This is a simple Feistel network in order to turn every possible
|
||
|
* uint32_t input into another "randomly" looking uint32_t. It is a
|
||
|
* one to one map so there are no repetitions. */
|
||
|
static uint32_t int2int(uint32_t input) {
|
||
|
uint16_t l = input & 0xffff;
|
||
|
uint16_t r = input >> 16;
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
uint16_t nl = r;
|
||
|
uint16_t F = (((r * 31) + (r >> 5) + 7 * 371) ^ r) & 0xffff;
|
||
|
r = l ^ F;
|
||
|
l = nl;
|
||
|
}
|
||
|
return (r<<16)|l;
|
||
|
}
|
||
|
|
||
|
/* Turn an uint32_t integer into an alphanumerical key and return its
|
||
|
* length. This function is used in order to generate keys that have
|
||
|
* a large charset, so that the radix tree can be testsed with many
|
||
|
* children per node. */
|
||
|
static size_t int2alphakey(char *s, size_t maxlen, uint32_t i) {
|
||
|
const char *set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||
|
"abcdefghijklmnopqrstuvwxyz"
|
||
|
"0123456789";
|
||
|
const size_t setlen = 62;
|
||
|
|
||
|
if (maxlen == 0) return 0;
|
||
|
maxlen--; /* Space for null term char. */
|
||
|
size_t len = 0;
|
||
|
while(len < maxlen) {
|
||
|
s[len++] = set[i%setlen];
|
||
|
i /= setlen;
|
||
|
if (i == 0) break;
|
||
|
}
|
||
|
s[len] = '\0';
|
||
|
return len;
|
||
|
}
|
||
|
|
||
|
/* Return the UNIX time in microseconds */
|
||
|
static long long ustime(void) {
|
||
|
struct timeval tv;
|
||
|
long long ust;
|
||
|
|
||
|
gettimeofday(&tv, NULL);
|
||
|
ust = ((long long)tv.tv_sec)*1000000;
|
||
|
ust += tv.tv_usec;
|
||
|
return ust;
|
||
|
}
|
||
|
|
||
|
/* Turn the integer 'i' into a key according to 'mode'.
|
||
|
* mode = 0, just represents the integer as a string.
|
||
|
* mode = 1, turn it into a random-looking alphanumerical string
|
||
|
* according to the int2alphakey() function. */
|
||
|
static size_t int2key(char *s, size_t maxlen, uint32_t i, int mode) {
|
||
|
if (mode == 0) {
|
||
|
return snprintf(s,maxlen,"%lu",(unsigned long)i);
|
||
|
} else {
|
||
|
i = int2int(i);
|
||
|
return int2alphakey(s,maxlen,i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int main(void) {
|
||
|
for (int mode = 0; mode < 2; mode++) {
|
||
|
printf("Benchmark with %s keys:\n",
|
||
|
(mode == 0) ? "integer" : "alphanumerical");
|
||
|
rax *t = raxNew();
|
||
|
long long start = ustime();
|
||
|
for (int i = 0; i < 5000000; i++) {
|
||
|
char buf[64];
|
||
|
int len = int2key(buf,sizeof(buf),i,mode);
|
||
|
raxInsert(t,(unsigned char*)buf,len,(void*)(long)i);
|
||
|
}
|
||
|
printf("Insert: %f\n", (double)(ustime()-start)/1000000);
|
||
|
printf("%llu total nodes\n", (unsigned long long)t->numnodes);
|
||
|
printf("%llu total elements\n", (unsigned long long)t->numele);
|
||
|
|
||
|
start = ustime();
|
||
|
for (int i = 0; i < 5000000; i++) {
|
||
|
char buf[64];
|
||
|
int len = int2key(buf,sizeof(buf),i,mode);
|
||
|
void *data = raxFind(t,(unsigned char*)buf,len);
|
||
|
if (data != (void*)(long)i) {
|
||
|
printf("Issue with %s: %p instead of %p\n", buf,
|
||
|
data, (void*)(long)i);
|
||
|
}
|
||
|
}
|
||
|
printf("Lookup: %f\n", (double)(ustime()-start)/1000000);
|
||
|
|
||
|
start = ustime();
|
||
|
for (int i = 0; i < 5000000; i++) {
|
||
|
char buf[64];
|
||
|
int r = rand() % 5000000;
|
||
|
int len = int2key(buf,sizeof(buf),r,mode);
|
||
|
void *data = raxFind(t,(unsigned char*)buf,len);
|
||
|
if (data != (void*)(long)r) {
|
||
|
printf("Issue with %s: %p instead of %p\n", buf,
|
||
|
data, (void*)(long)r);
|
||
|
}
|
||
|
}
|
||
|
printf("Random lookup: %f\n", (double)(ustime()-start)/1000000);
|
||
|
|
||
|
start = ustime();
|
||
|
int count = 0;
|
||
|
for (int i = 0; i < 5000000; i++) {
|
||
|
char buf[64];
|
||
|
int len = int2key(buf,sizeof(buf),i,mode);
|
||
|
buf[i%len] = '!'; /* "!" is never set into keys. */
|
||
|
void *data = raxFind(t,(unsigned char*) buf,len);
|
||
|
if (data != (void*)(long)i) count++;
|
||
|
}
|
||
|
printf("Failed lookup: %f\n", (double)(ustime()-start)/1000000);
|
||
|
|
||
|
start = ustime();
|
||
|
for (int i = 0; i < 5000000; i++) {
|
||
|
char buf[64];
|
||
|
int len = int2key(buf,sizeof(buf),i,mode);
|
||
|
int retval = raxRemove(t,(unsigned char*)buf,len);
|
||
|
assert(retval == 1);
|
||
|
}
|
||
|
printf("Deletion: %f\n", (double)(ustime()-start)/1000000);
|
||
|
|
||
|
printf("%llu total nodes\n", (unsigned long long)t->numnodes);
|
||
|
printf("%llu total elements\n", (unsigned long long)t->numele);
|
||
|
raxFree(t);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#ifdef TEST_MAIN
|
||
|
#include <stdio.h>
|
||
|
#include <time.h>
|
||
|
|
||
|
int main(void) {
|
||
|
printf("notfound = %p\n", raxNotFound);
|
||
|
rax *t = raxNew();
|
||
|
char *toadd[] = {"alligator","alien","baloon","chromodynamic","romane","romanus","romulus","rubens","ruber","rubicon","rubicundus","all","rub","ba",NULL};
|
||
|
|
||
|
srand(time(NULL));
|
||
|
for (int x = 0; x < 10000; x++) rand();
|
||
|
|
||
|
long items = 0;
|
||
|
while(toadd[items] != NULL) items++;
|
||
|
|
||
|
for (long i = 0; i < items; i++) {
|
||
|
raxInsert(t,(unsigned char*)toadd[i],strlen(toadd[i]),(void*)i);
|
||
|
printf("Added %s\n", toadd[i]);
|
||
|
}
|
||
|
raxShow(t);
|
||
|
|
||
|
raxIterator iter;
|
||
|
raxStart(&iter,t);
|
||
|
|
||
|
// OK: all this tests will need to go in the Rax unit test.
|
||
|
// raxSeek(&iter,(unsigned char*)"rpxxx",5,"<=");
|
||
|
// raxSeek(&iter,(unsigned char*)"rom",3,">=");
|
||
|
// raxSeek(&iter,(unsigned char*)"rub",3,">=");
|
||
|
// raxSeek(&iter,(unsigned char*)"rub",3,">");
|
||
|
// raxSeek(&iter,(unsigned char*)"rub",3,"<");
|
||
|
// raxSeek(&iter,(unsigned char*)"rom",3,">");
|
||
|
// raxSeek(&iter,(unsigned char*)"chro",4,">");
|
||
|
// raxSeek(&iter,(unsigned char*)"chro",4,"<");
|
||
|
// raxSeek(&iter,(unsigned char*)"chromz",6,"<");
|
||
|
// raxSeek(&iter,NULL,0,"^");
|
||
|
// raxSeek(&iter,"zorro",5,"<=");
|
||
|
// raxSeek(&iter,"zorro",5,"<");
|
||
|
// raxSeek(&iter,NULL,0,"$");
|
||
|
|
||
|
// STILL TO TEST
|
||
|
raxSeek(&iter,(unsigned char*)"ro",2,">=");
|
||
|
printf("EOF: %d\n", (iter.flags & RAX_ITER_EOF) != 0);
|
||
|
|
||
|
printf("SEEKED: %.*s, val %p\n", (int)iter.key_len,
|
||
|
(char*)iter.key,
|
||
|
iter.data);
|
||
|
|
||
|
printf("NEXT\n");
|
||
|
while(raxNext(&iter,NULL,0,NULL)) {
|
||
|
printf("--- key: %.*s, val %p\n", (int)iter.key_len,
|
||
|
(char*)iter.key,
|
||
|
iter.data);
|
||
|
}
|
||
|
|
||
|
printf("After EOF element is: %.*s\n", (int)iter.key_len,
|
||
|
(char*)iter.key);
|
||
|
printf("~~~~~~~~~~~~~~\n");
|
||
|
|
||
|
printf("PREV\n");
|
||
|
raxSeek(&iter,iter.key,iter.key_len,"==");
|
||
|
while(raxPrev(&iter,NULL,0,NULL)) {
|
||
|
printf("--- key: %.*s, val %p\n", (int)iter.key_len,
|
||
|
(char*)iter.key,
|
||
|
iter.data);
|
||
|
}
|
||
|
|
||
|
printf("After EOF element is: %.*s\n", (int)iter.key_len,
|
||
|
(char*)iter.key);
|
||
|
raxStop(&iter);
|
||
|
|
||
|
#if 0
|
||
|
raxStop(&iter);
|
||
|
#endif
|
||
|
exit(0);
|
||
|
|
||
|
int rnum = rand();
|
||
|
int survivor = rnum % items;
|
||
|
|
||
|
#if 1
|
||
|
printf("Removing everything but %s in random order\n", toadd[survivor]);
|
||
|
for (long i = 0; i < 1000; i++) {
|
||
|
int r = rand() % items;
|
||
|
if (r == survivor) continue;
|
||
|
raxRemove(t,(unsigned char*)toadd[r],strlen(toadd[r]));
|
||
|
}
|
||
|
#else
|
||
|
printf("Removing rubicon\n");
|
||
|
raxRemove(t,(unsigned char*)"rubicon",7);
|
||
|
#endif
|
||
|
|
||
|
printf("%llu total nodes\n", (unsigned long long)t->numnodes);
|
||
|
printf("%llu total elements\n", (unsigned long long)t->numele);
|
||
|
|
||
|
raxShow(t);
|
||
|
raxFree(t);
|
||
|
}
|
||
|
#endif
|