mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 08:08:53 -05:00
672 lines
18 KiB
C
672 lines
18 KiB
C
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/mman.h>
|
|
#include <string.h>
|
|
#include <arpa/inet.h>
|
|
#include <stdint.h>
|
|
#include <limits.h>
|
|
#include "lzf.h"
|
|
|
|
/* Value types that may appear as the type byte of a key/value entry. */
enum {
    REDIS_STRING = 0,
    REDIS_LIST   = 1,
    REDIS_SET    = 2,
    REDIS_ZSET   = 3,
    REDIS_HASH   = 4
};
|
|
|
|
/* Object encodings: some kinds of objects (e.g. strings and hashes) can be
 * represented internally in more than one way, and these constants name
 * those representations. */
enum {
    REDIS_ENCODING_RAW    = 0,  /* Raw representation */
    REDIS_ENCODING_INT    = 1,  /* Encoded as integer */
    REDIS_ENCODING_ZIPMAP = 2,  /* Encoded as zipmap */
    REDIS_ENCODING_HT     = 3   /* Encoded as a hash table */
};
|
|
|
|
/* Type bytes that only exist in the on-disk dump: they mark an expire time,
 * a database switch, and the end of the file respectively. */
enum {
    REDIS_EXPIRETIME = 253,
    REDIS_SELECTDB   = 254,
    REDIS_EOF        = 255
};
|
|
|
|
/* Length encoding used by the dump file format. Storing a full 32 bit
 * length for short keys would waste space, so the two most significant bits
 * of the first byte select how the length is stored:
 *
 * 00|000000                  => the length is the low 6 bits of this byte
 * 01|000000 00000000         => the length is 14 bits: the low 6 bits of
 *                               this byte followed by the 8 bits of the
 *                               next byte
 * 10|000000 [32 bit integer] => a full 32 bit length follows (converted
 *                               with ntohl() when read)
 * 11|000000                  => a specially encoded object follows; the low
 *                               6 bits tell which kind.
 *                               See the REDIS_RDB_ENC_* constants.
 *
 * Lengths up to 63 are stored in a single byte; most keys, and many
 * values, fit in that. */
enum {
    REDIS_RDB_6BITLEN  = 0,
    REDIS_RDB_14BITLEN = 1,
    REDIS_RDB_32BITLEN = 2,
    REDIS_RDB_ENCVAL   = 3
};

/* Returned by loadLength() when the length could not be read. */
#define REDIS_RDB_LENERR UINT_MAX
|
|
|
|
/* When the length prefix of a string stored on disk has its two most
 * significant bits set, the remaining six bits select one of the special
 * encodings below instead of giving a length. */
enum {
    REDIS_RDB_ENC_INT8  = 0,  /* 8 bit signed integer */
    REDIS_RDB_ENC_INT16 = 1,  /* 16 bit signed integer */
    REDIS_RDB_ENC_INT32 = 2,  /* 32 bit signed integer */
    REDIS_RDB_ENC_LZF   = 3   /* LZF-compressed string */
};
|
|
|
|
/* Print a printf-style message to stdout and terminate the process with
 * exit status 1.
 *
 * The body is wrapped in do { } while (0) so the expansion behaves as one
 * statement: `if (x) ERROR("..."); else ...` now compiles and binds the
 * else to the intended if. */
#define ERROR(...) do { \
    printf(__VA_ARGS__); \
    exit(1); \
} while (0)
|
|
|
|
/* A read cursor: base address of the data, its total size in bytes, and the
 * current read offset from the base. */
typedef struct {
    void *data;
    unsigned long size, offset;
} pos;

/* Stack of cursors; 'level' is the index of the one currently in use. */
static pos positions[16];
static unsigned char level = 0;

/* Read offset of the cursor currently in use. */
#define CURR_OFFSET (positions[level].offset)
|
|
|
|
/* Stack of error messages, each tagged with the file offset it refers to.
 * 'level' is the number of slots currently in use. */
typedef struct {
    char error[16][1024];
    unsigned long offset[16];
    unsigned int level;
} errors_t;
static errors_t errors;

/* Push a printf-style message and its offset onto the error stack.
 *
 * The message is truncated to fit its slot, and pushes beyond the capacity
 * of the stack are dropped instead of writing past the arrays. The body is
 * wrapped in do { } while (0) so the macro expands to a single statement. */
#define SHIFT_ERROR(provided_offset, ...) do { \
    if (errors.level < sizeof(errors.offset) / sizeof(errors.offset[0])) { \
        snprintf(errors.error[errors.level], \
                 sizeof(errors.error[errors.level]), __VA_ARGS__); \
        errors.offset[errors.level] = (provided_offset); \
        errors.level++; \
    } \
} while (0)
|
|
|
|
/* Outcome of parsing one opcode from the dump. The field order matters:
 * loadEntry() initialises this struct positionally. */
typedef struct {
    char *key;      /* key name when one was read, otherwise NULL */
    int type;       /* type byte of the opcode, or -1 when none was read */
    char success;   /* non-zero when the opcode was parsed successfully */
} entry;
|
|
|
|
/* Globals that are effectively constants. These doubles are used when
 * decoding doubles stored on disk; main() computes them at runtime rather
 * than spelling them as literals, to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;

/* Printable name for each type byte, indexed by the type value. */
static char types[256][16];
|
|
|
|
/* when number of bytes to read is negative, do a peek */
|
|
int readBytes(void *target, long num) {
|
|
char peek = (num < 0) ? 1 : 0;
|
|
num = (num < 0) ? -num : num;
|
|
|
|
pos p = positions[level];
|
|
if (p.offset + num > p.size) {
|
|
return 0;
|
|
} else {
|
|
memcpy(target, (void*)((unsigned long)p.data + p.offset), num);
|
|
if (!peek) positions[level].offset += num;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/* Consume and validate the 9 byte dump header: the signature "REDIS"
 * followed by 4 bytes that are parsed as a decimal format version.
 *
 * Any problem (short read, wrong signature, version other than 1) is
 * reported through ERROR(), which terminates the program. Returns 1
 * otherwise. */
int processHeader() {
    char header[10] = "_________";
    int version;

    if (!readBytes(header, 9)) {
        ERROR("Cannot read header\n");
    }

    /* the signature occupies the first 5 bytes */
    if (memcmp(header, "REDIS", 5)) {
        ERROR("Wrong signature in header\n");
    }

    /* the rest of the header is the version number in ASCII */
    version = (int)strtol(&header[5], NULL, 10);
    if (version != 1) {
        ERROR("Unknown RDB format version: %d\n", version);
    }
    return 1;
}
|
|
|
|
/* Read one byte and store it in e->type if it is a known type: a value
 * type (0..REDIS_HASH) or a dump-only opcode (REDIS_EXPIRETIME..255).
 *
 * Returns 1 on success. Returns 0, after pushing an error tagged with the
 * offset of the byte, when the byte cannot be read or is not a known type;
 * e->type is left unchanged in that case.
 *
 * The offset is kept as unsigned long, like the cursor and the error stack,
 * so it is not truncated for dumps of 4 GiB or more. */
int loadType(entry *e) {
    unsigned long offset = CURR_OFFSET;
    unsigned char t;

    if (!readBytes(&t, 1)) {
        SHIFT_ERROR(offset, "Could not read type");
        return 0;
    }

    /* this byte needs to qualify as type */
    if (t > REDIS_HASH && t < REDIS_EXPIRETIME) {
        SHIFT_ERROR(offset, "Unknown type (0x%02x)", t);
        return 0;
    }

    e->type = t;
    return 1;
}
|
|
|
|
/* Look at the next byte without consuming it.
 *
 * Returns the byte when it is a known type (0..4 or 253..255), and -1 when
 * it is not or when no byte is available. */
int peekType() {
    unsigned char next;

    /* a negative count asks readBytes() to peek */
    if (!readBytes(&next, -1)) return -1;
    if (next > 4 && next < 253) return -1;
    return next;
}
|
|
|
|
/* discard time, just consume the bytes */
|
|
int processTime() {
|
|
uint32_t offset = CURR_OFFSET;
|
|
unsigned char t[4];
|
|
if (readBytes(t, 4)) {
|
|
return 1;
|
|
} else {
|
|
SHIFT_ERROR(offset, "Could not read time");
|
|
}
|
|
|
|
/* failure */
|
|
return 0;
|
|
}
|
|
|
|
uint32_t loadLength(int *isencoded) {
|
|
unsigned char buf[2];
|
|
uint32_t len;
|
|
int type;
|
|
|
|
if (isencoded) *isencoded = 0;
|
|
if (!readBytes(buf, 1)) return REDIS_RDB_LENERR;
|
|
type = (buf[0] & 0xC0) >> 6;
|
|
if (type == REDIS_RDB_6BITLEN) {
|
|
/* Read a 6 bit len */
|
|
return buf[0] & 0x3F;
|
|
} else if (type == REDIS_RDB_ENCVAL) {
|
|
/* Read a 6 bit len encoding type */
|
|
if (isencoded) *isencoded = 1;
|
|
return buf[0] & 0x3F;
|
|
} else if (type == REDIS_RDB_14BITLEN) {
|
|
/* Read a 14 bit len */
|
|
if (!readBytes(buf+1,1)) return REDIS_RDB_LENERR;
|
|
return ((buf[0] & 0x3F) << 8) | buf[1];
|
|
} else {
|
|
/* Read a 32 bit len */
|
|
if (!readBytes(&len, 4)) return REDIS_RDB_LENERR;
|
|
return (unsigned int)ntohl(len);
|
|
}
|
|
}
|
|
|
|
char *loadIntegerObject(int enctype) {
|
|
uint32_t offset = CURR_OFFSET;
|
|
unsigned char enc[4];
|
|
long long val;
|
|
|
|
if (enctype == REDIS_RDB_ENC_INT8) {
|
|
uint8_t v;
|
|
if (!readBytes(enc, 1)) return NULL;
|
|
v = enc[0];
|
|
val = (int8_t)v;
|
|
} else if (enctype == REDIS_RDB_ENC_INT16) {
|
|
uint16_t v;
|
|
if (!readBytes(enc, 2)) return NULL;
|
|
v = enc[0]|(enc[1]<<8);
|
|
val = (int16_t)v;
|
|
} else if (enctype == REDIS_RDB_ENC_INT32) {
|
|
uint32_t v;
|
|
if (!readBytes(enc, 4)) return NULL;
|
|
v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
|
|
val = (int32_t)v;
|
|
} else {
|
|
SHIFT_ERROR(offset, "Unknown integer encoding (0x%02x)", enctype);
|
|
return NULL;
|
|
}
|
|
|
|
/* convert val into string */
|
|
char *buf;
|
|
buf = malloc(sizeof(char) * 128);
|
|
sprintf(buf, "%lld", val);
|
|
return buf;
|
|
}
|
|
|
|
char* loadLzfStringObject() {
|
|
unsigned int slen, clen;
|
|
char *c, *s;
|
|
|
|
if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
|
|
if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
|
|
|
|
c = malloc(clen);
|
|
if (!readBytes(c, clen)) {
|
|
free(c);
|
|
return NULL;
|
|
}
|
|
|
|
s = malloc(slen+1);
|
|
if (lzf_decompress(c,clen,s,slen) == 0) {
|
|
free(c); free(s);
|
|
return NULL;
|
|
}
|
|
|
|
free(c);
|
|
return s;
|
|
}
|
|
|
|
/* returns NULL when not processable, char* when valid */
|
|
char* loadStringObject() {
|
|
uint32_t offset = CURR_OFFSET;
|
|
int isencoded;
|
|
uint32_t len;
|
|
|
|
len = loadLength(&isencoded);
|
|
if (isencoded) {
|
|
switch(len) {
|
|
case REDIS_RDB_ENC_INT8:
|
|
case REDIS_RDB_ENC_INT16:
|
|
case REDIS_RDB_ENC_INT32:
|
|
return loadIntegerObject(len);
|
|
case REDIS_RDB_ENC_LZF:
|
|
return loadLzfStringObject();
|
|
default:
|
|
/* unknown encoding */
|
|
SHIFT_ERROR(offset, "Unknown string encoding (0x%02x)", len);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
if (len == REDIS_RDB_LENERR) return NULL;
|
|
|
|
char *buf = malloc(sizeof(char) * (len+1));
|
|
buf[len] = '\0';
|
|
if (!readBytes(buf, len)) {
|
|
free(buf);
|
|
return NULL;
|
|
}
|
|
return buf;
|
|
}
|
|
|
|
int processStringObject(char** store) {
|
|
unsigned long offset = CURR_OFFSET;
|
|
char *key = loadStringObject();
|
|
if (key == NULL) {
|
|
SHIFT_ERROR(offset, "Error reading string object");
|
|
free(key);
|
|
return 0;
|
|
}
|
|
|
|
if (store != NULL) {
|
|
*store = key;
|
|
} else {
|
|
free(key);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
double* loadDoubleValue() {
|
|
char buf[256];
|
|
unsigned char len;
|
|
double* val;
|
|
|
|
if (!readBytes(&len,1)) return NULL;
|
|
|
|
val = malloc(sizeof(double));
|
|
switch(len) {
|
|
case 255: *val = R_NegInf; return val;
|
|
case 254: *val = R_PosInf; return val;
|
|
case 253: *val = R_Nan; return val;
|
|
default:
|
|
if (!readBytes(buf, len)) {
|
|
free(val);
|
|
return NULL;
|
|
}
|
|
buf[len] = '\0';
|
|
sscanf(buf, "%lg", val);
|
|
return val;
|
|
}
|
|
}
|
|
|
|
int processDoubleValue(double** store) {
|
|
unsigned long offset = CURR_OFFSET;
|
|
double *val = loadDoubleValue();
|
|
if (val == NULL) {
|
|
SHIFT_ERROR(offset, "Error reading double value");
|
|
free(val);
|
|
return 0;
|
|
}
|
|
|
|
if (store != NULL) {
|
|
*store = val;
|
|
} else {
|
|
free(val);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/* Read the key and the value that follow a value type byte.
 *
 * e: entry whose 'type' has already been set. On return e->key holds the
 *    malloc()ed key if it could be read (ownership passes to the owner of
 *    the entry), and e->success is set to 1 when the whole pair was read.
 *
 * The value itself is only walked, not kept: a string for REDIS_STRING, or
 * an element count followed by that many elements for lists and sets
 * (strings), sorted sets (string + double) and hashes (string + string).
 *
 * Returns 1 on success. Returns 0 after pushing an error describing what
 * failed; any other type is rejected with "Type not implemented". */
int loadPair(entry *e) {
    unsigned long offset = CURR_OFFSET;
    uint32_t i, length = 0;
    char *key;

    /* read key first */
    if (!processStringObject(&key)) {
        SHIFT_ERROR(offset, "Error reading entry key");
        return 0;
    }
    e->key = key;

    /* aggregate types store their element count before the elements */
    if (e->type == REDIS_LIST ||
        e->type == REDIS_SET  ||
        e->type == REDIS_ZSET ||
        e->type == REDIS_HASH) {
        if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
            SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
            return 0;
        }
    }

    /* i and length are unsigned 32 bit values, so they are printed with %u */
    switch(e->type) {
    case REDIS_STRING:
        if (!processStringObject(NULL)) {
            SHIFT_ERROR(offset, "Error reading entry value");
            return 0;
        }
        break;
    case REDIS_LIST:
    case REDIS_SET:
        for (i = 0; i < length; i++) {
            offset = CURR_OFFSET;
            if (!processStringObject(NULL)) {
                SHIFT_ERROR(offset, "Error reading element at index %u (length: %u)",
                            (unsigned int)i, (unsigned int)length);
                return 0;
            }
        }
        break;
    case REDIS_ZSET:
        for (i = 0; i < length; i++) {
            offset = CURR_OFFSET;
            if (!processStringObject(NULL)) {
                SHIFT_ERROR(offset, "Error reading element key at index %u (length: %u)",
                            (unsigned int)i, (unsigned int)length);
                return 0;
            }
            offset = CURR_OFFSET;
            if (!processDoubleValue(NULL)) {
                SHIFT_ERROR(offset, "Error reading element value at index %u (length: %u)",
                            (unsigned int)i, (unsigned int)length);
                return 0;
            }
        }
        break;
    case REDIS_HASH:
        for (i = 0; i < length; i++) {
            offset = CURR_OFFSET;
            if (!processStringObject(NULL)) {
                SHIFT_ERROR(offset, "Error reading element key at index %u (length: %u)",
                            (unsigned int)i, (unsigned int)length);
                return 0;
            }
            offset = CURR_OFFSET;
            if (!processStringObject(NULL)) {
                SHIFT_ERROR(offset, "Error reading element value at index %u (length: %u)",
                            (unsigned int)i, (unsigned int)length);
                return 0;
            }
        }
        break;
    default:
        SHIFT_ERROR(offset, "Type not implemented");
        return 0;
    }

    /* because we're done, we assume success */
    e->success = 1;
    return 1;
}
|
|
|
|
entry loadEntry() {
|
|
entry e = { NULL, -1, 0 };
|
|
uint32_t length, offset[4];
|
|
|
|
/* reset error container */
|
|
errors.level = 0;
|
|
|
|
offset[0] = CURR_OFFSET;
|
|
if (!loadType(&e)) {
|
|
return e;
|
|
}
|
|
|
|
offset[1] = CURR_OFFSET;
|
|
if (e.type == REDIS_SELECTDB) {
|
|
if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
|
|
SHIFT_ERROR(offset[1], "Error reading database number");
|
|
return e;
|
|
}
|
|
if (length > 63) {
|
|
SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
|
|
return e;
|
|
}
|
|
} else if (e.type == REDIS_EOF) {
|
|
if (positions[level].offset < positions[level].size) {
|
|
SHIFT_ERROR(offset[0], "Unexpected EOF");
|
|
} else {
|
|
e.success = 1;
|
|
}
|
|
return e;
|
|
} else {
|
|
/* optionally consume expire */
|
|
if (e.type == REDIS_EXPIRETIME) {
|
|
if (!processTime()) return e;
|
|
if (!loadType(&e)) return e;
|
|
}
|
|
|
|
offset[1] = CURR_OFFSET;
|
|
if (!loadPair(&e)) {
|
|
SHIFT_ERROR(offset[1], "Error for type %s", types[e.type]);
|
|
return e;
|
|
}
|
|
}
|
|
|
|
/* all entries are followed by a valid type:
|
|
* e.g. a new entry, SELECTDB, EXPIRE, EOF */
|
|
offset[2] = CURR_OFFSET;
|
|
if (peekType() == -1) {
|
|
SHIFT_ERROR(offset[2], "Followed by invalid type");
|
|
SHIFT_ERROR(offset[0], "Error for type %s", types[e.type]);
|
|
e.success = 0;
|
|
} else {
|
|
e.success = 1;
|
|
}
|
|
|
|
return e;
|
|
}
|
|
|
|
/* Print 'body' on one line framed by '=' characters:
 * 'indent' of them, a space, the body, a space, and then enough '=' to
 * bring the line to 'width' columns.
 *
 * Both runs of '=' are clamped to the range 0..255 so that a negative
 * indent, an oversized indent, or a body longer than the line can no
 * longer make memset() write outside the local buffers; in those cases
 * the line is simply printed with a shorter (or empty) frame. */
void printCentered(int indent, int width, char* body) {
    char head[256], tail[256];
    const long max_fill = (long)sizeof(head) - 1;
    size_t body_len = strlen(body);
    long lead = indent;
    long trail;

    if (lead < 0) lead = 0;
    if (lead > max_fill) lead = max_fill;

    /* columns left after the two spaces and the leading run */
    trail = (long)width - 2 - lead;
    if (trail < 0 || body_len >= (unsigned long)trail) {
        trail = 0;
    } else {
        trail -= (long)body_len;
    }
    if (trail > max_fill) trail = max_fill;

    memset(head, '=', (size_t)lead);
    head[lead] = '\0';
    memset(tail, '=', (size_t)trail);
    tail[trail] = '\0';

    printf("%s %s %s\n", head, body, tail);
}
|
|
|
|
/* Print a framed line reporting how many valid opcodes were processed and
 * how many bytes they covered. */
void printValid(int ops, int bytes) {
    char line[80];

    sprintf(line, "Processed %d valid opcodes (in %d bytes)", ops, bytes);
    printCentered(4, 80, line);
}
|
|
|
|
/* Print a framed line reporting how many bytes were skipped and the offset
 * (shown in hexadecimal) at which processing resumes. */
void printSkipped(int bytes, int offset) {
    char line[80];

    sprintf(line, "Skipped %d bytes (resuming at 0x%08x)", bytes, offset);
    printCentered(4, 80, line);
}
|
|
|
|
/* Print a framed "Error trace" heading for entry 'e', followed by every
 * message on the error stack with its offset.
 *
 * The heading names the type when one was read, and for value types also
 * the key: at most 40 characters, with "..." over the last three when the
 * key is longer, and with characters <= 32 shown as '?'. */
void printErrorStack(entry *e) {
    unsigned int i;
    char body[64];

    if (e->type == -1) {
        snprintf(body, sizeof body, "Error trace");
    } else if (e->type >= 253) {
        snprintf(body, sizeof body, "Error trace (%s)", types[e->type]);
    } else if (!e->key) {
        snprintf(body, sizeof body, "Error trace (%s: (unknown))", types[e->type]);
    } else {
        char tmp[41];
        size_t key_len = strlen(e->key);
        size_t shown = key_len < 40 ? key_len : 40;

        /* copy and always terminate: strncpy() would leave tmp without a
         * terminator for keys of 40 characters or more */
        memcpy(tmp, e->key, shown);
        tmp[shown] = '\0';

        /* display truncation at the last 3 chars */
        if (key_len > 40) {
            memset(&tmp[37], '.', 3);
        }

        /* display unprintable characters as ? */
        for (i = 0; i < shown; i++) {
            if (tmp[i] <= 32) tmp[i] = '?';
        }
        snprintf(body, sizeof body, "Error trace (%s: %s)", types[e->type], tmp);
    }

    printCentered(4, 80, body);

    /* display error stack */
    for (i = 0; i < errors.level; i++) {
        printf("0x%08lx - %s\n", errors.offset[i], errors.error[i]);
    }
}
|
|
|
|
void process() {
|
|
int i, num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
|
|
entry entry;
|
|
processHeader();
|
|
|
|
level = 1;
|
|
while(positions[0].offset < positions[0].size) {
|
|
positions[1] = positions[0];
|
|
|
|
entry = loadEntry();
|
|
if (!entry.success) {
|
|
printValid(num_valid_ops, num_valid_bytes);
|
|
printErrorStack(&entry);
|
|
num_errors++;
|
|
num_valid_ops = 0;
|
|
num_valid_bytes = 0;
|
|
|
|
/* search for next valid entry */
|
|
unsigned long offset = positions[0].offset + 1;
|
|
while (!entry.success && offset < positions[0].size) {
|
|
positions[1].offset = offset;
|
|
|
|
/* find 3 consecutive valid entries */
|
|
for (i = 0; i < 3; i++) {
|
|
entry = loadEntry();
|
|
if (!entry.success) break;
|
|
}
|
|
/* check if we found 3 consecutive valid entries */
|
|
if (i < 3) {
|
|
offset++;
|
|
}
|
|
}
|
|
|
|
/* print how many bytes we have skipped to find a new valid opcode */
|
|
if (offset < positions[0].size) {
|
|
printSkipped(offset - positions[0].offset, offset);
|
|
}
|
|
|
|
positions[0].offset = offset;
|
|
} else {
|
|
num_valid_ops++;
|
|
num_valid_bytes += positions[1].offset - positions[0].offset;
|
|
|
|
/* advance position */
|
|
positions[0] = positions[1];
|
|
}
|
|
}
|
|
|
|
/* because there is another potential error,
|
|
* print how many valid ops we have processed */
|
|
printValid(num_valid_ops, num_valid_bytes);
|
|
|
|
/* expect an eof */
|
|
if (entry.type != REDIS_EOF) {
|
|
/* last byte should be EOF, add error */
|
|
errors.level = 0;
|
|
SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
|
|
|
|
/* this is an EOF error so reset type */
|
|
entry.type = -1;
|
|
printErrorStack(&entry);
|
|
|
|
num_errors++;
|
|
}
|
|
|
|
/* print summary on errors */
|
|
if (num_errors > 0) {
|
|
printf("\n");
|
|
printf("Total unprocessable opcodes: %d\n", num_errors);
|
|
}
|
|
}
|
|
|
|
int main(int argc, char **argv) {
|
|
/* expect the first argument to be the dump file */
|
|
if (argc <= 1) {
|
|
printf("Usage: %s <dump.rdb>\n", argv[0]);
|
|
exit(0);
|
|
}
|
|
|
|
int fd;
|
|
unsigned long size;
|
|
struct stat stat;
|
|
void *data;
|
|
|
|
fd = open(argv[1], O_RDONLY);
|
|
if (fd < 1) {
|
|
ERROR("Cannot open file: %s\n", argv[1]);
|
|
}
|
|
if (fstat(fd, &stat) == -1) {
|
|
ERROR("Cannot stat: %s\n", argv[1]);
|
|
} else {
|
|
size = stat.st_size;
|
|
}
|
|
|
|
data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
|
|
if (data == MAP_FAILED) {
|
|
ERROR("Cannot mmap: %s\n", argv[1]);
|
|
}
|
|
|
|
/* Initialize static vars */
|
|
positions[0].data = data;
|
|
positions[0].size = size;
|
|
positions[0].offset = 0;
|
|
errors.level = 0;
|
|
|
|
/* Object types */
|
|
sprintf(types[REDIS_STRING], "STRING");
|
|
sprintf(types[REDIS_LIST], "LIST");
|
|
sprintf(types[REDIS_SET], "SET");
|
|
sprintf(types[REDIS_ZSET], "ZSET");
|
|
sprintf(types[REDIS_HASH], "HASH");
|
|
|
|
/* Object types only used for dumping to disk */
|
|
sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
|
|
sprintf(types[REDIS_SELECTDB], "SELECTDB");
|
|
sprintf(types[REDIS_EOF], "EOF");
|
|
|
|
/* Double constants initialization */
|
|
R_Zero = 0.0;
|
|
R_PosInf = 1.0/R_Zero;
|
|
R_NegInf = -1.0/R_Zero;
|
|
R_Nan = R_Zero/R_Zero;
|
|
|
|
process();
|
|
|
|
munmap(data, size);
|
|
close(fd);
|
|
return 0;
|
|
}
|