From b1a8e3e89e7824e0375b149e487f9ff1012a2100 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 15:44:21 +0100 Subject: [PATCH 1/9] byte ordering detection in config.h --- src/config.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++----- src/sha1.c | 50 +--------------------------------------- 2 files changed, 59 insertions(+), 55 deletions(-) diff --git a/src/config.h b/src/config.h index 40f22fa51..07700bed6 100644 --- a/src/config.h +++ b/src/config.h @@ -21,7 +21,7 @@ #define redis_malloc_size(p) malloc_size(p) #endif -/* define redis_fstat to fstat or fstat64() */ +/* Tefine redis_fstat to fstat or fstat64() */ #if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) #define redis_fstat fstat64 #define redis_stat stat64 @@ -30,22 +30,22 @@ #define redis_stat stat #endif -/* test for proc filesystem */ +/* Test for proc filesystem */ #ifdef __linux__ #define HAVE_PROCFS 1 #endif -/* test for task_info() */ +/* Test for task_info() */ #if defined(__APPLE__) #define HAVE_TASKINFO 1 #endif -/* test for backtrace() */ +/* Test for backtrace() */ #if defined(__APPLE__) || defined(__linux__) #define HAVE_BACKTRACE 1 #endif -/* test for polling API */ +/* Test for polling API */ #ifdef __linux__ #define HAVE_EPOLL 1 #endif @@ -54,11 +54,63 @@ #define HAVE_KQUEUE 1 #endif -/* define aof_fsync to fdatasync() in Linux and fsync() for all the rest */ +/* Define aof_fsync to fdatasync() in Linux and fsync() for all the rest */ #ifdef __linux__ #define aof_fsync fdatasync #else #define aof_fsync fsync #endif +/* Byte ordering detection */ +#include /* This will likely define BYTE_ORDER */ + +#ifndef BYTE_ORDER +#if (BSD >= 199103) +# include +#else +#if defined(linux) || defined(__linux__) +# include +#else +#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */ +#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */ +#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp)*/ + +#if defined(vax) || defined(ns32000) || defined(sun386) || defined(__i386__) || \ + defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \ + defined(__alpha__) || defined(__alpha) +#define BYTE_ORDER LITTLE_ENDIAN +#endif + +#if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \ + defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \ + defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\ + defined(apollo) || defined(__convex__) || defined(_CRAY) || \ + defined(__hppa) || defined(__hp9000) || \ + defined(__hp9000s300) || defined(__hp9000s700) || \ + defined (BIT_ZERO_ON_LEFT) || defined(m68k) || defined(__sparc) +#define BYTE_ORDER BIG_ENDIAN +#endif +#endif /* linux */ +#endif /* BSD */ +#endif /* BYTE_ORDER */ + +#if defined(__BYTE_ORDER) && !defined(BYTE_ORDER) +#if (__BYTE_ORDER == __LITTLE_ENDIAN) +#define BYTE_ORDER LITTLE_ENDIAN +#else +#define BYTE_ORDER BIG_ENDIAN +#endif +#endif + +#if !defined(BYTE_ORDER) || \ + (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN && \ + BYTE_ORDER != PDP_ENDIAN) + /* you must determine what the correct bit order is for + * your compiler - the next line is an intentional error + * which will force your compiles to bomb until you fix + * the above macros. + */ +#error "Undefined or invalid BYTE_ORDER" +#endif + #endif diff --git a/src/sha1.c b/src/sha1.c index 2c50433e8..26a5565ee 100644 --- a/src/sha1.c +++ b/src/sha1.c @@ -28,55 +28,7 @@ A million repetitions of "a" #include "solarisfixes.h" #endif #include "sha1.h" - -#ifndef BYTE_ORDER -#if (BSD >= 199103) -# include -#else -#if defined(linux) || defined(__linux__) -# include -#else -#define LITTLE_ENDIAN 1234 /* least-significant byte first (vax, pc) */ -#define BIG_ENDIAN 4321 /* most-significant byte first (IBM, net) */ -#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long (pdp)*/ - -#if defined(vax) || defined(ns32000) || defined(sun386) || defined(__i386__) || \ - defined(MIPSEL) || defined(_MIPSEL) || defined(BIT_ZERO_ON_RIGHT) || \ - defined(__alpha__) || defined(__alpha) -#define BYTE_ORDER LITTLE_ENDIAN -#endif - -#if defined(sel) || defined(pyr) || defined(mc68000) || defined(sparc) || \ - defined(is68k) || defined(tahoe) || defined(ibm032) || defined(ibm370) || \ - defined(MIPSEB) || defined(_MIPSEB) || defined(_IBMR2) || defined(DGUX) ||\ - defined(apollo) || defined(__convex__) || defined(_CRAY) || \ - defined(__hppa) || defined(__hp9000) || \ - defined(__hp9000s300) || defined(__hp9000s700) || \ - defined (BIT_ZERO_ON_LEFT) || defined(m68k) || defined(__sparc) -#define BYTE_ORDER BIG_ENDIAN -#endif -#endif /* linux */ -#endif /* BSD */ -#endif /* BYTE_ORDER */ - -#if defined(__BYTE_ORDER) && !defined(BYTE_ORDER) -#if (__BYTE_ORDER == __LITTLE_ENDIAN) -#define BYTE_ORDER LITTLE_ENDIAN -#else -#define BYTE_ORDER BIG_ENDIAN -#endif -#endif - -#if !defined(BYTE_ORDER) || \ - (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN && \ - BYTE_ORDER != PDP_ENDIAN) - /* you must determine what the correct bit order is for - * your compiler - the next line is an intentional error - * which will force your compiles to bomb until you fix - * the above macros. - */ -#error "Undefined or invalid BYTE_ORDER" -#endif +#include "config.h" #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) From e12cb14308ab2719b506762e662ab179f31aceb9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 16:24:18 +0100 Subject: [PATCH 2/9] endianess conversion API, to be applied to specially encoded data types for arch agnostic encoding. --- src/endian.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/endian.h | 8 +++++++ 2 files changed, 71 insertions(+) create mode 100644 src/endian.c create mode 100644 src/endian.h diff --git a/src/endian.c b/src/endian.c new file mode 100644 index 000000000..aff2425a6 --- /dev/null +++ b/src/endian.c @@ -0,0 +1,63 @@ +/* Toggle the 16 bit unsigned integer pointed by *p from little endian to + * big endian */ +void memrev16(void *p) { + unsigned char *x = p, t; + + t = x[0]; + x[0] = x[1]; + x[1] = t; +} + +/* Toggle the 32 bit unsigned integer pointed by *p from little endian to + * big endian */ +void memrev32(void *p) { + unsigned char *x = p, t; + + t = x[0]; + x[0] = x[3]; + x[3] = t; + t = x[1]; + x[1] = x[2]; + x[2] = t; +} + +/* Toggle the 64 bit unsigned integer pointed by *p from little endian to + * big endian */ +void memrev64(void *p) { + unsigned char *x = p, t; + + t = x[0]; + x[0] = x[7]; + x[7] = t; + t = x[1]; + x[1] = x[6]; + x[6] = t; + t = x[2]; + x[2] = x[5]; + x[5] = t; + t = x[3]; + x[3] = x[4]; + x[4] = t; +} + +#ifdef TESTMAIN +#include + +int main(void) { + char buf[32]; + + sprintf(buf,"ciaoroma"); + memrev16(buf); + printf("%s\n", buf); + + sprintf(buf,"ciaoroma"); + memrev32(buf); + printf("%s\n", buf); + + sprintf(buf,"ciaoroma"); + memrev64(buf); + printf("%s\n", buf); + + return 0; +} +#endif diff --git a/src/endian.h b/src/endian.h new file mode 100644 index 000000000..ea295ee5f --- /dev/null +++ b/src/endian.h @@ -0,0 +1,8 @@ +#ifndef __ENDIAN_H +#define __ENDIAN_H + +void memrev16(void *p); +void memrev32(void *p); +void memrev64(void *p); + +#endif From 48e46215909d5161db838a596bb3ec334c86c6d7 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 16:33:09 +0100 Subject: [PATCH 3/9] Ehm... sorry if we don't support PDP endianess --- src/config.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/config.h b/src/config.h index 07700bed6..d98067c1f 100644 --- a/src/config.h +++ b/src/config.h @@ -103,8 +103,7 @@ #endif #if !defined(BYTE_ORDER) || \ - (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN && \ - BYTE_ORDER != PDP_ENDIAN) + (BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN) /* you must determine what the correct bit order is for * your compiler - the next line is an intentional error * which will force your compiles to bomb until you fix From bcde63785d6c6e54ae71216df8bec276f2c85155 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 16:36:02 +0100 Subject: [PATCH 4/9] TODO updated --- TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO b/TODO index 48ea8ac6d..580adda69 100644 --- a/TODO +++ b/TODO @@ -31,6 +31,7 @@ APPEND ONLY FILE OPTIMIZATIONS ============= +* Avoid COW due to incrementing the dict iterators counter. * SORT: Don't copy the list into a vector when BY argument is constant. * Write the hash table size of every db in the dump, so that Redis can resize the hash table just one time when loading a big DB. * Read-only mode for slaves. From b5325132f1c8bc90e6c87392c54724ca33388ee8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 17:28:16 +0100 Subject: [PATCH 5/9] memrev variants only doing the work if the target host is big endian --- src/endian.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/endian.h b/src/endian.h index ea295ee5f..327cc570d 100644 --- a/src/endian.h +++ b/src/endian.h @@ -5,4 +5,14 @@ void memrev16(void *p); void memrev32(void *p); void memrev64(void *p); +/* variants of the function doing the actual convertion only if the target + * host is big endian */ +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define memrev16ifbe(p) +#define memrev32ifbe(p) +#define memrev64ifbe(p) +#else +#define memrev16ifbe(p) memrev16(p) +#define memrev32ifbe(p) memrev32(p) +#define memrev64ifbe(p) memrev64(p) #endif From 336c82d58379205b789f3ca9cefc7c2090808fb8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 17:31:02 +0100 Subject: [PATCH 6/9] zipmaps are now endianess agnostic, needed for on disk serialization of zipmaps without convertions layers --- src/Makefile | 2 +- src/endian.h | 2 ++ src/zipmap.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 4d0f86db9..acbc9b5f0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -25,7 +25,7 @@ PREFIX= /usr/local INSTALL_BIN= $(PREFIX)/bin INSTALL= cp -p -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o dscache.o pubsub.o multi.o debug.o sort.o intset.o syncio.o diskstore.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o dscache.o pubsub.o multi.o debug.o sort.o intset.o syncio.o diskstore.o endian.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o diff --git a/src/endian.h b/src/endian.h index 327cc570d..bef822727 100644 --- a/src/endian.h +++ b/src/endian.h @@ -16,3 +16,5 @@ void memrev64(void *p); #define memrev32ifbe(p) memrev32(p) #define memrev64ifbe(p) memrev64(p) #endif + +#endif diff --git a/src/zipmap.c b/src/zipmap.c index 693db7b93..9f0fc7181 100644 --- a/src/zipmap.c +++ b/src/zipmap.c @@ -80,6 +80,7 @@ #include #include #include "zmalloc.h" +#include "endian.h" #define ZIPMAP_BIGLEN 254 #define ZIPMAP_END 255 @@ -108,6 +109,7 @@ static unsigned int zipmapDecodeLength(unsigned char *p) { if (len < ZIPMAP_BIGLEN) return len; memcpy(&len,p+1,sizeof(unsigned int)); + memrev32ifbe(&len); return len; } @@ -123,6 +125,7 @@ static unsigned int zipmapEncodeLength(unsigned char *p, unsigned int len) { } else { p[0] = ZIPMAP_BIGLEN; memcpy(p+1,&len,sizeof(len)); + memrev32ifbe(p+1); return 1+sizeof(len); } } From f22043745015281f76f3d3bc7f16afe3de731af9 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 18:49:59 +0100 Subject: [PATCH 7/9] ziplist are now endianess agnostic --- src/ziplist.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ziplist.c b/src/ziplist.c index 524f72388..55bb662bc 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -68,6 +68,7 @@ #include #include "zmalloc.h" #include "ziplist.h" +#include "endian.h" int ll2string(char *s, size_t len, long long value); @@ -207,6 +208,7 @@ static unsigned int zipPrevDecodeLength(unsigned char *p, unsigned int *lensize) } else { if (lensize) *lensize = 1+sizeof(len); memcpy(&len,p+1,sizeof(len)); + memrev32ifbe(&len); } return len; } @@ -223,6 +225,7 @@ static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) { } else { p[0] = ZIP_BIGLEN; memcpy(p+1,&len,sizeof(len)); + memrev32ifbe(p+1); return 1+sizeof(len); } } @@ -234,6 +237,7 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) { if (p == NULL) return; p[0] = ZIP_BIGLEN; memcpy(p+1,&len,sizeof(len)); + memrev32ifbe(p+1); } /* Return the difference in number of bytes needed to store the new length @@ -287,12 +291,15 @@ static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encodi if (encoding == ZIP_INT_16B) { i16 = value; memcpy(p,&i16,sizeof(i16)); + memrev16ifbe(p); } else if (encoding == ZIP_INT_32B) { i32 = value; memcpy(p,&i32,sizeof(i32)); + memrev32ifbe(p); } else if (encoding == ZIP_INT_64B) { i64 = value; memcpy(p,&i64,sizeof(i64)); + memrev64ifbe(p); } else { assert(NULL); } @@ -305,12 +312,15 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) { int64_t i64, ret = 0; if (encoding == ZIP_INT_16B) { memcpy(&i16,p,sizeof(i16)); + memrev16ifbe(&i16); ret = i16; } else if (encoding == ZIP_INT_32B) { memcpy(&i32,p,sizeof(i32)); + memrev16ifbe(&i32); ret = i32; } else if (encoding == ZIP_INT_64B) { memcpy(&i64,p,sizeof(i64)); + memrev16ifbe(&i64); ret = i64; } else { assert(NULL); From dc75b1edfb2d5935246d2b3f97a04474c1296008 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Mar 2011 19:14:04 +0100 Subject: [PATCH 8/9] encoding agnostic intsets --- src/intset.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/intset.c b/src/intset.c index 13bd220e7..4dd0141d8 100644 --- a/src/intset.c +++ b/src/intset.c @@ -3,6 +3,7 @@ #include #include "intset.h" #include "zmalloc.h" +#include "endian.h" /* Note that these encodings are ordered, so: * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */ @@ -16,16 +17,29 @@ static uint8_t _intsetValueEncoding(int64_t v) { return INTSET_ENC_INT64; else if (v < INT16_MIN || v > INT16_MAX) return INTSET_ENC_INT32; - return INTSET_ENC_INT16; + else + return INTSET_ENC_INT16; } /* Return the value at pos, given an encoding. */ static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) { - if (enc == INTSET_ENC_INT64) - return ((int64_t*)is->contents)[pos]; - else if (enc == INTSET_ENC_INT32) - return ((int32_t*)is->contents)[pos]; - return ((int16_t*)is->contents)[pos]; + int64_t v64; + int32_t v32; + int16_t v16; + + if (enc == INTSET_ENC_INT64) { + memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64)); + memrev64ifbe(&v64); + return v64; + } else if (enc == INTSET_ENC_INT32) { + memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32)); + memrev32ifbe(&v32); + return v32; + } else { + memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16)); + memrev16ifbe(&v16); + return v16; + } } /* Return the value at pos, using the configured encoding. */ @@ -35,12 +49,16 @@ static int64_t _intsetGet(intset *is, int pos) { /* Set the value at pos, using the configured encoding. */ static void _intsetSet(intset *is, int pos, int64_t value) { - if (is->encoding == INTSET_ENC_INT64) + if (is->encoding == INTSET_ENC_INT64) { ((int64_t*)is->contents)[pos] = value; - else if (is->encoding == INTSET_ENC_INT32) + memrev64ifbe(((int64_t*)is->contents)+pos); + } else if (is->encoding == INTSET_ENC_INT32) { ((int32_t*)is->contents)[pos] = value; - else + memrev32ifbe(((int32_t*)is->contents)+pos); + } else { ((int16_t*)is->contents)[pos] = value; + memrev16ifbe(((int16_t*)is->contents)+pos); + } } /* Create an empty intset. */ From 7493d2a0325fe33dc75317bfedf9b1c1e5b0d0b5 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 10 Mar 2011 16:39:19 +0100 Subject: [PATCH 9/9] fixed diskstore race condition --- src/dscache.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dscache.c b/src/dscache.c index 2dbc1134d..cbe9bb016 100644 --- a/src/dscache.c +++ b/src/dscache.c @@ -890,8 +890,16 @@ int waitForSwappedKey(redisClient *c, robj *key) { listAddNodeTail(l,c); /* Are we already loading the key from disk? If not create a job */ - if (de == NULL) - cacheScheduleIO(c->db,key,REDIS_IO_LOAD); + if (de == NULL) { + int flags = cacheScheduleIOGetFlags(c->db,key); + + /* It is possible that even if there are no clients waiting for + * a load operation, still we have a load operation in progress. + * For instance think to a client performing a GET and then + * closing the connection */ + if ((flags & (REDIS_IO_LOAD|REDIS_IO_LOADINPROG)) == 0) + cacheScheduleIO(c->db,key,REDIS_IO_LOAD); + } return 1; }