Cluster: new option to work with partial slots coverage.

This commit is contained in:
antirez 2014-09-17 11:10:09 +02:00
parent a2c740ea93
commit c89afc8e5d
6 changed files with 40 additions and 6 deletions

View File

@ -660,6 +660,19 @@ lua-time-limit 5000
#
# cluster-migration-barrier 1
# By default Redis Cluster nodes stop accepting queries if they detect that
# at least one hash slot is uncovered (no available node is serving it).
# This way, if the cluster is partially down (for example a range of hash
# slots is no longer covered), the whole cluster eventually becomes
# unavailable. It automatically becomes available again as soon as all the
# slots are covered again.
#
# However, sometimes you want the subset of the cluster that is still working
# to continue to accept queries for the part of the key space that is still
# covered. In order to do so, just set the cluster-require-full-coverage
# option to no.
#
# cluster-require-full-coverage yes
# In order to set up your cluster, make sure to read the documentation
# available at the http://redis.io web site.

View File

@ -3171,12 +3171,14 @@ void clusterUpdateState(void) {
new_state = REDIS_CLUSTER_OK;
/* Check if all the slots are covered. */
for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
if (server.cluster->slots[j] == NULL ||
server.cluster->slots[j]->flags & (REDIS_NODE_FAIL))
{
new_state = REDIS_CLUSTER_FAIL;
break;
if (server.cluster_require_full_coverage) {
for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
if (server.cluster->slots[j] == NULL ||
server.cluster->slots[j]->flags & (REDIS_NODE_FAIL))
{
new_state = REDIS_CLUSTER_FAIL;
break;
}
}
}

View File

@ -15,6 +15,7 @@
* multiplicators of the node timeout value (when ending with MULT). */
#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15000
#define REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
#define REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1 /* Default for cluster-require-full-coverage (1 = yes). */
#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */

View File

@ -429,6 +429,13 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"cluster-config-file") && argc == 2) {
zfree(server.cluster_configfile);
server.cluster_configfile = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"cluster-require-full-coverage") &&
argc == 2)
{
if ((server.cluster_require_full_coverage = yesnotoi(argv[1])) == -1)
{
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"cluster-node-timeout") && argc == 2) {
server.cluster_node_timeout = strtoll(argv[1],NULL,10);
if (server.cluster_node_timeout <= 0) {
@ -918,6 +925,11 @@ void configSetCommand(redisClient *c) {
ll < 0) goto badfmt;
server.repl_min_slaves_max_lag = ll;
refreshGoodSlavesCount();
} else if (!strcasecmp(c->argv[2]->ptr,"cluster-require-full-coverage")) {
int yn = yesnotoi(o->ptr);
if (yn == -1) goto badfmt;
server.cluster_require_full_coverage = yn;
} else if (!strcasecmp(c->argv[2]->ptr,"cluster-node-timeout")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
ll <= 0) goto badfmt;
@ -1039,6 +1051,8 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor);
/* Bool (yes/no) values */
config_get_bool_field("cluster-require-full-coverage",
server.cluster_require_full_coverage);
config_get_bool_field("no-appendfsync-on-rewrite",
server.aof_no_fsync_on_rewrite);
config_get_bool_field("slave-serve-stale-data",
@ -1806,6 +1820,7 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,REDIS_LUA_TIME_LIMIT);
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE);
rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT);
rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER);
rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY);

View File

@ -1451,6 +1451,7 @@ void initServerConfig(void) {
server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
server.cluster_migration_barrier = REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER;
server.cluster_slave_validity_factor = REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY;
server.cluster_require_full_coverage = REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
server.lua_caller = NULL;
server.lua_time_limit = REDIS_LUA_TIME_LIMIT;

View File

@ -858,6 +858,8 @@ struct redisServer {
struct clusterState *cluster; /* State of the cluster */
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
int cluster_slave_validity_factor; /* Slave max data age for failover. */
int cluster_require_full_coverage; /* If true, put the cluster down if
there is at least an uncovered slot. */
/* Scripting */
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
redisClient *lua_client; /* The "fake client" to query Redis from Lua */