From ccebe673ad807aa204a0a0a0dd0e0c3e67fc7843 Mon Sep 17 00:00:00 2001 From: Yaron Sananes Date: Thu, 14 May 2026 18:20:58 +0300 Subject: [PATCH 1/2] Fix deferred freeClient clobbering replication state after replicaof Since PR #3324, freeClient() on a primary client with pending IO is deferred via freeClientAsync. The deferred free eventually chains through replicationCachePrimary() -> replicationHandlePrimaryDisconnection(), which unconditionally set repl_state = REPL_STATE_CONNECT. This causes two bugs: 1. REPLICAOF NO ONE: primary_host is NULL when the deferred free runs, so replicationCron calls connectWithPrimary(NULL) -> SIGSEGV in connTLSConnect (inet_pton with NULL addr). 2. REPLICAOF newhost newport: the deferred free clobbers the already- progressed repl_state (CONNECTING) back to CONNECT, causing replicationCron to call connectWithPrimary() again, which overwrites server.repl_transfer_s without closing the previous connection (FD leak). Fix by making replicationHandlePrimaryDisconnection() only transition to REPL_STATE_CONNECT when repl_state is still REPL_STATE_CONNECTED (meaning this is a genuine disconnect, not a stale deferred free). If repl_state has already moved on, the deferred free is stale and should not mutate the state machine. Additionally: - Add NULL check for addr in connTLSConnect() as defense in depth. - Add 10s timeout to the WAITAOF test to prevent indefinite hanging. - Add dedicated tests for the repoint scenario. Signed-off-by: Yaron Sananes --- src/debug.c | 3 +++ src/networking.c | 8 ++++++++ src/replication.c | 22 ++++++++++++++++++--- src/server.c | 1 + src/server.h | 1 + src/tls.c | 1 + tests/unit/wait.tcl | 48 ++++++++++++++++++++++++++++++++++++++++++++- 7 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/debug.c b/src/debug.c index c0a760d58f8..daa676a59d0 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1066,6 +1066,9 @@ void debugCommand(client *c) { } else if (!strcasecmp(objectGetVal(c->argv[1]), "client-enforce-reply-list") && c->argc == 3) { server.debug_client_enforce_reply_list = atoi(objectGetVal(c->argv[2])); addReply(c, shared.ok); + } else if (!strcasecmp(objectGetVal(c->argv[1]), "force-free-primary-async") && c->argc == 3) { + server.debug_force_free_primary_async = atoi(objectGetVal(c->argv[2])); + addReply(c, shared.ok); } else if (!handleDebugClusterCommand(c)) { addReplySubcommandSyntaxError(c); return; diff --git a/src/networking.c b/src/networking.c index 28443270428..a5837d7b798 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2102,6 +2102,14 @@ int freeClient(client *c) { return 0; } + /* Debug: force async free for the primary client to deterministically + * reproduce the deferred-free replication state clobber race. */ + if (server.debug_force_free_primary_async && c->flag.primary) { + server.debug_force_free_primary_async = 0; + freeClientAsync(c); + return 0; + } + /* For connected clients, call the disconnection event of modules hooks. */ if (c->conn) { moduleFireServerEvent(VALKEYMODULE_EVENT_CLIENT_CHANGE, VALKEYMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED, c); diff --git a/src/replication.c b/src/replication.c index e32f8e71182..8cc365d9eb1 100644 --- a/src/replication.c +++ b/src/replication.c @@ -4560,15 +4560,31 @@ void replicationHandlePrimaryDisconnection(void) { moduleFireServerEvent(VALKEYMODULE_EVENT_PRIMARY_LINK_CHANGE, VALKEYMODULE_SUBEVENT_PRIMARY_LINK_DOWN, NULL); server.primary = NULL; - server.repl_state = REPL_STATE_CONNECT; - server.repl_down_since = server.unixtime; + + /* freeClient(primary) can be deferred via freeClientAsync when the client + * has pending IO. By the time we run in that deferred context, + * replicationUnsetPrimary()/replicationSetPrimary() may have already + * finalized replication state. Only transition to REPL_STATE_CONNECT if + * we were genuinely connected (REPL_STATE_CONNECTED) and primary_host is + * still set. Otherwise this is a stale deferred free and we must not + * clobber the current state. */ + if (server.repl_state == REPL_STATE_CONNECTED && server.primary_host) { + server.repl_state = REPL_STATE_CONNECT; + server.repl_down_since = server.unixtime; + } else if (server.repl_state == REPL_STATE_CONNECTED) { + /* primary_host is NULL: deliberate unset in progress. */ + server.repl_state = REPL_STATE_NONE; + } + /* Any other repl_state means the state machine already moved on + * (e.g. REPL_STATE_CONNECT, CONNECTING, NONE) — leave it untouched. */ + /* We lost connection with our primary, don't disconnect replicas yet, * maybe we'll be able to PSYNC with our primary later. We'll disconnect * the replicas only if we'll have to do a full resync with our primary. */ /* Try to re-connect immediately rather than wait for replicationCron * waiting 1 second may risk backlog being recycled. */ - if (server.primary_host) { + if (server.repl_state == REPL_STATE_CONNECT && server.primary_host) { serverLog(LL_NOTICE, "Reconnecting to PRIMARY %s:%d", server.primary_host, server.primary_port); connectWithPrimary(); } diff --git a/src/server.c b/src/server.c index e1cef181ff5..1b9d00706d3 100644 --- a/src/server.c +++ b/src/server.c @@ -2967,6 +2967,7 @@ void initServer(void) { server.reply_buffer_resizing_enabled = 1; server.client_mem_usage_buckets = NULL; server.debug_client_enforce_reply_list = 0; + server.debug_force_free_primary_async = 0; resetReplicationBuffer(); /* Make sure the locale is set on startup based on the config file. */ diff --git a/src/server.h b/src/server.h index 0211264de4c..aa4c8050a00 100644 --- a/src/server.h +++ b/src/server.h @@ -1853,6 +1853,7 @@ struct valkeyServer { int enable_module_cmd; /* Enable MODULE commands, see PROTECTED_ACTION_ALLOWED_* */ int enable_debug_assert; /* Enable debug asserts */ int debug_client_enforce_reply_list; /* Force client to always use the reply list */ + int debug_force_free_primary_async; /* Force freeClient on primary to use async path */ /* Reply construction copy avoidance */ int min_io_threads_copy_avoid; /* Minimum number of IO threads for copy avoidance in reply construction */ int min_string_size_copy_avoid_threaded; /* Minimum bulk string size for copy avoidance in reply construction when IO threads enabled */ diff --git a/src/tls.c b/src/tls.c index 1d85131c1f3..e75b6f0d407 100644 --- a/src/tls.c +++ b/src/tls.c @@ -1603,6 +1603,7 @@ static int connTLSConnect(connection *conn_, unsigned char addr_buf[sizeof(struct in6_addr)]; if (conn->c.state != CONN_STATE_NONE) return C_ERR; + if (addr == NULL) return C_ERR; ERR_clear_error(); /* Check whether addr is an IP address, if not, use the value for Server Name Indication */ diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl index 34bea8014af..c9727f02fdf 100644 --- a/tests/unit/wait.tcl +++ b/tests/unit/wait.tcl @@ -343,7 +343,7 @@ tags {"wait aof network external:skip"} { set rd [valkey_deferring_client -1] $rd incr foo $rd read - $rd waitaof 0 1 0 + $rd waitaof 0 1 10000 wait_for_blocked_client -1 $replica replicaof $master_host $master_port assert_equal [$rd read] {1 1} @@ -532,3 +532,49 @@ start_server {} { } } } + +start_server {tags {"wait network external:skip"}} { +start_server {} { +start_server {} { + set master1 [srv -2 client] + set master1_host [srv -2 host] + set master1_port [srv -2 port] + + set master2 [srv -1 client] + set master2_host [srv -1 host] + set master2_port [srv -1 port] + + set replica [srv 0 client] + + test {Repoint replica with deferred freeClient does not double-connect} { + $replica replicaof $master1_host $master1_port + wait_for_condition 50 100 { + [s 0 master_link_status] eq {up} + } else { + fail "Replication to master1 not established" + } + + $replica debug force-free-primary-async 1 + + set log_lines [count_log_lines 0] + $replica replicaof $master2_host $master2_port + wait_for_condition 50 200 { + [s 0 master_link_status] eq {up} + } else { + fail "Replication to master2 not established after repoint" + } + after 2000 + + set logfile [srv 0 stdout] + set lines [split [exec tail -n +[expr {$log_lines + 1}] < $logfile] "\n"] + set sync_count 0 + foreach line $lines { + if {[string match "*Connecting to PRIMARY $master2_host:$master2_port*" $line]} { + incr sync_count + } + } + assert_equal 1 $sync_count + } +} +} +} From 5512e0434d8f3d938d31f30799a389d60e4e5f32 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 15 May 2026 09:41:46 +0300 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Ran Shidlansik Signed-off-by: Ran Shidlansik --- src/replication.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replication.c b/src/replication.c index 8cc365d9eb1..13cbc4183d6 100644 --- a/src/replication.c +++ b/src/replication.c @@ -4574,6 +4574,7 @@ void replicationHandlePrimaryDisconnection(void) { } else if (server.repl_state == REPL_STATE_CONNECTED) { /* primary_host is NULL: deliberate unset in progress. */ server.repl_state = REPL_STATE_NONE; + server.repl_down_since = server.unixtime; } /* Any other repl_state means the state machine already moved on * (e.g. REPL_STATE_CONNECT, CONNECTING, NONE) — leave it untouched. */