From 0d1c5099e55721d798283ddc162949ccc6c4e35c Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Fri, 5 Jun 2026 03:47:26 +0100
Subject: [PATCH 1/4] Lock sliding sync connections when inserting lazy members
 to prevent deadlocks

---
 .../storage/databases/main/sliding_sync.py    | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/synapse/storage/databases/main/sliding_sync.py b/synapse/storage/databases/main/sliding_sync.py
index 9a09c0f9b54..55c360cbf44 100644
--- a/synapse/storage/databases/main/sliding_sync.py
+++ b/synapse/storage/databases/main/sliding_sync.py
@@ -673,6 +673,29 @@ def _persist_sliding_sync_connection_lazy_members_txn(
 
         now = self.clock.time_msec()
 
+        if isinstance(self.database_engine, PostgresEngine):
+            # If the update is large and there are multiple overlapping
+            # updates going on,
+            # multiple transactions can try to INSERT INTO sliding_sync_connection_lazy_members
+            # with conflicting tuples on
+            #     "sliding_sync_connection_lazy_members_idx" UNIQUE, btree
+            #     (connection_key, room_id, user_id)
+            # This causes a deadlock and eventually a DeadlockDetected error.
+            # The transactions get tried again and again and do not make progress.
+            # To avoid this, take a lock on the connection upfront.
+            # https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS
+            # (We could also consider sorting our insertions, but not clear if Postgres
+            # guarantees to preservee the insertion order)
+            txn.execute(
+                """
+                SELECT 1
+                FROM sliding_sync_connections
+                WHERE connection_key = ?
+                FOR NO KEY UPDATE
+                """,
+                (connection_key,),
+            )
+
         # Figure out which cache entries to add or update.
         #
         # These are either a) new entries we've never sent before (i.e. with a

From b1c313e573101379960bdcdbbccb12d4f407a232 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Fri, 5 Jun 2026 03:56:14 +0100
Subject: [PATCH 2/4] Newsfile

Signed-off-by: Olivier 'reivilibre <oliverw@matrix.org>
---
 changelog.d/19826.bugfix | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/19826.bugfix

diff --git a/changelog.d/19826.bugfix b/changelog.d/19826.bugfix
new file mode 100644
index 00000000000..771b197b205
--- /dev/null
+++ b/changelog.d/19826.bugfix
@@ -0,0 +1 @@
+Lock Sliding Sync connections when inserting lazy members, to prevent repeated deadlocks.
\ No newline at end of file

From 57a4d7cc81bec94af7589af6477aea1f23b4cd0e Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Fri, 5 Jun 2026 14:10:57 +0100
Subject: [PATCH 3/4] Hoist lock to entrypoint of outer txn

---
 .../storage/databases/main/sliding_sync.py    | 49 ++++++++++---------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/synapse/storage/databases/main/sliding_sync.py b/synapse/storage/databases/main/sliding_sync.py
index 55c360cbf44..b959169b9ed 100644
--- a/synapse/storage/databases/main/sliding_sync.py
+++ b/synapse/storage/databases/main/sliding_sync.py
@@ -204,6 +204,32 @@ def persist_per_connection_state_txn(
                 raise SlidingSyncUnknownPosition()
 
             (connection_key,) = row
+
+            if isinstance(self.database_engine, PostgresEngine):
+                # Lock the sliding sync connection row for update upfront,
+                # to prevent deadlocks between concurrent transactions
+                # (which can retry again and again without making progress).
+                #
+                # (We don't need to explicitly lock in the other branch,
+                # where we re-create the connection, as that implies a lock
+                # anyway)
+                #
+                # Specifically, the statements seen to deadlock against
+                # each other were
+                # `INSERT INTO sliding_sync_connection_lazy_members`
+                # with conflicting tuples on
+                #     "sliding_sync_connection_lazy_members_idx" UNIQUE, btree
+                #     (connection_key, room_id, user_id)
+                # https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS
+                txn.execute(
+                    """
+                    SELECT 1
+                    FROM sliding_sync_connections
+                    WHERE connection_key = ?
+                    FOR NO KEY UPDATE
+                    """,
+                    (connection_key,),
+                )
         else:
             # We're restarting the connection, so we clear the previous existing data we
             # used to track it. We do this here to ensure that if we get lots of
@@ -673,29 +699,6 @@ def _persist_sliding_sync_connection_lazy_members_txn(
 
         now = self.clock.time_msec()
 
-        if isinstance(self.database_engine, PostgresEngine):
-            # If the update is large and there are multiple overlapping
-            # updates going on,
-            # multiple transactions can try to INSERT INTO sliding_sync_connection_lazy_members
-            # with conflicting tuples on
-            #     "sliding_sync_connection_lazy_members_idx" UNIQUE, btree
-            #     (connection_key, room_id, user_id)
-            # This causes a deadlock and eventually a DeadlockDetected error.
-            # The transactions get tried again and again and do not make progress.
-            # To avoid this, take a lock on the connection upfront.
-            # https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS
-            # (We could also consider sorting our insertions, but not clear if Postgres
-            # guarantees to preservee the insertion order)
-            txn.execute(
-                """
-                SELECT 1
-                FROM sliding_sync_connections
-                WHERE connection_key = ?
-                FOR NO KEY UPDATE
-                """,
-                (connection_key,),
-            )
-
         # Figure out which cache entries to add or update.
         #
         # These are either a) new entries we've never sent before (i.e. with a

From 4374b7b9a63b8f32771670b961c30fba05f9203c Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre <oliverw@matrix.org>
Date: Mon, 15 Jun 2026 13:49:03 +0100
Subject: [PATCH 4/4] Put lock clause in initial query

---
 .../storage/databases/main/sliding_sync.py    | 48 ++++++++-----------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/synapse/storage/databases/main/sliding_sync.py b/synapse/storage/databases/main/sliding_sync.py
index b959169b9ed..d0943002b6d 100644
--- a/synapse/storage/databases/main/sliding_sync.py
+++ b/synapse/storage/databases/main/sliding_sync.py
@@ -186,15 +186,35 @@ def persist_per_connection_state_txn(
         # First we fetch (or create) the connection key associated with the
         # previous connection position.
         if previous_connection_position is not None:
+            lock_clause = ""
+            if isinstance(self.database_engine, PostgresEngine):
+                # Lock the sliding sync connection row for update upfront,
+                # to prevent deadlocks between concurrent transactions
+                # (which can retry again and again without making progress).
+                #
+                # (We don't need to explicitly lock in the other branch,
+                # where we re-create the connection, as that implies a lock
+                # anyway)
+                #
+                # Specifically, the statements seen to deadlock against
+                # each other were
+                # `INSERT INTO sliding_sync_connection_lazy_members`
+                # with conflicting tuples on
+                #     "sliding_sync_connection_lazy_members_idx" UNIQUE, btree
+                #     (connection_key, room_id, user_id)
+                # https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS
+                lock_clause = "FOR NO KEY UPDATE OF sliding_sync_connections"
+
             # The `previous_connection_position` is a user-supplied value, so we
             # need to make sure that the one they supplied is actually theirs.
-            sql = """
+            sql = f"""
                 SELECT connection_key
                 FROM sliding_sync_connection_positions
                 INNER JOIN sliding_sync_connections USING (connection_key)
                 WHERE
                     connection_position = ?
                     AND user_id = ? AND effective_device_id = ? AND conn_id = ?
+                {lock_clause}
             """
             txn.execute(
                 sql, (previous_connection_position, user_id, device_id, conn_id)
@@ -204,32 +224,6 @@ def persist_per_connection_state_txn(
                 raise SlidingSyncUnknownPosition()
 
             (connection_key,) = row
-
-            if isinstance(self.database_engine, PostgresEngine):
-                # Lock the sliding sync connection row for update upfront,
-                # to prevent deadlocks between concurrent transactions
-                # (which can retry again and again without making progress).
-                #
-                # (We don't need to explicitly lock in the other branch,
-                # where we re-create the connection, as that implies a lock
-                # anyway)
-                #
-                # Specifically, the statements seen to deadlock against
-                # each other were
-                # `INSERT INTO sliding_sync_connection_lazy_members`
-                # with conflicting tuples on
-                #     "sliding_sync_connection_lazy_members_idx" UNIQUE, btree
-                #     (connection_key, room_id, user_id)
-                # https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-ROWS
-                txn.execute(
-                    """
-                    SELECT 1
-                    FROM sliding_sync_connections
-                    WHERE connection_key = ?
-                    FOR NO KEY UPDATE
-                    """,
-                    (connection_key,),
-                )
         else:
             # We're restarting the connection, so we clear the previous existing data we
             # used to track it. We do this here to ensure that if we get lots of