Skip to content

Commit 7ab7a51

Browse files
SOLR-18011: Allow locked Admin APIs to call other locked Admin APIs (#3916)
1 parent fb7377c commit 7ab7a51

30 files changed

Lines changed: 860 additions & 86 deletions
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Allow locked Admin APIs to call other locked AdminAPIs. These locked Admin APIs can only call other APIs on the same resource tree (Collection > Shard > Replica) to protect against deadlocks.
3+
type: changed # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: Houston Putman
6+
nick: HoustonPutman
7+
links:
8+
- name: SOLR-18011
9+
url: https://issues.apache.org/jira/browse/SOLR-18011

solr/core/src/java/org/apache/solr/api/V2HttpCall.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.solr.api;
1919

2020
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
21+
import static org.apache.solr.common.params.CollectionAdminParams.CALLING_LOCK_ID_HEADER;
2122
import static org.apache.solr.servlet.HttpSolrCall.Action.ADMIN;
2223
import static org.apache.solr.servlet.HttpSolrCall.Action.ADMIN_OR_REMOTEPROXY;
2324
import static org.apache.solr.servlet.HttpSolrCall.Action.PROCESS;
@@ -212,6 +213,11 @@ private void initAdminRequest(String path) throws Exception {
212213
solrReq.getContext().put(CoreContainer.class.getName(), cores);
213214
requestType = AuthorizationContext.RequestType.ADMIN;
214215
action = ADMIN;
216+
217+
String callingLockId = req.getHeader(CALLING_LOCK_ID_HEADER);
218+
if (callingLockId != null && !callingLockId.isBlank()) {
219+
solrReq.getContext().put(CALLING_LOCK_ID_HEADER, callingLockId);
220+
}
215221
}
216222

217223
protected void parseRequest() throws Exception {

solr/core/src/java/org/apache/solr/cloud/DistributedCollectionLockFactory.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public interface DistributedCollectionLockFactory {
5454
* @param replicaName is ignored and can be {@code null} if {@code level} is {@link
5555
* org.apache.solr.common.params.CollectionParams.LockLevel#COLLECTION} or {@link
5656
* org.apache.solr.common.params.CollectionParams.LockLevel#SHARD}
57+
* @param callingLockId the lockId from the caller that should be mirrored by this lock
5758
* @return a lock instance that must be {@link DistributedLock#release()}'ed in a {@code finally},
5859
* regardless of the lock having been acquired or not.
5960
*/
@@ -62,5 +63,6 @@ DistributedLock createLock(
6263
CollectionParams.LockLevel level,
6364
String collName,
6465
String shardId,
65-
String replicaName);
66+
String replicaName,
67+
String callingLockId);
6668
}

solr/core/src/java/org/apache/solr/cloud/DistributedLock.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,8 @@ public interface DistributedLock {
2424
void release();
2525

2626
boolean isAcquired();
27+
28+
String getLockId();
29+
30+
boolean isMirroringLock();
2731
}

solr/core/src/java/org/apache/solr/cloud/DistributedMultiLock.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
import com.google.common.annotations.VisibleForTesting;
2121
import java.lang.invoke.MethodHandles;
2222
import java.util.List;
23+
import java.util.stream.Collectors;
2324
import org.apache.solr.common.SolrException;
25+
import org.apache.solr.common.util.StrUtils;
2426
import org.slf4j.Logger;
2527
import org.slf4j.LoggerFactory;
2628

@@ -46,7 +48,12 @@ public void waitUntilAcquired() {
4648
for (DistributedLock lock : locks) {
4749
log.debug("DistributedMultiLock.waitUntilAcquired. About to wait on lock {}", lock);
4850
lock.waitUntilAcquired();
49-
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
51+
if (lock.isMirroringLock()) {
52+
log.debug(
53+
"DistributedMultiLock.waitUntilAcquired. Mirroring already-acquired lock {}", lock);
54+
} else {
55+
log.debug("DistributedMultiLock.waitUntilAcquired. Acquired lock {}", lock);
56+
}
5057
}
5158
}
5259

@@ -70,6 +77,17 @@ public boolean isAcquired() {
7077
return true;
7178
}
7279

80+
public String getLockId() {
81+
return locks.stream().map(DistributedLock::getLockId).collect(Collectors.joining(","));
82+
}
83+
84+
public static List<String> splitLockIds(String lockIds) {
85+
if (StrUtils.isBlank(lockIds)) {
86+
return List.of();
87+
}
88+
return List.of(lockIds.split(","));
89+
}
90+
7391
@VisibleForTesting
7492
public int getCountInternalLocks() {
7593
return locks.size();

solr/core/src/java/org/apache/solr/cloud/LockTree.java

Lines changed: 107 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@
2121
import java.util.ArrayDeque;
2222
import java.util.HashMap;
2323
import java.util.List;
24+
import java.util.Locale;
2425
import java.util.Map;
26+
import java.util.UUID;
2527
import org.apache.solr.cloud.OverseerMessageHandler.Lock;
28+
import org.apache.solr.common.SolrException;
2629
import org.apache.solr.common.params.CollectionParams;
2730
import org.apache.solr.common.params.CollectionParams.LockLevel;
2831
import org.apache.solr.common.util.StrUtils;
@@ -38,20 +41,36 @@ public class LockTree {
3841
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
3942
private final Node root = new Node(null, LockLevel.CLUSTER, null);
4043

44+
public final Map<String, Lock> allLocks = new HashMap<>();
45+
4146
private class LockImpl implements Lock {
4247
final Node node;
48+
final String id;
4349

4450
LockImpl(Node node) {
4551
this.node = node;
52+
this.id = UUID.randomUUID().toString();
4653
}
4754

4855
@Override
4956
public void unlock() {
5057
synchronized (LockTree.this) {
51-
node.unlock(this);
58+
if (node.unlock(this)) {
59+
allLocks.remove(id);
60+
}
5261
}
5362
}
5463

64+
@Override
65+
public String id() {
66+
return id;
67+
}
68+
69+
@Override
70+
public boolean validateSubpath(int lockLevel, List<String> path) {
71+
return node.validateSubpath(lockLevel, path);
72+
}
73+
5574
@Override
5675
public String toString() {
5776
return StrUtils.join(node.constructPath(new ArrayDeque<>()), '/');
@@ -71,12 +90,43 @@ public String toString() {
7190
public class Session {
7291
private SessionNode root = new SessionNode(LockLevel.CLUSTER);
7392

74-
public Lock lock(CollectionParams.CollectionAction action, List<String> path) {
93+
public Lock lock(
94+
CollectionParams.CollectionAction action, List<String> path, String callingLockId) {
7595
if (action.lockLevel == LockLevel.NONE) return FREELOCK;
96+
Node startingNode = LockTree.this.root;
97+
SessionNode startingSession = root;
98+
99+
// If a callingLockId was passed in, validate it with the current lock path, and only start
100+
// locking below the calling lock
101+
Lock callingLock = StrUtils.isBlank(callingLockId) ? null : allLocks.get(callingLockId);
102+
log.debug("Calling lock id: {}, level: {}", callingLockId, callingLock);
103+
boolean reuseCurrentLock = false;
104+
if (callingLock != null) {
105+
if (callingLock.validateSubpath(action.lockLevel.getHeight(), path)) {
106+
startingNode = ((LockImpl) callingLock).node;
107+
startingSession = startingSession.find(startingNode.level.getHeight(), path);
108+
if (startingSession == null) {
109+
startingSession = root;
110+
}
111+
reuseCurrentLock = true;
112+
} else {
113+
throw new SolrException(
114+
SolrException.ErrorCode.SERVER_ERROR,
115+
String.format(
116+
Locale.ROOT,
117+
"Cannot lock an action under a different path than the calling action. Calling action lock path: %s, Requested action lock path: %s",
118+
callingLock,
119+
String.join("/", path)));
120+
}
121+
}
76122
synchronized (LockTree.this) {
77-
if (root.isBusy(action.lockLevel, path)) return null;
78-
Lock lockObject = LockTree.this.root.lock(action.lockLevel, path);
79-
if (lockObject == null) root.markBusy(action.lockLevel, path);
123+
if (startingSession.isBusy(action.lockLevel, path)) return null;
124+
Lock lockObject = startingNode.lock(action.lockLevel, path, reuseCurrentLock);
125+
if (lockObject == null) {
126+
startingSession.markBusy(action.lockLevel, path);
127+
} else {
128+
allLocks.put(lockObject.id(), lockObject);
129+
}
80130
return lockObject;
81131
}
82132
}
@@ -125,6 +175,18 @@ boolean isBusy(LockLevel lockLevel, List<String> path) {
125175
return false;
126176
}
127177
}
178+
179+
SessionNode find(int lockLevel, List<String> path) {
180+
if (level.getHeight() == lockLevel) {
181+
return this;
182+
} else if (level.getHeight() < lockLevel
183+
&& kids != null
184+
&& kids.containsKey(path.get(level.getHeight()))) {
185+
return kids.get(path.get(level.getHeight())).find(lockLevel, path);
186+
} else {
187+
return null;
188+
}
189+
}
128190
}
129191

130192
public Session getSession() {
@@ -135,6 +197,7 @@ private class Node {
135197
final String name;
136198
final Node mom;
137199
final LockLevel level;
200+
int refCount = 0;
138201
HashMap<String, Node> children = new HashMap<>();
139202
LockImpl myLock;
140203

@@ -151,36 +214,70 @@ boolean isLocked() {
151214
return false;
152215
}
153216

154-
void unlock(LockImpl lockObject) {
217+
boolean unlock(LockImpl lockObject) {
218+
if (--refCount > 0) {
219+
return false;
220+
}
155221
if (myLock == lockObject) myLock = null;
156222
else {
157223
log.info("Unlocked multiple times : {}", lockObject);
158224
}
225+
return true;
159226
}
160227

161-
Lock lock(LockLevel lockLevel, List<String> path) {
162-
if (myLock != null) return null; // I'm already locked. no need to go any further
228+
Lock lock(LockLevel lockLevel, List<String> path, boolean reuseCurrentLock) {
229+
if (myLock != null && !reuseCurrentLock) {
230+
// I'm already locked. no need to go any further
231+
return null;
232+
}
163233
if (lockLevel == level) {
164234
// lock is supposed to be acquired at this level
235+
if (myLock != null && reuseCurrentLock) {
236+
// I am already locked, and I want to be re-used
237+
refCount++;
238+
return myLock;
239+
}
165240
// If I am locked or any of my children or grandchildren are locked
166241
// it is not possible to acquire a lock
167242
if (isLocked()) return null;
243+
refCount++;
168244
return myLock = new LockImpl(this);
169245
} else {
170246
String childName = path.get(level.getHeight());
171247
Node child = children.get(childName);
172248
if (child == null)
173249
children.put(childName, child = new Node(childName, level.getChild(), this));
174-
return child.lock(lockLevel, path);
250+
return child.lock(lockLevel, path, false);
175251
}
176252
}
177253

254+
boolean validateSubpath(int lockLevel, List<String> path) {
255+
return level.getHeight() <= lockLevel
256+
&& (level.getHeight() == 0 || name.equals(path.get(level.getHeight() - 1)))
257+
&& (mom == null || mom.validateSubpath(lockLevel, path));
258+
}
259+
178260
ArrayDeque<String> constructPath(ArrayDeque<String> collect) {
179261
if (name != null) collect.addFirst(name);
180262
if (mom != null) mom.constructPath(collect);
181263
return collect;
182264
}
183265
}
184266

185-
static final Lock FREELOCK = () -> {};
267+
static final String FREELOCK_ID = "-1";
268+
static final Lock FREELOCK =
269+
new Lock() {
270+
@Override
271+
public void unlock() {}
272+
273+
@Override
274+
public String id() {
275+
return FREELOCK_ID;
276+
}
277+
278+
@Override
279+
public boolean validateSubpath(int lockLevel, List<String> path) {
280+
return false;
281+
}
282+
};
186283
}

solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.lang.invoke.MethodHandles;
2323
import java.util.HashSet;
24+
import java.util.List;
2425
import java.util.Set;
2526
import org.apache.solr.common.SolrException;
2627
import org.apache.solr.common.SolrException.ErrorCode;
@@ -61,7 +62,7 @@ public OverseerConfigSetMessageHandler(ZkStateReader zkStateReader, CoreContaine
6162
}
6263

6364
@Override
64-
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation) {
65+
public OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock) {
6566
NamedList<Object> results = new NamedList<>();
6667
try {
6768
if (!operation.startsWith(CONFIGSETS_ACTION_PREFIX)) {
@@ -113,11 +114,26 @@ public String getTimerName(String operation) {
113114
}
114115

115116
@Override
116-
public Lock lockTask(ZkNodeProps message, long ignored) {
117+
public Lock lockTask(ZkNodeProps message, long ignored, String callingLockId) {
117118
String configSetName = getTaskKey(message);
118119
if (canExecute(configSetName, message)) {
119120
markExclusiveTask(configSetName, message);
120-
return () -> unmarkExclusiveTask(configSetName, message);
121+
return new Lock() {
122+
@Override
123+
public void unlock() {
124+
unmarkExclusiveTask(configSetName, message);
125+
}
126+
127+
@Override
128+
public String id() {
129+
return configSetName;
130+
}
131+
132+
@Override
133+
public boolean validateSubpath(int lockLevel, List<String> path) {
134+
return false;
135+
}
136+
};
121137
}
122138
return null;
123139
}

solr/core/src/java/org/apache/solr/cloud/OverseerMessageHandler.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.solr.cloud;
1818

19+
import java.util.List;
1920
import org.apache.solr.common.cloud.ZkNodeProps;
2021

2122
/** Interface for processing messages received by an {@link OverseerTaskProcessor} */
@@ -26,7 +27,7 @@ public interface OverseerMessageHandler {
2627
* @param operation the operation to process
2728
* @return response
2829
*/
29-
OverseerSolrResponse processMessage(ZkNodeProps message, String operation);
30+
OverseerSolrResponse processMessage(ZkNodeProps message, String operation, Lock lock);
3031

3132
/**
3233
* @return the name of the OverseerMessageHandler
@@ -41,14 +42,18 @@ public interface OverseerMessageHandler {
4142

4243
interface Lock {
4344
void unlock();
45+
46+
String id();
47+
48+
boolean validateSubpath(int lockLevel, List<String> path);
4449
}
4550

4651
/**
4752
* Grabs an exclusive lock for this particular task.
4853
*
4954
* @return <code>null</code> if locking is not possible.
5055
*/
51-
Lock lockTask(ZkNodeProps message, long batchSessionId);
56+
Lock lockTask(ZkNodeProps message, long batchSessionId, String callingLockId);
5257

5358
/**
5459
* @param message the message being processed

0 commit comments

Comments
 (0)