Skip to content

Commit d609706

Browse files
authored
SOLR-17877: Introduce cluster property overseerEnabled, replacing dual booleans (#3524)
The SolrCloud Overseer can now be disabled in favor of a simpler distributed mode of cluster command and collection state processing. In Solr 9 this was possible only through undocumented / unsupported configuration in solr.xml (`distributedClusterStateUpdates` & `distributedCollectionConfigSetExecution`). Those settings are now removed. In their place is a new `overseerEnabled` cluster property and an env var `SOLR_CLOUD_OVERSEER_ENABLED`.
1 parent 8d6b6d6 commit d609706

18 files changed

Lines changed: 96 additions & 161 deletions

File tree

solr/CHANGES.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,13 @@ Other Changes
221221

222222
* SOLR-17874: Switch remaining usages of Apache HttpClient to use the internally managed Jetty HttpClient instance. (David Smiley)
223223

224+
* SOLR-17877: The SolrCloud Overseer can be disabled, in favor of a simpler distributed mode of
225+
cluster command and collection state processing. In Solr 9 this was possible with undocumented
226+
/ unsupported configuration in solr.xml (`distributedClusterStateUpdates` &
227+
`distributedCollectionConfigSetExecution`). Those are now removed. In their place is a new
228+
overseerEnabled cluster property and an env var SOLR_CLOUD_OVERSEER_ENABLED. Read more in the
229+
upgrade guide. (David Smiley)
230+
224231
================== 9.10.0 ==================
225232
New Features
226233
---------------------

solr/core/src/java/org/apache/solr/cloud/DistributedClusterStateUpdater.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,6 @@ public class DistributedClusterStateUpdater {
9292
*/
9393
public DistributedClusterStateUpdater(boolean useDistributedStateUpdate) {
9494
this.useDistributedStateUpdate = useDistributedStateUpdate;
95-
if (log.isInfoEnabled()) {
96-
log.info(
97-
"Creating DistributedClusterStateUpdater with useDistributedStateUpdate="
98-
+ useDistributedStateUpdate
99-
+ ". Solr will be using "
100-
+ (useDistributedStateUpdate ? "distributed" : "Overseer based")
101-
+ " cluster state updates."); // nowarn
102-
}
10395
}
10496

10597
/**

solr/core/src/java/org/apache/solr/cloud/Overseer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -711,8 +711,7 @@ public Overseer(
711711
this.zkController = zkController;
712712
this.stats = new Stats();
713713
this.config = config;
714-
this.distributedClusterStateUpdater =
715-
new DistributedClusterStateUpdater(config.getDistributedClusterStateUpdates());
714+
this.distributedClusterStateUpdater = zkController.getDistributedClusterStateUpdater();
716715

717716
this.solrMetricsContext =
718717
new SolrMetricsContext(

solr/core/src/java/org/apache/solr/cloud/ZkController.java

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
import org.apache.solr.common.params.CommonParams;
103103
import org.apache.solr.common.params.SolrParams;
104104
import org.apache.solr.common.util.Compressor;
105+
import org.apache.solr.common.util.EnvUtils;
105106
import org.apache.solr.common.util.ExecutorUtil;
106107
import org.apache.solr.common.util.IOUtils;
107108
import org.apache.solr.common.util.ObjectReleaseTracker;
@@ -306,10 +307,6 @@ public ZkController(
306307

307308
this.cloudConfig = cloudConfig;
308309

309-
// Use the configured way to do cluster state update (Overseer queue vs distributed)
310-
distributedClusterStateUpdater =
311-
new DistributedClusterStateUpdater(cloudConfig.getDistributedClusterStateUpdates());
312-
313310
this.genericCoreNodeNames = cloudConfig.getGenericCoreNodeNames();
314311

315312
this.zkServerAddress = zkServerAddress;
@@ -373,11 +370,6 @@ public ZkController(
373370
// Refuse to start if ZK has a non empty /clusterstate.json or a /solr.xml file
374371
checkNoOldClusterstate(zkClient);
375372

376-
this.distributedCommandRunner =
377-
cloudConfig.getDistributedCollectionConfigSetExecution()
378-
? Optional.of(new DistributedCollectionConfigSetCommandRunner(cc, zkClient))
379-
: Optional.empty();
380-
381373
this.overseerRunningMap = Overseer.getRunningMap(zkClient);
382374
this.overseerCompletedMap = Overseer.getCompletedMap(zkClient);
383375
this.overseerFailureMap = Overseer.getFailureMap(zkClient);
@@ -390,6 +382,25 @@ public ZkController(
390382
if (cc != null) cc.securityNodeChanged();
391383
});
392384

385+
// Now that zkStateReader is available, read OVERSEER_ENABLED.
386+
// When overseerEnabled is false, both distributed features should be enabled
387+
Boolean overseerEnabled =
388+
zkStateReader.getClusterProperty(ZkStateReader.OVERSEER_ENABLED, null);
389+
if (overseerEnabled == null) {
390+
overseerEnabled = EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
391+
}
392+
if (overseerEnabled) {
393+
log.info("The Overseer is enabled. It will process all cluster commands & state updates.");
394+
} else {
395+
log.info(
396+
"The Overseer is disabled. Cluster commands & state updates will happen on any/all nodes.");
397+
}
398+
this.distributedClusterStateUpdater = new DistributedClusterStateUpdater(!overseerEnabled);
399+
this.distributedCommandRunner =
400+
!overseerEnabled
401+
? Optional.of(new DistributedCollectionConfigSetCommandRunner(cc, zkClient))
402+
: Optional.empty();
403+
393404
init();
394405

395406
if (distributedClusterStateUpdater.isDistributedStateUpdate()) {

solr/core/src/java/org/apache/solr/cloud/api/collections/DistributedCollectionConfigSetCommandRunner.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@
6464
* going through Overseer and {@link OverseerCollectionMessageHandler} or {@link
6565
* org.apache.solr.cloud.OverseerConfigSetMessageHandler}.
6666
*
67-
* <p>This class is only called when Collection and Config Set API calls are configured to be
68-
* distributed, which implies cluster state updates are distributed as well.
67+
* <p>This class is only called when the Overseer is disabled, which implies cluster state updates
68+
* are distributed as well.
6969
*/
7070
public class DistributedCollectionConfigSetCommandRunner {
7171
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -112,13 +112,6 @@ public DistributedCollectionConfigSetCommandRunner(
112112
// note: coreContainer.getZkController() is not yet instantiated; don't call it right now
113113
this.coreContainer = coreContainer;
114114

115-
if (log.isInfoEnabled()) {
116-
// Note it is hard to print a log when Collection API is handled by Overseer because Overseer
117-
// is started regardless of how Collection API is handled, so it doesn't really know...
118-
log.info(
119-
"Creating DistributedCollectionConfigSetCommandRunner. Collection and ConfigSet APIs are running distributed (not Overseer based)");
120-
}
121-
122115
// TODO we should look at how everything is getting closed when the node is shutdown. But it
123116
// seems that CollectionsHandler (that creates instances of this class) is not really closed, so
124117
// maybe it doesn't matter?

solr/core/src/java/org/apache/solr/core/CloudConfig.java

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@ public class CloudConfig {
4949

5050
private final String pkiHandlerPublicKeyPath;
5151

52-
private final boolean useDistributedClusterStateUpdates;
53-
54-
private final boolean useDistributedCollectionConfigSetExecution;
55-
5652
private final int minStateByteLenForCompression;
5753

5854
private final String stateCompressorClass;
@@ -72,8 +68,6 @@ public class CloudConfig {
7268
boolean createCollectionCheckLeaderActive,
7369
String pkiHandlerPrivateKeyPath,
7470
String pkiHandlerPublicKeyPath,
75-
boolean useDistributedClusterStateUpdates,
76-
boolean useDistributedCollectionConfigSetExecution,
7771
int minStateByteLenForCompression,
7872
String stateCompressorClass) {
7973
this.zkHost = zkHost;
@@ -90,17 +84,9 @@ public class CloudConfig {
9084
this.createCollectionCheckLeaderActive = createCollectionCheckLeaderActive;
9185
this.pkiHandlerPrivateKeyPath = pkiHandlerPrivateKeyPath;
9286
this.pkiHandlerPublicKeyPath = pkiHandlerPublicKeyPath;
93-
this.useDistributedClusterStateUpdates = useDistributedClusterStateUpdates;
94-
this.useDistributedCollectionConfigSetExecution = useDistributedCollectionConfigSetExecution;
9587
this.minStateByteLenForCompression = minStateByteLenForCompression;
9688
this.stateCompressorClass = stateCompressorClass;
9789

98-
if (useDistributedCollectionConfigSetExecution && !useDistributedClusterStateUpdates) {
99-
throw new SolrException(
100-
SolrException.ErrorCode.SERVER_ERROR,
101-
"'useDistributedCollectionConfigSetExecution' can't be true if useDistributedClusterStateUpdates is false");
102-
}
103-
10490
if (this.hostPort == -1)
10591
throw new SolrException(
10692
SolrException.ErrorCode.SERVER_ERROR, "'hostPort' must be configured to run SolrCloud");
@@ -162,14 +148,6 @@ public String getPkiHandlerPublicKeyPath() {
162148
return pkiHandlerPublicKeyPath;
163149
}
164150

165-
public boolean getDistributedClusterStateUpdates() {
166-
return useDistributedClusterStateUpdates;
167-
}
168-
169-
public boolean getDistributedCollectionConfigSetExecution() {
170-
return useDistributedCollectionConfigSetExecution;
171-
}
172-
173151
public int getMinStateByteLenForCompression() {
174152
return minStateByteLenForCompression;
175153
}
@@ -202,8 +180,6 @@ public static class CloudConfigBuilder {
202180
DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE;
203181
private String pkiHandlerPrivateKeyPath;
204182
private String pkiHandlerPublicKeyPath;
205-
private boolean useDistributedClusterStateUpdates = false;
206-
private boolean useDistributedCollectionConfigSetExecution = false;
207183
private int minStateByteLenForCompression = DEFAULT_MINIMUM_STATE_SIZE_FOR_COMPRESSION;
208184

209185
private String stateCompressorClass;
@@ -277,18 +253,6 @@ public CloudConfigBuilder setPkiHandlerPublicKeyPath(String pkiHandlerPublicKeyP
277253
return this;
278254
}
279255

280-
public CloudConfigBuilder setUseDistributedClusterStateUpdates(
281-
boolean useDistributedClusterStateUpdates) {
282-
this.useDistributedClusterStateUpdates = useDistributedClusterStateUpdates;
283-
return this;
284-
}
285-
286-
public CloudConfigBuilder setUseDistributedCollectionConfigSetExecution(
287-
boolean useDistributedCollectionConfigSetExecution) {
288-
this.useDistributedCollectionConfigSetExecution = useDistributedCollectionConfigSetExecution;
289-
return this;
290-
}
291-
292256
public CloudConfigBuilder setMinStateByteLenForCompression(int minStateByteLenForCompression) {
293257
this.minStateByteLenForCompression = minStateByteLenForCompression;
294258
return this;
@@ -315,8 +279,6 @@ public CloudConfig build() {
315279
createCollectionCheckLeaderActive,
316280
pkiHandlerPrivateKeyPath,
317281
pkiHandlerPublicKeyPath,
318-
useDistributedClusterStateUpdates,
319-
useDistributedCollectionConfigSetExecution,
320282
minStateByteLenForCompression,
321283
stateCompressorClass);
322284
}

solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -564,18 +564,13 @@ private static CloudConfig fillSolrCloudSection(NamedList<Object> nl, String def
564564
case "pkiHandlerPublicKeyPath":
565565
builder.setPkiHandlerPublicKeyPath(value);
566566
break;
567-
case "distributedClusterStateUpdates":
568-
builder.setUseDistributedClusterStateUpdates(Boolean.parseBoolean(value));
569-
break;
570-
case "distributedCollectionConfigSetExecution":
571-
builder.setUseDistributedCollectionConfigSetExecution(Boolean.parseBoolean(value));
572-
break;
573567
case "minStateByteLenForCompression":
574568
builder.setMinStateByteLenForCompression(parseInt(name, value));
575569
break;
576570
case "stateCompressor":
577571
builder.setStateCompressorClass(value);
578572
break;
573+
579574
default:
580575
throw new SolrException(
581576
SolrException.ErrorCode.SERVER_ERROR,

solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ public class CreateCollectionCleanupTest extends SolrCloudTestCase {
5656
+ " <int name=\"distribUpdateConnTimeout\">${distribUpdateConnTimeout:45000}</int>\n"
5757
+ " <int name=\"distribUpdateSoTimeout\">${distribUpdateSoTimeout:340000}</int>\n"
5858
+ " <int name=\"createCollectionWaitTimeTillActive\">${createCollectionWaitTimeTillActive:10}</int>\n"
59-
+ " <str name=\"distributedClusterStateUpdates\">${solr.distributedClusterStateUpdates:false}</str> \n"
6059
+ " </solrcloud>\n"
6160
+ " \n"
6261
+ "</solr>\n";
@@ -67,7 +66,7 @@ public static void createCluster() throws Exception {
6766
.addConfig(
6867
"conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
6968
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
70-
.useOtherCollectionConfigSetExecution()
69+
.flipOverseerEnablement()
7170
.configure();
7271
}
7372

solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void setUp() throws Exception {
6464
// these tests need to be isolated, so we don't share the minicluster
6565
configureCluster(4)
6666
.addConfig("conf", configset("cloud-minimal"))
67-
.useOtherCollectionConfigSetExecution()
67+
.flipOverseerEnablement()
6868
// Some tests (this one) use "the other" cluster Collection API execution strategy to
6969
// increase coverage
7070
.configure();

solr/core/src/test/org/apache/solr/cloud/OverseerTest.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ public void createCollection(String collection, int numShards) throws Exception
208208
"");
209209
final Overseer overseer = MiniSolrCloudCluster.getOpenOverseer(overseers);
210210
// This being an Overseer test, we force it to use the Overseer based cluster state update.
211-
// Look for "new Overseer" calls in this class.
212211
assertFalse(overseer.getDistributedClusterStateUpdater().isDistributedStateUpdate());
213212
ZkDistributedQueue q = overseer.getStateUpdateQueue();
214213
q.offer(m);
@@ -998,6 +997,9 @@ public void testOverseerStatsReset() throws Exception {
998997
reader = new ZkStateReader(zkClient);
999998
reader.createClusterStateWatchersAndUpdate();
1000999

1000+
// Set system property to ensure tests use Overseer mode
1001+
System.setProperty("solr.cloud.overseer.enabled", "true");
1002+
10011003
mockController =
10021004
new MockZKController(server.getZkAddress(), "127.0.0.1:8983_solr", overseers);
10031005

@@ -1021,10 +1023,7 @@ public void testOverseerStatsReset() throws Exception {
10211023
"/admin/cores",
10221024
reader,
10231025
zkController,
1024-
new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983)
1025-
.setUseDistributedClusterStateUpdates(false)
1026-
.setUseDistributedCollectionConfigSetExecution(false)
1027-
.build());
1026+
new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build());
10281027
overseers.add(overseer);
10291028
ElectionContext ec =
10301029
new OverseerElectionContext(zkClient, overseer, server.getZkAddress().replace("/", "_"));
@@ -1837,8 +1836,12 @@ private SolrZkClient electNewOverseer(String address)
18371836
httpShardHandlerFactory.init(new PluginInfo("shardHandlerFactory", Collections.emptyMap()));
18381837
httpShardHandlerFactorys.add(httpShardHandlerFactory);
18391838

1839+
// Set system property to ensure tests use Overseer mode
1840+
System.setProperty("solr.cloud.overseer.enabled", "true");
1841+
18401842
ZkController zkController = createMockZkController(address, null, reader);
18411843
zkControllers.add(zkController);
1844+
18421845
// Create an Overseer with associated configuration to NOT USE distributed state update. Tests
18431846
// in this class really test the Overseer.
18441847
Overseer overseer =
@@ -1848,9 +1851,7 @@ private SolrZkClient electNewOverseer(String address)
18481851
"/admin/cores",
18491852
reader,
18501853
zkController,
1851-
new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983)
1852-
.setUseDistributedClusterStateUpdates(false)
1853-
.build());
1854+
new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build());
18541855
overseers.add(overseer);
18551856
ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replace("/", "_"));
18561857
overseerElector.setup(ec);
@@ -1910,6 +1911,8 @@ public Void answer(InvocationOnMock invocation) {
19101911
when(zkController.getCoreContainer()).thenReturn(mockAlwaysUpCoreContainer);
19111912
when(zkController.getZkClient()).thenReturn(zkClient);
19121913
when(zkController.getZkStateReader()).thenReturn(reader);
1914+
when(zkController.getDistributedClusterStateUpdater())
1915+
.thenReturn(new DistributedClusterStateUpdater(false));
19131916
// primitive support for CC.runAsync
19141917
doAnswer(
19151918
invocable -> {

0 commit comments

Comments
 (0)