Skip to content

Commit d1b373d

Browse files
authored
SOLR-17877: fix overseerEnabled cluster prop (#3627)
ZkController: * fix parsing of overseerEnabled cluster property * fix sequence of reading cluster property * reorder some initialization for clarity MiniSolrCloudCluster: * Remove flipOverseerEnablement * Fix MiniSolrCloudCluster application of overseer enablement TestDistributedTracing requires Overseer MockSimpleZkController: unused * Test overseerEnabled property (unit test)
1 parent 3e0f291 commit d1b373d

7 files changed

Lines changed: 113 additions & 79 deletions

File tree

solr/core/src/java/org/apache/solr/cloud/ZkController.java

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ public ZkController(
300300
String zkServerAddress,
301301
int zkClientConnectTimeout,
302302
CloudConfig cloudConfig)
303-
throws InterruptedException, TimeoutException, IOException {
303+
throws InterruptedException, TimeoutException, IOException, KeeperException {
304304

305305
if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null.");
306306
this.cc = cc;
@@ -375,26 +375,38 @@ public ZkController(
375375
this.overseerFailureMap = Overseer.getFailureMap(zkClient);
376376
this.asyncIdsMap = Overseer.getAsyncIdsMap(zkClient);
377377

378+
createClusterZkNodes(zkClient);
379+
378380
zkStateReader =
379381
new ZkStateReader(
380382
zkClient,
381383
() -> {
382384
if (cc != null) cc.securityNodeChanged();
383385
});
384386

387+
zkStateReader.createClusterStateWatchersAndUpdate(); // and reads cluster properties
388+
389+
// note: Can't read cluster properties until createClusterState ^ is called
390+
final String urlSchemeFromClusterProp =
391+
zkStateReader.getClusterProperty(ZkStateReader.URL_SCHEME, ZkStateReader.HTTP);
392+
// this must happen after zkStateReader has initialized the cluster props
393+
this.baseURL = URLUtil.getBaseUrlForNodeName(this.nodeName, urlSchemeFromClusterProp);
394+
385395
// Now that zkStateReader is available, read OVERSEER_ENABLED.
386-
// When overseerEnabled is false, both distributed features should be enabled
387-
Boolean overseerEnabled =
388-
zkStateReader.getClusterProperty(ZkStateReader.OVERSEER_ENABLED, null);
389-
if (overseerEnabled == null) {
390-
overseerEnabled = EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
391-
}
396+
final boolean overseerEnabled =
397+
Boolean.parseBoolean(
398+
String.valueOf(
399+
zkStateReader.getClusterProperty(
400+
ZkStateReader.OVERSEER_ENABLED,
401+
EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true))));
402+
392403
if (overseerEnabled) {
393404
log.info("The Overseer is enabled. It will process all cluster commands & state updates.");
394405
} else {
395406
log.info(
396407
"The Overseer is disabled. Cluster commands & state updates will happen on any/all nodes.");
397408
}
409+
// These "distributed" things replace the Overseer when that's disabled
398410
this.distributedClusterStateUpdater = new DistributedClusterStateUpdater(!overseerEnabled);
399411
this.distributedCommandRunner =
400412
!overseerEnabled
@@ -1068,16 +1080,6 @@ private static void repairSecurityJson(SolrZkClient zkClient)
10681080

10691081
private void init() {
10701082
try {
1071-
createClusterZkNodes(zkClient);
1072-
zkStateReader.createClusterStateWatchersAndUpdate();
1073-
1074-
// note: Can't read cluster properties until createClusterState ^ is called
1075-
final String urlSchemeFromClusterProp =
1076-
zkStateReader.getClusterProperty(ZkStateReader.URL_SCHEME, ZkStateReader.HTTP);
1077-
1078-
// this must happen after zkStateReader has initialized the cluster props
1079-
this.baseURL = Utils.getBaseUrlForNodeName(this.nodeName, urlSchemeFromClusterProp);
1080-
10811083
checkForExistingEphemeralNode();
10821084
registerLiveNodesListener();
10831085
checkClusterVersionCompatibility();
@@ -1109,10 +1111,6 @@ private void init() {
11091111

11101112
// Do this last to signal we're up.
11111113
createEphemeralLiveNode();
1112-
} catch (IOException e) {
1113-
log.error("", e);
1114-
throw new SolrException(
1115-
SolrException.ErrorCode.SERVER_ERROR, "Can't create ZooKeeperController", e);
11161114
} catch (InterruptedException e) {
11171115
// Restore the interrupted status
11181116
Thread.currentThread().interrupt();

solr/core/src/test/org/apache/solr/cloud/CreateCollectionCleanupTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ public static void createCluster() throws Exception {
6666
.addConfig(
6767
"conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
6868
.withSolrXml(CLOUD_SOLR_XML_WITH_10S_CREATE_COLL_WAIT)
69-
.flipOverseerEnablement()
7069
.configure();
7170
}
7271

solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,7 @@ public void setUp() throws Exception {
6262
System.setProperty("distribUpdateSoTimeout", "15000");
6363

6464
// these tests need to be isolated, so we don't share the minicluster
65-
configureCluster(4)
66-
.addConfig("conf", configset("cloud-minimal"))
67-
.flipOverseerEnablement()
68-
// Some tests (this one) use "the other" cluster Collection API execution strategy to
69-
// increase coverage
70-
.configure();
65+
configureCluster(4).addConfig("conf", configset("cloud-minimal")).configure();
7166
}
7267

7368
@After

solr/core/src/test/org/apache/solr/cloud/MockSimpleZkController.java

Lines changed: 0 additions & 36 deletions
This file was deleted.

solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,96 @@ public void testCheckNoOldClusterstate() throws Exception {
682682
}
683683
}
684684

685+
@Test
686+
public void testOverseerEnabledClusterPropertyRespected() throws Exception {
687+
// Do not use MiniSolrCloudCluster
688+
Path zkDir = createTempDir("testOverseerEnabledClusterPropertyRespected");
689+
ZkTestServer server = new ZkTestServer(zkDir);
690+
try {
691+
server.run();
692+
693+
// Create cluster ZK nodes and set the overseerEnabled cluster property to false BEFORE
694+
// ZkController starts
695+
try (SolrZkClient client =
696+
new SolrZkClient.Builder()
697+
.withUrl(server.getZkAddress())
698+
.withTimeout(TIMEOUT, TimeUnit.MILLISECONDS)
699+
.build()) {
700+
701+
ZkController.createClusterZkNodes(client);
702+
ClusterProperties cp = new ClusterProperties(client);
703+
cp.setClusterProperty(ZkStateReader.OVERSEER_ENABLED, "false");
704+
}
705+
706+
// Now create a ZkController - it should respect the cluster property and have overseer
707+
// disabled
708+
CoreContainer cc = getCoreContainer();
709+
try {
710+
CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build();
711+
try (ZkController zkController =
712+
new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig)) {
713+
714+
// The overseer should be disabled due to the cluster property, meaning distributed state
715+
// updates should be enabled
716+
// Since overseerEnabled=false in cluster properties, !overseerEnabled should be true, so
717+
// isDistributedStateUpdate() should return true
718+
assertTrue(
719+
"Overseer should be disabled when overseerEnabled cluster property is false, meaning distributed state updates should be enabled",
720+
zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate());
721+
}
722+
} finally {
723+
cc.shutdown();
724+
}
725+
} finally {
726+
server.shutdown();
727+
}
728+
}
729+
730+
@Test
731+
public void testOverseerEnabledClusterPropertyTrue() throws Exception {
732+
// Do not use MiniSolrCloudCluster
733+
Path zkDir = createTempDir("testOverseerEnabledClusterPropertyTrue");
734+
ZkTestServer server = new ZkTestServer(zkDir);
735+
try {
736+
server.run();
737+
738+
// Create cluster ZK nodes and set the overseerEnabled cluster property to true BEFORE
739+
// ZkController starts
740+
try (SolrZkClient client =
741+
new SolrZkClient.Builder()
742+
.withUrl(server.getZkAddress())
743+
.withTimeout(TIMEOUT, TimeUnit.MILLISECONDS)
744+
.build()) {
745+
746+
ZkController.createClusterZkNodes(client);
747+
ClusterProperties cp = new ClusterProperties(client);
748+
cp.setClusterProperty(ZkStateReader.OVERSEER_ENABLED, "true");
749+
}
750+
751+
// Now create a ZkController - it should respect the cluster property and have overseer
752+
// enabled
753+
CoreContainer cc = getCoreContainer();
754+
try {
755+
CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983).build();
756+
try (ZkController zkController =
757+
new ZkController(cc, server.getZkAddress(), TIMEOUT, cloudConfig)) {
758+
759+
// The overseer should be enabled due to the cluster property, meaning distributed state
760+
// updates should be disabled
761+
// Since overseerEnabled=true in cluster properties, !overseerEnabled should be false, so
762+
// isDistributedStateUpdate() should return false
763+
assertFalse(
764+
"Overseer should be enabled when overseerEnabled cluster property is true, meaning distributed state updates should be disabled",
765+
zkController.getDistributedClusterStateUpdater().isDistributedStateUpdate());
766+
}
767+
} finally {
768+
cc.shutdown();
769+
}
770+
} finally {
771+
server.shutdown();
772+
}
773+
}
774+
685775
private CoreContainer getCoreContainer() {
686776
return new MockCoreContainer();
687777
}

solr/modules/opentelemetry/src/test/org/apache/solr/opentelemetry/TestDistributedTracing.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public static void setupCluster() throws Exception {
5858
.addConfig("config", TEST_PATH().resolve("collection1").resolve("conf"))
5959
.withSolrXml(TEST_PATH().resolve("solr.xml"))
6060
.withTraceIdGenerationDisabled()
61+
.withOverseer(true) // some assertions assume overseer
6162
.configure();
6263

6364
assertNotEquals(

solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,20 +1108,6 @@ public Builder addConfig(String configName, Path configPath) {
11081108
return this;
11091109
}
11101110

1111-
/**
1112-
* This method makes the MiniSolrCloudCluster use the "other" Collection API execution strategy
1113-
* than it normally would. When some test classes call this method (and some don't) we make sure
1114-
* that a run of multiple tests with a single seed will exercise both code lines (distributed
1115-
* and Overseer based Collection API) so regressions can be spotted faster.
1116-
*
1117-
* <p>The real need is for a few tests covering reasonable use cases to call this method. If
1118-
* you're adding a new test, you don't have to call it (but it's ok if you do).
1119-
*/
1120-
public Builder flipOverseerEnablement() {
1121-
overseerEnabled = !overseerEnabled;
1122-
return this;
1123-
}
1124-
11251111
/**
11261112
* Force the cluster Collection and config state API execution as well as the cluster state
11271113
* update strategy to be either Overseer based or distributed. <b>This method can be useful when
@@ -1181,7 +1167,7 @@ public MiniSolrCloudCluster configure() throws Exception {
11811167
* @throws Exception if an error occurs on startup
11821168
*/
11831169
public MiniSolrCloudCluster build() throws Exception {
1184-
this.clusterProperties.put(ZkStateReader.OVERSEER_ENABLED, Boolean.toString(overseerEnabled));
1170+
System.setProperty("solr.cloud.overseer.enabled", Boolean.toString(overseerEnabled));
11851171

11861172
// eager init to prevent OTEL init races caused by test setup
11871173
if (!disableTraceIdGeneration && TracerConfigurator.TRACE_ID_GEN_ENABLED) {
@@ -1204,6 +1190,7 @@ public MiniSolrCloudCluster build() throws Exception {
12041190
cluster.uploadConfigSet(config.path, config.name);
12051191
}
12061192

1193+
// TODO process BEFORE nodes start up! Some props are only read on node start.
12071194
if (clusterProperties.size() > 0) {
12081195
ClusterProperties props = new ClusterProperties(cluster.getZkClient());
12091196
for (Map.Entry<String, Object> entry : clusterProperties.entrySet()) {

0 commit comments

Comments
 (0)