diff --git a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/utils/CassandraConfigFileReader.java b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/utils/CassandraConfigFileReader.java index c7e291f5f1..a207219d24 100644 --- a/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/utils/CassandraConfigFileReader.java +++ b/v2/spanner-common/src/main/java/com/google/cloud/teleport/v2/spanner/migrations/utils/CassandraConfigFileReader.java @@ -44,12 +44,23 @@ public List getCassandraShard(String cassandraConfigFilePath) { LOG.info("Reading Cassandra configuration from: {}", cassandraConfigFilePath); OptionsMap optionsMap = CassandraDriverConfigLoader.getOptionsMapFromFile(cassandraConfigFilePath); - CassandraShard shard = new CassandraShard(optionsMap); - LOG.info("Successfully created CassandraShard: {}", shard); - return Collections.singletonList(shard); + return getCassandraShard(optionsMap); } catch (FileNotFoundException e) { throw new IllegalArgumentException( "Configuration file not found: " + cassandraConfigFilePath, e); } } + + /** + * Reads the Cassandra configuration file from the specified GCS path and converts it into a list + * of CassandraShard objects. + * + * @param cassandraConfigFilePath the GCS path of the Cassandra configuration file. + * @return a list containing the parsed CassandraShard. + */ + public List getCassandraShard(OptionsMap optionsMap) { + CassandraShard shard = new CassandraShard(optionsMap); + LOG.info("Successfully created CassandraShard: {}", shard); + return Collections.singletonList(shard); + } } diff --git a/v2/spanner-to-sourcedb/README.md b/v2/spanner-to-sourcedb/README.md index 16e5b1a88f..da3f199c5a 100644 --- a/v2/spanner-to-sourcedb/README.md +++ b/v2/spanner-to-sourcedb/README.md @@ -86,9 +86,9 @@ A few prerequisites must be considered before starting with reverse replication. 8. Ensure that that [session file](https://googlecloudplatform.github.io/spanner-migration-tool/reports.html#session-file-ending-in-sessionjson) is uploaded to GCS (this requires a schema conversion to be done). 9. Configuration Files Upload - **For MySQL:** - [Source shards file](./RunnigReverseReplication.md#sample-source-shards-file-for-MySQL) already uploaded to GCS. + [Source shards file](#sample-source-shards-file-for-MySQL) already uploaded to GCS. - **For Cassandra:** - [Source file](./RunnigReverseReplication.md#Sample-source-File-for-Cassandra) already uploaded to GCS. + [Source file](#sample-source-file-for-Cassandra) already uploaded to GCS. 10. Resources needed for reverse replication incur cost. Make sure to read [cost](#cost). 11. Reverse replication uses shard identifier column per table to route the Spanner records to a given source shard.The column identified as the sharding column needs to be selected via Spanner Migration Tool when performing migration.The value of this column should be the logicalShardId value specified in the [source shard file](#sample-source-shards-file-for-MySQL).In the event that the shard identifier column is not an existing column,the application code needs to be changed to populate this shard identifier column when writing to Spanner. Or use a custom shard identifier plugin to supply the shard identifier. In case of single shard migrations, this step is skipped. 12. The reverse replication pipeline uses GCS for dead letter queue handling. Ensure that the DLQ directory exists in GCS. @@ -141,24 +141,26 @@ The database user password should be kept in [Secret Manager](#https://cloud.goo The file should be a list of JSONs as: ```json -[ - { - "logicalShardId": "shard1", - "host": "10.11.12.13", - "user": "root", - "secretManagerUri":"projects/123/secrets/rev-cmek-cred-shard1/versions/latest", - "port": "3306", - "dbName": "db1" - }, - { - "logicalShardId": "shard2", - "host": "10.11.12.14", - "user": "root", - "secretManagerUri":"projects/123/secrets/rev-cmek-cred-shard2/versions/latest", - "port": "3306", - "dbName": "db2" - } -] +{ + "shardConfigs": [ + { + "logicalShardId": "shard1", + "host": "10.11.12.13", + "user": "root", + "secretManagerUri": "projects/123/secrets/rev-cmek-cred-shard1/versions/latest", + "port": "3306", + "dbName": "db1" + }, + { + "logicalShardId": "shard2", + "host": "10.11.12.14", + "user": "root", + "secretManagerUri": "projects/123/secrets/rev-cmek-cred-shard2/versions/latest", + "port": "3306", + "dbName": "db2" + } + ] +} ``` diff --git a/v2/spanner-to-sourcedb/pom.xml b/v2/spanner-to-sourcedb/pom.xml index e9f5d7c8f4..8ba1a015e9 100644 --- a/v2/spanner-to-sourcedb/pom.xml +++ b/v2/spanner-to-sourcedb/pom.xml @@ -137,6 +137,10 @@ com.datastax.cassandra cassandra-driver-core + + com.datastax.oss + java-driver-core + diff --git a/v2/spanner-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDb.java b/v2/spanner-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDb.java index 2e9e9286cd..e44992fba5 100644 --- a/v2/spanner-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDb.java +++ b/v2/spanner-to-sourcedb/src/main/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDb.java @@ -15,6 +15,7 @@ */ package com.google.cloud.teleport.v2.templates; +import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.CASSANDRA_SOURCE_TYPE; import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.MYSQL_SOURCE_TYPE; import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.POSTGRES_SOURCE_TYPE; import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.RUN_MODE_REGULAR; @@ -38,12 +39,16 @@ import com.google.cloud.teleport.v2.spanner.ddl.Ddl; import com.google.cloud.teleport.v2.spanner.migrations.shard.CassandraShard; import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.CassandraConnectionConfig; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.JdbcShardConfig; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.SourceConfigParser; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.SourceConnectionConfig; import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; import com.google.cloud.teleport.v2.spanner.migrations.utils.CassandraConfigFileReader; import com.google.cloud.teleport.v2.spanner.migrations.utils.CassandraDriverConfigLoader; import com.google.cloud.teleport.v2.spanner.migrations.utils.DataflowWorkerMachineTypeUtils; +import com.google.cloud.teleport.v2.spanner.migrations.utils.ISecretManagerAccessor; import com.google.cloud.teleport.v2.spanner.migrations.utils.SecretManagerAccessorImpl; -import com.google.cloud.teleport.v2.spanner.migrations.utils.ShardFileReader; import com.google.cloud.teleport.v2.spanner.sourceddl.CassandraInformationSchemaScanner; import com.google.cloud.teleport.v2.spanner.sourceddl.MySqlInformationSchemaScanner; import com.google.cloud.teleport.v2.spanner.sourceddl.PostgreSQLInformationSchemaScanner; @@ -61,6 +66,7 @@ import com.google.cloud.teleport.v2.templates.transforms.UpdateDlqMetricsFn; import com.google.cloud.teleport.v2.transforms.DLQWriteTransform; import com.google.cloud.teleport.v2.values.FailsafeElement; +import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; @@ -647,20 +653,14 @@ public static PipelineResult run(Options options) { .get(SpannerInformationSchemaProcessorTransform.SHADOW_TABLE_DDL_TAG) .apply("View Shadow DDL", View.asSingleton()); - List shards; - String shardingMode; - if (MYSQL_SOURCE_TYPE.equals(options.getSourceType()) - || POSTGRES_SOURCE_TYPE.equals(options.getSourceType())) { - ShardFileReader shardFileReader = new ShardFileReader(new SecretManagerAccessorImpl()); - shards = shardFileReader.getOrderedShardDetails(options.getSourceShardsFilePath()); - shardingMode = Constants.SHARDING_MODE_MULTI_SHARD; + List shards = getShardList(options.getSourceType(), options.getSourceShardsFilePath()); - } else { - CassandraConfigFileReader cassandraConfigFileReader = new CassandraConfigFileReader(); - shards = cassandraConfigFileReader.getCassandraShard(options.getSourceShardsFilePath()); - LOG.info("Cassandra config is: {}", shards.get(0)); - shardingMode = Constants.SHARDING_MODE_SINGLE_SHARD; - } + // cassandra is always a single sharded migration. + // for JDBC, shards size and IsShardedMigration option is used below. + String shardingMode = + options.getSourceType().equals(CASSANDRA_SOURCE_TYPE) + ? Constants.SHARDING_MODE_SINGLE_SHARD + : Constants.SHARDING_MODE_MULTI_SHARD; if (MYSQL_SOURCE_TYPE.equals(options.getSourceType())) { validateMySQLNotReadOnly(shards); @@ -951,6 +951,53 @@ static void buildPipeline( .build()); } + /** + * Returns a list of shards based on the source type and source shards file path. This should be + * removed in Phase 2 of Standardizing config. + * + * @param sourceType The type of the source database. + * @param sourceShardsFilePath The GCS path to the source shards configuration file. + * @return A list of shards. + */ + public static List getShardList(String sourceType, String sourceShardsFilePath) { + ISecretManagerAccessor secretManagerAccessor = new SecretManagerAccessorImpl(); + SourceConfigParser sourceConfigParser = new SourceConfigParser(secretManagerAccessor); + SourceConnectionConfig sourceConnectionConfig; + try { + // Parse the source shards configuration file to respective + // SourceConnectionConfig. + sourceConnectionConfig = + sourceConfigParser.parseConfiguration(sourceType, sourceShardsFilePath); + } catch (Exception e) { + LOG.error("Error parsing source config", e); + throw new RuntimeException("Error parsing source config", e); + } + List shards; + if (sourceConnectionConfig instanceof JdbcShardConfig) { + shards = ((JdbcShardConfig) sourceConnectionConfig).getShardConfigs(); + LOG.info("JDBC shard config is parsed."); + } else if (sourceConnectionConfig instanceof CassandraConnectionConfig) { + CassandraConfigFileReader cassandraConfigFileReader = new CassandraConfigFileReader(); + shards = + cassandraConfigFileReader.getCassandraShard( + ((CassandraConnectionConfig) sourceConnectionConfig).getOptionsMap()); + LOG.info("Cassandra shard config is parsed."); + } else { + String errorMessage = + "Invalid source config for source type: " + + sourceType + + ". Source config parsed to: " + + sourceConnectionConfig.getClass() + + ". Source config file path: " + + sourceShardsFilePath; + LOG.error(errorMessage); + throw new RuntimeException(errorMessage); + } + Preconditions.checkArgument( + shards != null && !shards.isEmpty(), "Shard list should have at least 1 element."); + return shards; + } + public static SpannerIO.ReadChangeStream getReadChangeStreamDoFn( Options options, SpannerConfig spannerConfig) { diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryAllDLQIT.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryAllDLQIT.java index 92439faec0..01c5d31550 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryAllDLQIT.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryAllDLQIT.java @@ -22,12 +22,8 @@ import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; import com.google.common.io.Resources; -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; import java.io.IOException; import java.time.Duration; import java.util.HashMap; @@ -105,7 +101,11 @@ public void setUp() throws IOException, InterruptedException { gcsResourceManager = setUpSpannerITGcsResourceManager(); // Use generic multi-shard logic instead of base IT helper - createAndUploadShardConfigToGcsMulti(); + createAndUploadShardConfigToGcs( + gcsResourceManager, + Map.of( + "testShardA", jdbcResourceManagerShardA, "testShardB", jdbcResourceManagerShardB)); + ; // Upload overrides file gcsResourceManager.uploadArtifact( @@ -394,35 +394,6 @@ private Integer getIntValueCaseInsensitive(Map map, String key) return null; } - private void createAndUploadShardConfigToGcsMulti() throws IOException { - Shard shardA = new Shard(); - shardA.setLogicalShardId("testShardA"); - shardA.setUser(jdbcResourceManagerShardA.getUsername()); - shardA.setHost(jdbcResourceManagerShardA.getHost()); - shardA.setPassword(jdbcResourceManagerShardA.getPassword()); - shardA.setPort(String.valueOf(jdbcResourceManagerShardA.getPort())); - shardA.setDbName(jdbcResourceManagerShardA.getDatabaseName()); - JsonObject jsObjA = (JsonObject) new Gson().toJsonTree(shardA).getAsJsonObject(); - jsObjA.remove("secretManagerUri"); // remove field secretManagerUri - - Shard shardB = new Shard(); - shardB.setLogicalShardId("testShardB"); - shardB.setUser(jdbcResourceManagerShardB.getUsername()); - shardB.setHost(jdbcResourceManagerShardB.getHost()); - shardB.setPassword(jdbcResourceManagerShardB.getPassword()); - shardB.setPort(String.valueOf(jdbcResourceManagerShardB.getPort())); - shardB.setDbName(jdbcResourceManagerShardB.getDatabaseName()); - JsonObject jsObjB = (JsonObject) new Gson().toJsonTree(shardB).getAsJsonObject(); - jsObjB.remove("secretManagerUri"); // remove field secretManagerUri - - JsonArray ja = new JsonArray(); - ja.add(jsObjA); - ja.add(jsObjB); - String shardFileContents = ja.toString(); - LOG.info("Shard file contents: {}", shardFileContents); - gcsResourceManager.createArtifact("input/shard.json", shardFileContents); - } - private void insertDataInSpanner() { com.google.cloud.spanner.Mutation customer1 = com.google.cloud.spanner.Mutation.newInsertOrUpdateBuilder("Customers") diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryDLQIT.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryDLQIT.java index 48f761c2cb..1a0213c178 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryDLQIT.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDBShardedMySQLRetryDLQIT.java @@ -22,12 +22,8 @@ import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; import com.google.common.io.Resources; -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; import com.google.pubsub.v1.SubscriptionName; import java.io.IOException; import java.time.Duration; @@ -108,7 +104,10 @@ public void setUp() throws IOException, InterruptedException { SpannerToSourceDBShardedMySQLRetryDLQIT.MYSQL_SCHEMA_FILE_RESOURCE); gcsResourceManager = setUpSpannerITGcsResourceManager(); - createAndUploadShardConfigToGcsMulti(); + createAndUploadShardConfigToGcs( + gcsResourceManager, + Map.of( + "testShardA", jdbcResourceManagerShardA, "testShardB", jdbcResourceManagerShardB)); // Upload session file gcsResourceManager.uploadArtifact( @@ -374,35 +373,6 @@ private Integer getIntValueCaseInsensitive(Map map, String key) return null; } - private void createAndUploadShardConfigToGcsMulti() throws IOException { - Shard shardA = new Shard(); - shardA.setLogicalShardId("testShardA"); - shardA.setUser(jdbcResourceManagerShardA.getUsername()); - shardA.setHost(jdbcResourceManagerShardA.getHost()); - shardA.setPassword(jdbcResourceManagerShardA.getPassword()); - shardA.setPort(String.valueOf(jdbcResourceManagerShardA.getPort())); - shardA.setDbName(jdbcResourceManagerShardA.getDatabaseName()); - JsonObject jsObjA = (JsonObject) new Gson().toJsonTree(shardA).getAsJsonObject(); - jsObjA.remove("secretManagerUri"); - - Shard shardB = new Shard(); - shardB.setLogicalShardId("testShardB"); - shardB.setUser(jdbcResourceManagerShardB.getUsername()); - shardB.setHost(jdbcResourceManagerShardB.getHost()); - shardB.setPassword(jdbcResourceManagerShardB.getPassword()); - shardB.setPort(String.valueOf(jdbcResourceManagerShardB.getPort())); - shardB.setDbName(jdbcResourceManagerShardB.getDatabaseName()); - JsonObject jsObjB = (JsonObject) new Gson().toJsonTree(shardB).getAsJsonObject(); - jsObjB.remove("secretManagerUri"); - - JsonArray ja = new JsonArray(); - ja.add(jsObjA); - ja.add(jsObjB); - String shardFileContents = ja.toString(); - LOG.info("Shard file contents: {}", shardFileContents); - gcsResourceManager.createArtifact("input/shard.json", shardFileContents); - } - private void insertDataInSpanner() { com.google.cloud.spanner.Mutation customer1 = com.google.cloud.spanner.Mutation.newInsertOrUpdateBuilder("Customers") diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbCustomShardIT.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbCustomShardIT.java index b958f58283..487c433245 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbCustomShardIT.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbCustomShardIT.java @@ -23,11 +23,7 @@ import com.google.cloud.spanner.Mutation; import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; import com.google.common.io.Resources; -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; import com.google.pubsub.v1.SubscriptionName; import java.io.IOException; import java.time.Duration; @@ -103,7 +99,10 @@ public void setUp() throws IOException, InterruptedException { gcsResourceManager = setUpSpannerITGcsResourceManager(); createAndUploadJarToGcs(gcsResourceManager); - createAndUploadShardConfigToGcs(); + createAndUploadShardConfigToGcs( + gcsResourceManager, + Map.of( + "testShardA", jdbcResourceManagerShardA, "testShardB", jdbcResourceManagerShardB)); gcsResourceManager.uploadArtifact( "input/session.json", Resources.getResource(SpannerToSourceDbCustomShardIT.SESSION_FILE_RESOURCE).getPath()); @@ -211,32 +210,4 @@ private void writeSpannerDataForSingers(int singerId, String firstName, String s .build(); spannerResourceManager.write(m); } - - private void createAndUploadShardConfigToGcs() throws IOException { - Shard shard = new Shard(); - shard.setLogicalShardId("testShardA"); - shard.setUser(jdbcResourceManagerShardA.getUsername()); - shard.setHost(jdbcResourceManagerShardA.getHost()); - shard.setPassword(jdbcResourceManagerShardA.getPassword()); - shard.setPort(String.valueOf(jdbcResourceManagerShardA.getPort())); - shard.setDbName(jdbcResourceManagerShardA.getDatabaseName()); - JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); - jsObj.remove("secretManagerUri"); // remove field secretManagerUri - - Shard shardB = new Shard(); - shardB.setLogicalShardId("testShardB"); - shardB.setUser(jdbcResourceManagerShardB.getUsername()); - shardB.setHost(jdbcResourceManagerShardB.getHost()); - shardB.setPassword(jdbcResourceManagerShardB.getPassword()); - shardB.setPort(String.valueOf(jdbcResourceManagerShardB.getPort())); - shardB.setDbName(jdbcResourceManagerShardB.getDatabaseName()); - JsonObject jsObjB = (JsonObject) new Gson().toJsonTree(shardB).getAsJsonObject(); - jsObjB.remove("secretManagerUri"); // remove field secretManagerUri - JsonArray ja = new JsonArray(); - ja.add(jsObj); - ja.add(jsObjB); - String shardFileContents = ja.toString(); - LOG.info("Shard file contents: {}", shardFileContents); - gcsResourceManager.createArtifact("input/shard.json", shardFileContents); - } } diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbITBase.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbITBase.java index b86d5be8ee..33b5dadefb 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbITBase.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbITBase.java @@ -20,10 +20,10 @@ import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.JdbcShardConfig; import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; import com.google.common.io.Resources; import com.google.gson.Gson; -import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.google.pubsub.v1.SubscriptionName; import com.google.pubsub.v1.TopicName; @@ -132,34 +132,52 @@ public SubscriptionName createPubsubResources( } protected void createAndUploadShardConfigToGcs( - GcsResourceManager gcsResourceManager, JDBCResourceManager jdbcResourceManager) + GcsResourceManager gcsResourceManager, + Map shardNameToJdbcResourceManagerMaps) + throws IOException { + + List shards = + shardNameToJdbcResourceManagerMaps.entrySet().stream() + .map(e -> createShardConfig(e.getValue(), e.getKey())) + .toList(); + JdbcShardConfig jdbcShardConfig = new JdbcShardConfig(); + jdbcShardConfig.setShardConfigs(shards); + JsonObject jsObj = new Gson().toJsonTree(jdbcShardConfig).getAsJsonObject(); + String shardFileContents = jsObj.toString(); + LOG.info("Shard file contents: {}", shardFileContents); + gcsResourceManager.createArtifact("input/shard.json", shardFileContents); + } + + protected void createAndUploadShardConfigToGcs( + GcsResourceManager gcsResourceManager, JDBCResourceManager jdbcResourceManagers) throws IOException { + List shards = + Collections.singletonList(createShardConfig(jdbcResourceManagers, "Shard1")); + JdbcShardConfig jdbcShardConfig = new JdbcShardConfig(); + jdbcShardConfig.setShardConfigs(shards); + JsonObject jsObj = new Gson().toJsonTree(jdbcShardConfig).getAsJsonObject(); + String shardFileContents = jsObj.toString(); + LOG.info("Shard file contents: {}", shardFileContents); + gcsResourceManager.createArtifact("input/shard.json", shardFileContents); + } + + private Shard createShardConfig(JDBCResourceManager jdbcResourceManager, String shardId) { Shard shard = new Shard(); - shard.setLogicalShardId("Shard1"); + shard.setLogicalShardId(shardId); shard.setUser(jdbcResourceManager.getUsername()); shard.setPassword(jdbcResourceManager.getPassword()); - if (jdbcResourceManager instanceof org.apache.beam.it.jdbc.PostgresResourceManager) { - org.apache.beam.it.jdbc.PostgresResourceManager pgRm = - (org.apache.beam.it.jdbc.PostgresResourceManager) jdbcResourceManager; + if (jdbcResourceManager instanceof org.apache.beam.it.jdbc.PostgresResourceManager pgRm) { shard.setHost(pgRm.getHost()); shard.setPort(String.valueOf(pgRm.getPort())); shard.setDbName(pgRm.getDatabaseName()); - } else if (jdbcResourceManager instanceof org.apache.beam.it.jdbc.MySQLResourceManager) { - org.apache.beam.it.jdbc.MySQLResourceManager mySqlRm = - (org.apache.beam.it.jdbc.MySQLResourceManager) jdbcResourceManager; + } else if (jdbcResourceManager instanceof MySQLResourceManager mySqlRm) { shard.setHost(mySqlRm.getHost()); shard.setPort(String.valueOf(mySqlRm.getPort())); shard.setDbName(mySqlRm.getDatabaseName()); } else { throw new IllegalArgumentException("Unsupported JDBC resource manager type"); } - JsonObject jsObj = new Gson().toJsonTree(shard).getAsJsonObject(); - jsObj.remove("secretManagerUri"); // remove field secretManagerUri - JsonArray ja = new JsonArray(); - ja.add(jsObj); - String shardFileContents = ja.toString(); - LOG.info("Shard file contents: {}", shardFileContents); - gcsResourceManager.createArtifact("input/shard.json", shardFileContents); + return shard; } protected CassandraResourceManager generateKeyspaceAndBuildCassandraResource() { diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbInterleaveMultiShardIT.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbInterleaveMultiShardIT.java index 5b124874c2..2238cd11ad 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbInterleaveMultiShardIT.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbInterleaveMultiShardIT.java @@ -25,11 +25,7 @@ import com.google.cloud.spanner.Mutation; import com.google.cloud.teleport.metadata.SkipDirectRunnerTest; import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; import com.google.common.io.Resources; -import com.google.gson.Gson; -import com.google.gson.JsonArray; -import com.google.gson.JsonObject; import com.google.pubsub.v1.SubscriptionName; import java.io.IOException; import java.time.Duration; @@ -103,7 +99,9 @@ public void setUp() throws IOException { jdbcResourceManagerShardB, SpannerToSourceDbInterleaveMultiShardIT.MYSQL_DDL_RESOURCE); gcsResourceManager = setUpSpannerITGcsResourceManager(); - createAndUploadShardConfigToGcs(); + createAndUploadShardConfigToGcs( + gcsResourceManager, + Map.of("shardA", jdbcResourceManagerShardA, "shardB", jdbcResourceManagerShardB)); gcsResourceManager.uploadArtifact( "input/session.json", Resources.getResource(SESSION_FILE_RESOURSE).getPath()); pubsubResourceManager = setUpPubSubResourceManager(); @@ -383,32 +381,4 @@ private void assertDeletedRowsInMySQL() throws InterruptedException { () -> jdbcResourceManagerShardB.getRowCount("child22") == 0); assertThatResult(child2Result).meetsConditions(); } - - private void createAndUploadShardConfigToGcs() throws IOException { - Shard shard = new Shard(); - shard.setLogicalShardId("shardA"); - shard.setUser(jdbcResourceManagerShardA.getUsername()); - shard.setHost(jdbcResourceManagerShardA.getHost()); - shard.setPassword(jdbcResourceManagerShardA.getPassword()); - shard.setPort(String.valueOf(jdbcResourceManagerShardA.getPort())); - shard.setDbName(jdbcResourceManagerShardA.getDatabaseName()); - JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); - jsObj.remove("secretManagerUri"); // remove field secretManagerUri - - Shard shardB = new Shard(); - shardB.setLogicalShardId("shardB"); - shardB.setUser(jdbcResourceManagerShardB.getUsername()); - shardB.setHost(jdbcResourceManagerShardB.getHost()); - shardB.setPassword(jdbcResourceManagerShardB.getPassword()); - shardB.setPort(String.valueOf(jdbcResourceManagerShardB.getPort())); - shardB.setDbName(jdbcResourceManagerShardB.getDatabaseName()); - JsonObject jsObjB = (JsonObject) new Gson().toJsonTree(shardB).getAsJsonObject(); - jsObjB.remove("secretManagerUri"); // remove field secretManagerUri - JsonArray ja = new JsonArray(); - ja.add(jsObj); - ja.add(jsObjB); - String shardFileContents = ja.toString(); - LOG.info("Shard file contents: {}", shardFileContents); - gcsResourceManager.createArtifact("input/shard.json", shardFileContents); - } } diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLTBase.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLTBase.java index 5a3e561966..b8617a71f4 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLTBase.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLTBase.java @@ -16,11 +16,11 @@ package com.google.cloud.teleport.v2.templates; import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.JdbcShardConfig; import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; import com.google.common.base.MoreObjects; import com.google.common.io.Resources; import com.google.gson.Gson; -import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.google.pubsub.v1.SubscriptionName; import com.google.pubsub.v1.TopicName; @@ -213,7 +213,7 @@ public SpannerResourceManager createSpannerMetadataDatabase() throws IOException public void createAndUploadShardConfigToGcs( GcsResourceManager gcsResourceManager, List jdbcResourceManagers) throws IOException { - JsonArray ja = new JsonArray(); + List shards = new ArrayList<>(); for (int i = 0; i < 1; ++i) { if (jdbcResourceManagers.get(i) instanceof MySQLResourceManager) { MySQLResourceManager resourceManager = (MySQLResourceManager) jdbcResourceManagers.get(i); @@ -224,15 +224,16 @@ public void createAndUploadShardConfigToGcs( shard.setPassword(jdbcResourceManagers.get(i).getPassword()); shard.setPort(String.valueOf(resourceManager.getPort())); shard.setDbName(jdbcResourceManagers.get(i).getDatabaseName()); - JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); - jsObj.remove("secretManagerUri"); // remove field secretManagerUri - ja.add(jsObj); + shards.add(shard); } else { throw new UnsupportedOperationException( jdbcResourceManagers.get(i).getClass().getSimpleName() + " is not supported"); } } - String shardFileContents = ja.toString(); + JdbcShardConfig jdbcShardConfig = new JdbcShardConfig(); + jdbcShardConfig.setShardConfigs(shards); + JsonObject jsObj = (JsonObject) new Gson().toJsonTree(jdbcShardConfig).getAsJsonObject(); + String shardFileContents = jsObj.toString(); LOG.info("Shard file contents: {}", shardFileContents); gcsResourceManager.createArtifact("input/shard.json", shardFileContents); } diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLargeBacklogLT.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLargeBacklogLT.java index 163b337629..aa07095c4b 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLargeBacklogLT.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbLargeBacklogLT.java @@ -30,13 +30,14 @@ import com.google.cloud.spanner.SpannerOptions; import com.google.cloud.teleport.metadata.TemplateLoadTest; import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.cloud.teleport.v2.spanner.migrations.source.config.JdbcShardConfig; import com.google.common.base.MoreObjects; import com.google.gson.Gson; -import com.google.gson.JsonArray; import com.google.gson.JsonObject; import java.io.IOException; import java.text.ParseException; import java.time.Duration; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -458,18 +459,21 @@ private void createLogicalTableSchema(CloudSqlResourceManager manager, String db } private void createAndUploadShardConfigToGcs() throws IOException { - JsonArray ja = new JsonArray(); - ja.add(createShardConfig("shard_0", "shard0", manager1)); - ja.add(createShardConfig("shard_1", "shard1", manager1)); - ja.add(createShardConfig("shard_2", "shard2", manager2)); - ja.add(createShardConfig("shard_3", "shard3", manager2)); - - String shardFileContents = ja.toString(); + List shards = new ArrayList<>(); + shards.add(createShardConfig("shard_0", "shard0", manager1)); + shards.add(createShardConfig("shard_1", "shard1", manager1)); + shards.add(createShardConfig("shard_2", "shard2", manager2)); + shards.add(createShardConfig("shard_3", "shard3", manager2)); + + JdbcShardConfig jdbcShardConfig = new JdbcShardConfig(); + jdbcShardConfig.setShardConfigs(shards); + JsonObject jsObj = new Gson().toJsonTree(jdbcShardConfig).getAsJsonObject(); + String shardFileContents = jsObj.toString(); LOG.info("Shard file contents: {}", shardFileContents); gcsResourceManager.createArtifact(SOURCE_SHARDS_FILE_NAME, shardFileContents); } - private JsonObject createShardConfig( + private Shard createShardConfig( String logicalShardId, String dbName, CloudSqlResourceManager manager) { Shard shard = new Shard(); shard.setLogicalShardId(logicalShardId); @@ -478,9 +482,7 @@ private JsonObject createShardConfig( shard.setPassword(manager.getPassword()); shard.setPort(String.valueOf(manager.getPort())); shard.setDbName(dbName); - JsonObject jsObj = (JsonObject) new Gson().toJsonTree(shard).getAsJsonObject(); - jsObj.remove("secretManagerUri"); - return jsObj; + return shard; } private PipelineLauncher.LaunchInfo launchImportJob(String inputDir) throws IOException { diff --git a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbTest.java b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbTest.java index 7f1e1e96f4..d6f4f5072c 100644 --- a/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbTest.java +++ b/v2/spanner-to-sourcedb/src/test/java/com/google/cloud/teleport/v2/templates/SpannerToSourceDbTest.java @@ -15,16 +15,22 @@ */ package com.google.cloud.teleport.v2.templates; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.when; import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.config.TypedDriverOption; import com.google.cloud.spanner.Options.RpcPriority; import com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager; import com.google.cloud.teleport.v2.spanner.ddl.Ddl; import com.google.cloud.teleport.v2.spanner.migrations.shard.CassandraShard; import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; +import com.google.cloud.teleport.v2.spanner.migrations.utils.JarFileReader; import com.google.cloud.teleport.v2.spanner.sourceddl.CassandraInformationSchemaScanner; import com.google.cloud.teleport.v2.spanner.sourceddl.PostgreSQLInformationSchemaScanner; import com.google.cloud.teleport.v2.spanner.sourceddl.SourceDatabaseType; @@ -32,6 +38,10 @@ import com.google.cloud.teleport.v2.templates.SpannerToSourceDb.Options; import com.google.cloud.teleport.v2.templates.constants.Constants; import com.google.common.collect.ImmutableMap; +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.nio.file.Files; import java.sql.Connection; import java.sql.ResultSet; import java.sql.Statement; @@ -40,6 +50,7 @@ import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions; import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; import org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions; +import org.apache.beam.sdk.io.FileSystems; import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; import org.apache.beam.sdk.options.PipelineOptionsFactory; @@ -47,8 +58,10 @@ import org.apache.beam.sdk.values.PCollectionView; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.mockito.Mock; import org.mockito.MockedConstruction; @@ -67,6 +80,12 @@ public class SpannerToSourceDbTest { @Mock private PCollectionView mockShadowTableDdlView; @Mock private SpannerConfig mockSpannerConfig; @Mock private SpannerConfig mockSpannerMetadataConfig; + @Rule public TemporaryFolder tempFolder = new TemporaryFolder(); + + @BeforeClass + public static void setupFileSystem() { + FileSystems.setDefaultPipelineOptions(TestPipeline.testingPipelineOptions()); + } private List shards; private Ddl dummyDdl; @@ -217,8 +236,6 @@ public void testValidateMySQLNotReadOnly_NoVariable() throws Exception { ResultSet mockRs = mock(ResultSet.class); when(mockConn.createStatement()).thenReturn(mockStmt); - when(mockStmt.executeQuery("SHOW VARIABLES LIKE 'read_only'")).thenReturn(mockRs); - when(mockRs.next()).thenReturn(false); // No rows! try (MockedStatic mockedStatic = Mockito.mockStatic(SpannerToSourceDb.class, Mockito.CALLS_REAL_METHODS)) { @@ -294,4 +311,83 @@ public void testGetSourceSchema_Cassandra() throws Exception { } } } + + @Test + public void testGetShardList_unsupportedSourceType() { + RuntimeException exception = + assertThrows( + RuntimeException.class, + () -> SpannerToSourceDb.getShardList("unsupported_db", "dummy/path.json")); + assertNotNull(exception.getCause()); + assertEquals(IllegalArgumentException.class, exception.getCause().getClass()); + } + + @Test + public void testGetShardList_fileNotFound() { + RuntimeException exception = + assertThrows( + RuntimeException.class, + () -> SpannerToSourceDb.getShardList("mysql", "non-existent-file.json")); + assertNotNull(exception.getCause()); + assertEquals(RuntimeException.class, exception.getCause().getClass()); + assertEquals( + "Failed to read configuration input file at non-existent-file.json. Make sure it is ASCII or UTF-8 encoded and contains a well-formed HOCON/JSON string.", + exception.getCause().getMessage()); + } + + @Test + public void testGetShardList_jdbc_validJsonWrapped() throws IOException { + File tempFile = tempFolder.newFile("jdbc-config-wrapped.json"); + String wrappedJson = + "{\n" + + " \"shardConfigs\": [\n" + + " {\n" + + " \"logicalShardId\": \"shard1\",\n" + + " \"host\": \"localhost\",\n" + + " \"port\": \"3306\",\n" + + " \"user\": \"test-user\",\n" + + " \"password\": \"secret-pass\",\n" + + " \"dbName\": \"testdb\"\n" + + " }\n" + + " ]\n" + + "}"; + Files.writeString(tempFile.toPath(), wrappedJson); + + List shards = SpannerToSourceDb.getShardList("mysql", tempFile.getAbsolutePath()); + assertNotNull(shards); + assertEquals(1, shards.size()); + Shard shard = shards.get(0); + assertEquals("shard1", shard.getLogicalShardId()); + assertEquals("localhost", shard.getHost()); + assertEquals("3306", shard.getPort()); + assertEquals("test-user", shard.getUserName()); + assertEquals("secret-pass", shard.getPassword()); + assertEquals("testdb", shard.getDbName()); + } + + @Test + public void testGetShardList_cassandra_validConf() throws IOException { + try (MockedStatic mockFileReader = mockStatic(JarFileReader.class)) { + String testGcsPath = "gs://smt-test-bucket/cassandraConfig.conf"; + URL testUrl = com.google.common.io.Resources.getResource("test-cassandra-config.conf"); + mockFileReader + .when(() -> JarFileReader.saveFilesLocally(testGcsPath)) + .thenReturn(new URL[] {testUrl}); + + List shards = SpannerToSourceDb.getShardList("cassandra", testGcsPath); + assertNotNull(shards); + assertEquals(1, shards.size()); + Shard shard = shards.get(0); + assertEquals("127.0.0.1", shard.getHost()); + assertEquals("9042", shard.getPort()); + + // Assert specific CassandraShard properties + CassandraShard cassandraShard = (CassandraShard) shard; + assertEquals("cassandra", cassandraShard.getUsername()); + assertEquals("my_keyspace", cassandraShard.getKeySpaceName()); + assertEquals( + "cassandra", + cassandraShard.getOptionsMap().get(TypedDriverOption.AUTH_PROVIDER_PASSWORD)); + } + } } diff --git a/v2/spanner-to-sourcedb/src/test/resources/test-cassandra-config.conf b/v2/spanner-to-sourcedb/src/test/resources/test-cassandra-config.conf new file mode 100644 index 0000000000..6fce85e15e --- /dev/null +++ b/v2/spanner-to-sourcedb/src/test/resources/test-cassandra-config.conf @@ -0,0 +1,1510 @@ +# Reference configuration for the DataStax Java driver for Apache Cassandra®. +# +# Unless you use a custom mechanism to load your configuration (see +# SessionBuilder.withConfigLoader), all the values declared here will be used as defaults. You can +# place your own `application.conf` in the classpath to override them. +# +# Options are classified into two categories: +# - basic: what is most likely to be customized first when kickstarting a new application. +# - advanced: more elaborate tuning options, or "expert"-level customizations. +# +# This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md. +# DO NOT USE FOR PRODUCTION. +datastax-java-driver { + + # BASIC OPTIONS ---------------------------------------------------------------------------------- + + # The contact points to use for the initial connection to the cluster. + # + # These are addresses of Cassandra nodes that the driver uses to discover the cluster topology. + # Only one contact point is required (the driver will retrieve the address of the other nodes + # automatically), but it is usually a good idea to provide more than one contact point, because if + # that single contact point is unavailable, the driver cannot initialize itself correctly. + # + # This must be a list of strings with each contact point specified as "host:port". If the host is + # a DNS name that resolves to multiple A-records, all the corresponding addresses will be used. Do + # not use "localhost" as the host name (since it resolves to both IPv4 and IPv6 addresses on some + # platforms). + # + # Note that Cassandra 3 and below requires all nodes in a cluster to share the same port (see + # CASSANDRA-7544). + # + # Contact points can also be provided programmatically when you build a cluster instance. If both + # are specified, they will be merged. If both are absent, the driver will default to + # 127.0.0.1:9042. + # + # Required: no + # Modifiable at runtime: no + # Overridable in a profile: no + basic.contact-points = [ "127.0.0.1:9042", "127.0.0.2:9042" ] + + # A name that uniquely identifies the driver instance created from this configuration. This is + # used as a prefix for log messages and metrics. + # + # If this option is absent, the driver will generate an identifier composed of the letter 's' + # followed by an incrementing counter. If you provide a different value, try to keep it short to + # keep the logs readable. Also, make sure it is unique: reusing the same value will not break the + # driver, but it will mix up the logs and metrics. + # + # Required: no + # Modifiable at runtime: no + # Overridable in a profile: no + // basic.session-name = my_session + + # The name of the keyspace that the session should initially be connected to. + # + # This expects the same format as in a CQL query: case-sensitive names must be quoted (note that + # the quotes must be escaped in HOCON format). For example: + # session-keyspace = case_insensitive_name + # session-keyspace = \"CaseSensitiveName\" + # + # If this option is absent, the session won't be connected to any keyspace, and you'll have to + # either qualify table names in your queries, or use the per-query keyspace feature available in + # Cassandra 4 and above (see Request.getKeyspace()). + # + # This can also be provided programatically in CqlSessionBuilder. + # + # Required: no + # Modifiable at runtime: no + # Overridable in a profile: no + basic.session-keyspace = my_keyspace + + # How often the driver tries to reload the configuration. + # + # To disable periodic reloading, set this to 0. + # + # Required: yes (unless you pass a different ConfigLoader to the session builder). + # Modifiable at runtime: yes, the new value will be used after the next time the configuration + # gets reloaded. + # Overridable in a profile: no + basic.config-reload-interval = 5 minutes + + basic.request { + # How long the driver waits for a request to complete. This is a global limit on the duration of + # a session.execute() call, including any internal retries the driver might do. + # + # By default, this value is set pretty high to ensure that DDL queries don't time out, in order + # to provide the best experience for new users trying the driver with the out-of-the-box + # configuration. + # For any serious deployment, we recommend that you use separate configuration profiles for DDL + # and DML; you can then set the DML timeout much lower (down to a few milliseconds if needed). + # + # Note that, because timeouts are scheduled on the driver's timer thread, the duration specified + # here must be greater than the timer tick duration defined by the + # advanced.netty.timer.tick-duration setting (see below). If that is not the case, timeouts will + # not be triggered as timely as desired. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for requests issued after the change. + # Overridable in a profile: yes + timeout = 2 seconds + + # The consistency level. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for requests issued after the change. + # Overridable in a profile: yes + consistency = LOCAL_ONE + + # The page size. This controls how many rows will be retrieved simultaneously in a single + # network roundtrip (the goal being to avoid loading too many results in memory at the same + # time). If there are more results, additional requests will be used to retrieve them (either + # automatically if you iterate with the sync API, or explicitly with the async API's + # fetchNextPage method). + # If the value is 0 or negative, it will be ignored and the request will not be paged. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for requests issued after the change. + # Overridable in a profile: yes + page-size = 5000 + + # The serial consistency level. + # The allowed values are SERIAL and LOCAL_SERIAL. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for requests issued after the change. + # Overridable in a profile: yes + serial-consistency = SERIAL + + # The default idempotence of a request, that will be used for all `Request` instances where + # `isIdempotent()` returns null. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for requests issued after the change. + # Overridable in a profile: yes + default-idempotence = false + } + + # The policy that decides the "query plan" for each query; that is, which nodes to try as + # coordinators, and in which order. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: yes. Note that the driver creates as few instances as possible: if a + # named profile inherits from the default profile, or if two sibling profiles have the exact + # same configuration, they will share a single policy instance at runtime. + # If there are multiple load balancing policies in a single driver instance, they work together + # in the following way: + # - each request gets a query plan from its profile's policy (or the default policy if the + # request has no profile, or the profile does not override the policy). + # - when the policies assign distances to nodes, the driver uses the closest assigned distance + # for any given node. + basic.load-balancing-policy { + # The class of the policy. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.loadbalancing. + # + # The driver provides a single implementation out of the box: DefaultLoadBalancingPolicy. + # + # You can also specify a custom class that implements LoadBalancingPolicy and has a public + # constructor with two arguments: the DriverContext and a String representing the profile name. + class = DefaultLoadBalancingPolicy + + # The datacenter that is considered "local": the default policy will only include nodes from + # this datacenter in its query plans. + # + # When using the default policy, this option can only be absent if you specified no contact + # points: in that case, the driver defaults to 127.0.0.1:9042, and that node's datacenter is + # used as the local datacenter. As soon as you provide contact points (either through the + # configuration or through the session builder), you must define the local datacenter + # explicitly, and initialization will fail if this property is absent. In addition, all contact + # points should be from this datacenter; warnings will be logged for nodes that are from a + # different one. + # + # This can also be specified programmatically with SessionBuilder.withLocalDatacenter. If both + # are specified, the programmatic value takes precedence. + // local-datacenter = datacenter1 + + # A custom filter to include/exclude nodes. + # + # This option is not required; if present, it must be the fully-qualified name of a class that + # implements `java.util.function.Predicate`, and has a public constructor taking a single + # `DriverContext` argument. + # + # Alternatively, you can pass an instance of your filter to + # CqlSession.builder().withNodeFilter(). In that case, this option will be ignored. + # + # The predicate's `test(Node)` method will be invoked each time the policy processes a + # topology or state change: if it returns false, the node will be set at distance IGNORED + # (meaning the driver won't ever connect to it), and never included in any query plan. + // filter.class= + } + basic.cloud { + # The location of the cloud secure bundle used to connect to Datastax Apache Cassandra as a + # service. + # This setting must be a valid URL. + # If the protocol is not specified, it is implicitly assumed to be the `file://` protocol, + # in which case the value is expected to be a valid path on the local filesystem. + # For example, `/a/path/to/bundle` will be interpreted as `file:/a/path/to/bunde`. + # If the protocol is provided explicitly, then the value will be used as is. + # + # Required: no + # Modifiable at runtime: no + # Overridable in a profile: no + // secure-connect-bundle = /location/of/secure/connect/bundle + } + + + # ADVANCED OPTIONS ------------------------------------------------------------------------------- + + advanced.connection { + # The timeout to use for internal queries that run as part of the initialization process, just + # after we open a connection. If this timeout fires, the initialization of the connection will + # fail. If this is the first connection ever, the driver will fail to initialize as well, + # otherwise it will retry the connection later. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + init-query-timeout = 500 milliseconds + + # The timeout to use when the driver changes the keyspace on a connection at runtime (this + # happens when the client issues a `USE ...` query, and all connections belonging to the current + # session need to be updated). + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + set-keyspace-timeout = ${datastax-java-driver.advanced.connection.init-query-timeout} + + # The driver maintains a connection pool to each node, according to the distance assigned to it + # by the load balancing policy. If the distance is IGNORED, no connections are maintained. + pool { + local { + # The number of connections in the pool. + # + # Required: yes + # Modifiable at runtime: yes; when the change is detected, all active pools will be notified + # and will adjust their size. + # Overridable in a profile: no + size = 1 + } + remote { + size = 1 + } + } + + # The maximum number of requests that can be executed concurrently on a connection. This must be + # between 1 and 32768. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + max-requests-per-connection = 1024 + + # The maximum number of "orphaned" requests before a connection gets closed automatically. + # + # Sometimes the driver writes to a node but stops listening for a response (for example if the + # request timed out, or was completed by another node). But we can't safely reuse the stream id + # on this connection until we know for sure that the server is done with it. Therefore the id is + # marked as "orphaned" until we get a response from the node. + # + # If the response never comes (or is lost because of a network issue), orphaned ids can + # accumulate over time, eventually affecting the connection's throughput. So we monitor them + # and close the connection above a given threshold (the pool will replace it). + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + max-orphan-requests = 24576 + + # Whether to log non-fatal errors when the driver tries to open a new connection. + # + # This error as recoverable, as the driver will try to reconnect according to the reconnection + # policy. Therefore some users see them as unnecessary clutter in the logs. On the other hand, + # those logs can be handy to debug a misbehaving node. + # + # Note that some type of errors are always logged, regardless of this option: + # - protocol version mismatches (the node gets forced down) + # - when the cluster name in system.local doesn't match the other nodes (the node gets forced + # down) + # - authentication errors (will be retried) + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + warn-on-init-error = true + } + + # Whether to schedule reconnection attempts if all contact points are unreachable on the first + # initialization attempt. + # + # If this is true, the driver will retry according to the reconnection policy. The + # `SessionBuilder.build()` call -- or the future returned by `SessionBuilder.buildAsync()` -- + # won't complete until a contact point has been reached. + # + # If this is false and no contact points are available, the driver will fail with an + # AllNodesFailedException. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + advanced.reconnect-on-init = false + + # The policy that controls how often the driver tries to re-establish connections to down nodes. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: no + advanced.reconnection-policy { + # The class of the policy. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.connection. + # + # The driver provides two implementations out of the box: ExponentialReconnectionPolicy and + # ConstantReconnectionPolicy. + # + # You can also specify a custom class that implements ReconnectionPolicy and has a public + # constructor with a DriverContext argument. + class = ExponentialReconnectionPolicy + + # ExponentialReconnectionPolicy starts with the base delay, and doubles it after each failed + # reconnection attempt, up to the maximum delay (after that it stays constant). + # + # ConstantReconnectionPolicy only uses the base-delay value, the interval never changes. + base-delay = 1 second + max-delay = 60 seconds + } + + # The policy that controls if the driver retries requests that have failed on one node. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: yes. Note that the driver creates as few instances as possible: if a + # named profile inherits from the default profile, or if two sibling profiles have the exact + # same configuration, they will share a single policy instance at runtime. + advanced.retry-policy { + # The class of the policy. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.retry. + # + # The driver provides a single implementation out of the box: DefaultRetryPolicy. + # + # You can also specify a custom class that implements RetryPolicy and has a public constructor + # with two arguments: the DriverContext and a String representing the profile name. + class = DefaultRetryPolicy + } + + # The policy that controls if the driver pre-emptively tries other nodes if a node takes too long + # to respond. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: yes. Note that the driver creates as few instances as possible: if a + # named profile inherits from the default profile, or if two sibling profiles have the exact + # same configuration, they will share a single policy instance at runtime. + advanced.speculative-execution-policy { + # The class of the policy. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.specex. + # + # The following implementations are available out of the box: + # - NoSpeculativeExecutionPolicy: never schedule any speculative execution + # - ConstantSpeculativeExecutionPolicy: schedule executions based on constant delays. This + # requires the `max-executions` and `delay` options below. + # + # You can also specify a custom class that implements SpeculativeExecutionPolicy and has a + # public constructor with two arguments: the DriverContext and a String representing the + # profile name. + class = NoSpeculativeExecutionPolicy + + # The maximum number of executions (including the initial, non-speculative execution). + # This must be at least one. + // max-executions = 3 + + # The delay between each execution. 0 is allowed, and will result in all executions being sent + # simultaneously when the request starts. + # + # Note that sub-millisecond precision is not supported, any excess precision information will be + # dropped; in particular, delays of less than 1 millisecond are equivalent to 0. + # + # Also note that, because speculative executions are scheduled on the driver's timer thread, + # the duration specified here must be greater than the timer tick duration defined by the + # advanced.netty.timer.tick-duration setting (see below). If that is not the case, speculative + # executions will not be triggered as timely as desired. + # + # This must be positive or 0. + // delay = 100 milliseconds + } + + # The component that handles authentication on each new connection. + # + # Required: no. If the 'class' child option is absent, no authentication will occur. + # Modifiable at runtime: no + # Overridable in a profile: no + # + # Note that the contents of this section can be overridden programmatically with + # SessionBuilder.withAuthProvider or SessionBuilder.withAuthCredentials. + advanced.auth-provider { + # The class of the provider. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.auth. + # + # The driver provides a single implementation out of the box: PlainTextAuthProvider, that uses + # plain-text credentials. It requires the `username` and `password` options below. + # If storing clear text credentials in the configuration is not acceptable for you, consider + # providing them programmatically with SessionBuilder#withAuthCredentials, or writing your own + # provider implementation. + # + # You can also specify a custom class that implements AuthProvider and has a public + # constructor with a DriverContext argument. + class = PlainTextAuthProvider + + # Sample configuration for the plain-text provider: + username = cassandra + password = cassandra + } + + # The SSL engine factory that will initialize an SSL engine for each new connection to a server. + # + # Required: no. If the 'class' child option is absent, SSL won't be activated. + # Modifiable at runtime: no + # Overridable in a profile: no + # + # Note that the contents of this section can be overridden programmatically with + # SessionBuilder.withSslEngineFactory or SessionBuilder#withSslContext. + advanced.ssl-engine-factory { + # The class of the factory. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.ssl. + # + # The driver provides a single implementation out of the box: DefaultSslEngineFactory, that uses + # the JDK's built-in SSL implementation. + # + # You can also specify a custom class that implements SslEngineFactory and has a public + # constructor with a DriverContext argument. + // class = DefaultSslEngineFactory + + # Sample configuration for the default SSL factory: + # The cipher suites to enable when creating an SSLEngine for a connection. + # This property is optional. If it is not present, the driver won't explicitly enable cipher + # suites on the engine, which according to the JDK documentations results in "a minimum quality + # of service". + // cipher-suites = [ "TLS_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA" ] + + # Whether or not to require validation that the hostname of the server certificate's common + # name matches the hostname of the server being connected to. If not set, defaults to true. + // hostname-validation = true + + # The locations and passwords used to access truststore and keystore contents. + # These properties are optional. If either truststore-path or keystore-path are specified, + # the driver builds an SSLContext from these files. If neither option is specified, the + # default SSLContext is used, which is based on system property configuration. + // truststore-path = /path/to/client.truststore + // truststore-password = password123 + // keystore-path = /path/to/client.keystore + // keystore-password = password123 + } + + # The generator that assigns a microsecond timestamp to each request. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: yes. Note that the driver creates as few instances as possible: if a + # named profile inherits from the default profile, or if two sibling profiles have the exact + # same configuration, they will share a single generator instance at runtime. + advanced.timestamp-generator { + # The class of the generator. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.time. + # + # The driver provides the following implementations out of the box: + # - AtomicTimestampGenerator: timestamps are guaranteed to be unique across all client threads. + # - ThreadLocalTimestampGenerator: timestamps that are guaranteed to be unique within each + # thread only. + # - ServerSideTimestampGenerator: do not generate timestamps, let the server assign them. + # + # You can also specify a custom class that implements TimestampGenerator and has a public + # constructor with two arguments: the DriverContext and a String representing the profile name. + class = AtomicTimestampGenerator + + # To guarantee that queries are applied on the server in the same order as the client issued + # them, timestamps must be strictly increasing. But this means that, if the driver sends more + # than one query per microsecond, timestamps will drift in the future. While this could happen + # occasionally under high load, it should not be a regular occurrence. Therefore the built-in + # implementations log a warning to detect potential issues. + drift-warning { + # How far in the future timestamps are allowed to drift before the warning is logged. + # If it is undefined or set to 0, warnings are disabled. + threshold = 1 second + + # How often the warning will be logged if timestamps keep drifting above the threshold. + interval = 10 seconds + } + + # Whether to force the driver to use Java's millisecond-precision system clock. + # If this is false, the driver will try to access the microsecond-precision OS clock via native + # calls (and fallback to the Java one if the native calls fail). + # Unless you explicitly want to avoid native calls, there's no reason to change this. + force-java-clock = false + } + + # A session-wide component that tracks the outcome of requests. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: no + advanced.request-tracker { + # The class of the tracker. If it is not qualified, the driver assumes that it resides in the + # package com.datastax.oss.driver.internal.core.tracker. + # + # The driver provides the following implementations out of the box: + # - NoopRequestTracker: does nothing. + # - RequestLogger: logs requests (see the parameters below). + # + # You can also specify a custom class that implements RequestTracker and has a public + # constructor with a DriverContext argument. + class = NoopRequestTracker + + # Parameters for RequestLogger. All of them can be overridden in a profile, and changed at + # runtime (the new values will be taken into account for requests logged after the change). + logs { + # Whether to log successful requests. + // success.enabled = true + + slow { + # The threshold to classify a successful request as "slow". If this is unset, all successful + # requests will be considered as normal. + // threshold = 1 second + + # Whether to log slow requests. + // enabled = true + } + + # Whether to log failed requests. + // error.enabled = true + + # The maximum length of the query string in the log message. If it is longer than that, it + # will be truncated. + // max-query-length = 500 + + # Whether to log bound values in addition to the query string. + // show-values = true + + # The maximum length for bound values in the log message. If the formatted representation of a + # value is longer than that, it will be truncated. + // max-value-length = 50 + + # The maximum number of bound values to log. If a request has more values, the list of values + # will be truncated. + // max-values = 50 + + # Whether to log stack traces for failed queries. If this is disabled, the log will just + # include the exception's string representation (generally the class name and message). + // show-stack-traces = true + } + } + + # A session-wide component that controls the rate at which requests are executed. + # + # Implementations vary, but throttlers generally track a metric that represents the level of + # utilization of the session, and prevent new requests from starting when that metric exceeds a + # threshold. Pending requests may be enqueued and retried later. + # + # From the public API's point of view, this process is mostly transparent: any time that the + # request is throttled is included in the session.execute() or session.executeAsync() call. + # Similarly, the request timeout encompasses throttling: the timeout starts ticking before the + # throttler has started processing the request; a request may time out while it is still in the + # throttler's queue, before the driver has even tried to send it to a node. + # + # The only visible effect is that a request may fail with a RequestThrottlingException, if the + # throttler has determined that it can neither allow the request to proceed now, nor enqueue it; + # this indicates that your session is overloaded. + # + # Required: yes + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: no + advanced.throttler { + # The class of the throttler. If it is not qualified, the driver assumes that it resides in + # the package com.datastax.oss.driver.internal.core.session.throttling. + # + # The driver provides the following implementations out of the box: + # + # - PassThroughRequestThrottler: does not perform any kind of throttling, all requests are + # allowed to proceed immediately. Required options: none. + # + # - ConcurrencyLimitingRequestThrottler: limits the number of requests that can be executed in + # parallel. Required options: max-concurrent-requests, max-queue-size. + # + # - RateLimitingRequestThrottler: limits the request rate per second. Required options: + # max-requests-per-second, max-queue-size, drain-interval. + # + # You can also specify a custom class that implements RequestThrottler and has a public + # constructor with a DriverContext argument. + class = PassThroughRequestThrottler + + # The maximum number of requests that can be enqueued when the throttling threshold is exceeded. + # Beyond that size, requests will fail with a RequestThrottlingException. + // max-queue-size = 10000 + + # The maximum number of requests that are allowed to execute in parallel. + # Only used by ConcurrencyLimitingRequestThrottler. + // max-concurrent-requests = 10000 + + # The maximum allowed request rate. + # Only used by RateLimitingRequestThrottler. + // max-requests-per-second = 10000 + + # How often the throttler attempts to dequeue requests. This is the only way for rate-based + # throttling, because the completion of an active request does not necessarily free a "slot" for + # a queued one (the rate might still be too high). + # + # You want to set this high enough that each attempt will process multiple entries in the queue, + # but not delay requests too much. A few milliseconds is probably a happy medium. + # + # Only used by RateLimitingRequestThrottler. + // drain-interval = 10 milliseconds + } + + # A session-wide component that listens for node state changes. If it is not qualified, the driver + # assumes that it resides in the package com.datastax.oss.driver.internal.core.metadata. + # + # The driver provides a single no-op implementation out of the box: NoopNodeStateListener. + # + # You can also specify a custom class that implements NodeStateListener and has a public + # constructor with a DriverContext argument. + # + # Alternatively, you can pass an instance of your listener programmatically with + # CqlSession.builder().withNodeStateListener(). In that case, this option will be ignored. + # + # Required: unless a listener has been provided programmatically + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: no + advanced.node-state-listener.class = NoopNodeStateListener + + # A session-wide component that listens for node state changes. If it is not qualified, the driver + # assumes that it resides in the package com.datastax.oss.driver.internal.core.metadata.schema. + # + # The driver provides a single no-op implementation out of the box: NoopSchemaChangeListener. + # + # You can also specify a custom class that implements SchemaChangeListener and has a public + # constructor with a DriverContext argument. + # + # Alternatively, you can pass an instance of your listener programmatically with + # CqlSession.builder().withSchemaChangeListener(). In that case, this option will be ignored. + # + # Required: unless a listener has been provided programmatically + # Modifiable at runtime: no (but custom implementations may elect to watch configuration changes + # and allow child options to be changed at runtime). + # Overridable in a profile: no + advanced.schema-change-listener.class = NoopSchemaChangeListener + + # The address translator to use to convert the addresses sent by Cassandra nodes into ones that + # the driver uses to connect. + # This is only needed if the nodes are not directly reachable from the driver (for example, the + # driver is in a different network region and needs to use a public IP, or it connects through a + # proxy). + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + advanced.address-translator { + # The class of the translator. If it is not qualified, the driver assumes that it resides in + # the package com.datastax.oss.driver.internal.core.addresstranslation. + # + # The driver provides the following implementations out of the box: + # - PassThroughAddressTranslator: returns all addresses unchanged + # + # You can also specify a custom class that implements AddressTranslator and has a public + # constructor with a DriverContext argument. + class = PassThroughAddressTranslator + } + + # Whether to resolve the addresses passed to `basic.contact-points`. + # + # If this is true, addresses are created with `InetSocketAddress(String, int)`: the host name will + # be resolved the first time, and the driver will use the resolved IP address for all subsequent + # connection attempts. + # + # If this is false, addresses are created with `InetSocketAddress.createUnresolved()`: the host + # name will be resolved again every time the driver opens a new connection. This is useful for + # containerized environments where DNS records are more likely to change over time (note that the + # JVM and OS have their own DNS caching mechanisms, so you might need additional configuration + # beyond the driver). + # + # This option only applies to the contact points specified in the configuration. It has no effect + # on: + # - programmatic contact points passed to SessionBuilder.addContactPoints: these addresses are + # built outside of the driver, so it is your responsibility to provide unresolved instances. + # - dynamically discovered peers: the driver relies on Cassandra system tables, which expose raw + # IP addresses. Use a custom address translator to convert them to unresolved addresses (if + # you're in a containerized environment, you probably already need address translation anyway). + # + # Required: no (defaults to true) + # Modifiable at runtime: no + # Overridable in a profile: no + advanced.resolve-contact-points = true + + advanced.protocol { + # The native protocol version to use. + # + # If this option is absent, the driver looks up the versions of the nodes at startup (by default + # in system.peers.release_version), and chooses the highest common protocol version. + # For example, if you have a mixed cluster with Apache Cassandra 2.1 nodes (protocol v3) and + # Apache Cassandra 3.0 nodes (protocol v3 and v4), then protocol v3 is chosen. If the nodes + # don't have a common protocol version, initialization fails. + # + # If this option is set, then the given version will be used for all connections, without any + # negotiation or downgrading. If any of the contact points doesn't support it, that contact + # point will be skipped. + # + # Once the protocol version is set, it can't change for the rest of the driver's lifetime; if + # an incompatible node joins the cluster later, connection will fail and the driver will force + # it down (i.e. never try to connect to it again). + # + # You can check the actual version at runtime with Cluster.getContext().getProtocolVersion(). + # + # Required: no + # Modifiable at runtime: no + # Overridable in a profile: no + // version = V4 + + # The name of the algorithm used to compress protocol frames. + # + # The possible values are: + # - lz4: requires net.jpountz.lz4:lz4 in the classpath. + # - snappy: requires org.xerial.snappy:snappy-java in the classpath. + # - the string "none" to indicate no compression (this is functionally equivalent to omitting + # the option). + # + # The driver depends on the compression libraries, but they are optional. Make sure you + # redeclare an explicit dependency in your project. Refer to the driver's POM or manual for the + # exact version. + # + # Required: no. If the option is absent, protocol frames are not compressed. + # Modifiable at runtime: no + # Overridable in a profile: no + // compression = lz4 + + # The maximum length of the frames supported by the driver. Beyond that limit, requests will + # fail with an exception + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + max-frame-length = 256 MB + } + + advanced.request { + # Whether a warning is logged when a request (such as a CQL `USE ...`) changes the active + # keyspace. + # Switching keyspace at runtime is highly discouraged, because it is inherently unsafe (other + # requests expecting the old keyspace might be running concurrently), and may cause statements + # prepared before the change to fail. + # It should only be done in very specific use cases where there is only a single client thread + # executing synchronous queries (such as a cqlsh-like interpreter). In other cases, clients + # should prefix table names in their queries instead. + # + # Note that CASSANDRA-10145 (scheduled for C* 4.0) will introduce a per-request keyspace option + # as a workaround to this issue. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for keyspace switches occurring after + # the change. + # Overridable in a profile: no + warn-if-set-keyspace = true + + # If tracing is enabled for a query, this controls how the trace is fetched. + trace { + # How many times the driver will attempt to fetch the query if it is not ready yet. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for traces fetched after the change. + # Overridable in a profile: yes + attempts = 5 + + # The interval between each attempt. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for traces fetched after the change. + # Overridable in a profile: yes + interval = 3 milliseconds + + # The consistency level to use for trace queries. + # Note that the default replication strategy for the system_traces keyspace is SimpleStrategy + # with RF=2, therefore LOCAL_ONE might not work if the local DC has no replicas for a given + # trace id. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for traces fetched after the change. + # Overridable in a profile: yes + consistency = ONE + } + + # Whether logging of server warnings generated during query execution should be disabled by the + # driver. All server generated warnings will be available programmatically via the ExecutionInfo + # object on the executed statement's ResultSet. If set to "false", this will prevent the driver + # from logging these warnings. + # + # NOTE: The log formatting for these warning messages will reuse the options defined for + # advanced.request-tracker. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for query warnings received after the change. + # Overridable in a profile: yes + log-warnings = true + } + + advanced.metrics { + # The session-level metrics (all disabled by default). + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + session { + enabled = [ + # The number and rate of bytes sent for the entire session (exposed as a Meter). + // bytes-sent, + + # The number and rate of bytes received for the entire session (exposed as a Meter). + // bytes-received + + # The number of nodes to which the driver has at least one active connection (exposed as a + # Gauge). + // connected-nodes, + + # The throughput and latency percentiles of CQL requests (exposed as a Timer). + # + # This corresponds to the overall duration of the session.execute() call, including any + # retry. + // cql-requests, + + # The number of CQL requests that timed out -- that is, the session.execute() call failed + # with a DriverTimeoutException (exposed as a Counter). + // cql-client-timeouts, + + # The size of the driver-side cache of CQL prepared statements. + # + # The cache uses weak values eviction, so this represents the number of PreparedStatement + # instances that your application has created, and is still holding a reference to. Note + # that the returned value is approximate. + // cql-prepared-cache-size, + + # How long requests are being throttled (exposed as a Timer). + # + # This is the time between the start of the session.execute() call, and the moment when + # the throttler allows the request to proceed. + // throttling.delay, + + # The size of the throttling queue (exposed as a Gauge). + # + # This is the number of requests that the throttler is currently delaying in order to + # preserve its SLA. This metric only works with the built-in concurrency- and rate-based + # throttlers; in other cases, it will always be 0. + // throttling.queue-size, + + # The number of times a request was rejected with a RequestThrottlingException (exposed as + # a Counter) + // throttling.errors, + ] + + # Extra configuration (for the metrics that need it) + + # Required: if the 'cql-requests' metric is enabled + # Modifiable at runtime: no + # Overridable in a profile: no + cql-requests { + # The largest latency that we expect to record. + # + # This should be slightly higher than request.timeout (in theory, readings can't be higher + # than the timeout, but there might be a small overhead due to internal scheduling). + # + # This is used to scale internal data structures. If a higher recording is encountered at + # runtime, it is discarded and a warning is logged. + highest-latency = 3 seconds + + # The number of significant decimal digits to which internal structures will maintain + # value resolution and separation (for example, 3 means that recordings up to 1 second + # will be recorded with a resolution of 1 millisecond or better). + # + # This must be between 0 and 5. If the value is out of range, it defaults to 3 and a + # warning is logged. + significant-digits = 3 + + # The interval at which percentile data is refreshed. + # + # The driver records latency data in a "live" histogram, and serves results from a cached + # snapshot. Each time the snapshot gets older than the interval, the two are switched. + # Note that this switch happens upon fetching the metrics, so if you never fetch the + # recording interval might grow higher (that shouldn't be an issue in a production + # environment because you would typically have a metrics reporter that exports to a + # monitoring tool at a regular interval). + # + # In practice, this means that if you set this to 5 minutes, you're looking at data from a + # 5-minute interval in the past, that is at most 5 minutes old. If you fetch the metrics + # at a faster pace, you will observe the same data for 5 minutes until the interval + # expires. + # + # Note that this does not apply to the total count and rates (those are updated in real + # time). + refresh-interval = 5 minutes + } + + # Required: if the 'throttling.delay' metric is enabled + # Modifiable at runtime: no + # Overridable in a profile: no + throttling.delay { + highest-latency = 3 seconds + significant-digits = 3 + refresh-interval = 5 minutes + } + } + # The node-level metrics (all disabled by default). + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + node { + enabled = [ + # The number of connections open to this node for regular requests (exposed as a + # Gauge). + # + # This includes the control connection (which uses at most one extra connection to a + # random node in the cluster). + // pool.open-connections, + + # The number of stream ids available on the connections to this node (exposed as a + # Gauge). + # + # Stream ids are used to multiplex requests on each connection, so this is an indication + # of how many more requests the node could handle concurrently before becoming saturated + # (note that this is a driver-side only consideration, there might be other limitations on + # the server that prevent reaching that theoretical limit). + // pool.available-streams, + + # The number of requests currently executing on the connections to this node (exposed as a + # Gauge). This includes orphaned streams. + // pool.in-flight, + + # The number of "orphaned" stream ids on the connections to this node (exposed as a + # Gauge). + # + # See the description of the connection.max-orphan-requests option for more details. + // pool.orphaned-streams, + + # The number and rate of bytes sent to this node (exposed as a Meter). + // bytes-sent, + + # The number and rate of bytes received from this node (exposed as a Meter). + // bytes-received, + + # The throughput and latency percentiles of individual CQL messages sent to this node as + # part of an overall request (exposed as a Timer). + # + # Note that this does not necessarily correspond to the overall duration of the + # session.execute() call, since the driver might query multiple nodes because of retries + # and speculative executions. Therefore a single "request" (as seen from a client of the + # driver) can be composed of more than one of the "messages" measured by this metric. + # + # Therefore this metric is intended as an insight into the performance of this particular + # node. For statistics on overall request completion, use the session-level cql-requests. + // cql-messages, + + # The number of times the driver failed to send a request to this node (exposed as a + # Counter). + # + # In those case we know the request didn't even reach the coordinator, so they are retried + # on the next node automatically (without going through the retry policy). + // errors.request.unsent, + + # The number of times a request was aborted before the driver even received a response + # from this node (exposed as a Counter). + # + # This can happen in two cases: if the connection was closed due to an external event + # (such as a network error or heartbeat failure); or if there was an unexpected error + # while decoding the response (this can only be a driver bug). + // errors.request.aborted, + + # The number of times this node replied with a WRITE_TIMEOUT error (exposed as a Counter). + # + # Whether this error is rethrown directly to the client, rethrown or ignored is determined + # by the RetryPolicy. + // errors.request.write-timeouts, + + # The number of times this node replied with a READ_TIMEOUT error (exposed as a Counter). + # + # Whether this error is rethrown directly to the client, rethrown or ignored is determined + # by the RetryPolicy. + // errors.request.read-timeouts, + + # The number of times this node replied with an UNAVAILABLE error (exposed as a Counter). + # + # Whether this error is rethrown directly to the client, rethrown or ignored is determined + # by the RetryPolicy. + // errors.request.unavailables, + + # The number of times this node replied with an error that doesn't fall under other + # 'errors.*' metrics (exposed as a Counter). + // errors.request.others, + + # The total number of errors on this node that caused the RetryPolicy to trigger a retry + # (exposed as a Counter). + # + # This is a sum of all the other retries.* metrics. + // retries.total, + + # The number of errors on this node that caused the RetryPolicy to trigger a retry, broken + # down by error type (exposed as Counters). + // retries.aborted, + // retries.read-timeout, + // retries.write-timeout, + // retries.unavailable, + // retries.other, + + # The total number of errors on this node that were ignored by the RetryPolicy (exposed as + # a Counter). + # + # This is a sum of all the other ignores.* metrics. + // ignores.total, + + # The number of errors on this node that were ignored by the RetryPolicy, broken down by + # error type (exposed as Counters). + // ignores.aborted, + // ignores.read-timeout, + // ignores.write-timeout, + // ignores.unavailable, + // ignores.other, + + # The number of speculative executions triggered by a slow response from this node + # (exposed as a Counter). + // speculative-executions, + + # The number of errors encountered while trying to establish a connection to this node + # (exposed as a Counter). + # + # Connection errors are not a fatal issue for the driver, failed connections will be + # retried periodically according to the reconnection policy. You can choose whether or not + # to log those errors at WARN level with the connection.warn-on-init-error option. + # + # Authentication errors are not included in this counter, they are tracked separately in + # errors.connection.auth. + // errors.connection.init, + + # The number of authentication errors encountered while trying to establish a connection + # to this node (exposed as a Counter). + # Authentication errors are also logged at WARN level. + // errors.connection.auth, + ] + + # See cql-requests in the `session` section + # + # Required: if the 'cql-messages' metric is enabled + # Modifiable at runtime: no + # Overridable in a profile: no + cql-messages { + highest-latency = 3 seconds + significant-digits = 3 + refresh-interval = 5 minutes + } + } + } + + advanced.socket { + # Whether or not to disable the Nagle algorithm. + # + # By default, this option is set to true (Nagle disabled), because the driver has its own + # internal message coalescing algorithm. + # + # See java.net.StandardSocketOptions.TCP_NODELAY. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + tcp-no-delay = true + + # All other socket options are unset by default. The actual value depends on the underlying + # Netty transport: + # - NIO uses the defaults from java.net.Socket (refer to the javadocs of + # java.net.StandardSocketOptions for each option). + # - Epoll delegates to the underlying file descriptor, which uses the O/S defaults. + + # Whether or not to enable TCP keep-alive probes. + # + # See java.net.StandardSocketOptions.SO_KEEPALIVE. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + //keep-alive = false + + # Whether or not to allow address reuse. + # + # See java.net.StandardSocketOptions.SO_REUSEADDR. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + //reuse-address = true + + # Sets the linger interval. + # + # If the value is zero or greater, then it represents a timeout value, in seconds; + # if the value is negative, it means that this option is disabled. + # + # See java.net.StandardSocketOptions.SO_LINGER. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + //linger-interval = 0 + + # Sets a hint to the size of the underlying buffers for incoming network I/O. + # + # See java.net.StandardSocketOptions.SO_RCVBUF. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + //receive-buffer-size = 65535 + + # Sets a hint to the size of the underlying buffers for outgoing network I/O. + # + # See java.net.StandardSocketOptions.SO_SNDBUF. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + //send-buffer-size = 65535 + } + + advanced.heartbeat { + # The heartbeat interval. If a connection stays idle for that duration (no reads), the driver + # sends a dummy message on it to make sure it's still alive. If not, the connection is trashed + # and replaced. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + interval = 30 seconds + + # How long the driver waits for the response to a heartbeat. If this timeout fires, the + # heartbeat is considered failed. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for connections created after the + # change. + # Overridable in a profile: no + timeout = ${datastax-java-driver.advanced.connection.init-query-timeout} + } + + advanced.metadata { + # Topology events are external signals that inform the driver of the state of Cassandra nodes + # (by default, they correspond to gossip events received on the control connection). + # The debouncer helps smoothen out oscillations if conflicting events are sent out in short + # bursts. + # Debouncing may be disabled by setting the window to 0 or max-events to 1 (this is not + # recommended). + topology-event-debouncer { + # How long the driver waits to propagate an event. If another event is received within that + # time, the window is reset and a batch of accumulated events will be delivered. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + window = 1 second + + # The maximum number of events that can accumulate. If this count is reached, the events are + # delivered immediately and the time window is reset. This avoids holding events indefinitely + # if the window keeps getting reset. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + max-events = 20 + } + + # Options relating to schema metadata (Cluster.getMetadata.getKeyspaces). + # This metadata is exposed by the driver for informational purposes, and is also necessary for + # token-aware routing. + schema { + # Whether schema metadata is enabled. + # If this is false, the schema will remain empty, or to the last known value. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for refreshes issued after the + # change. It can also be overridden programmatically via Cluster.setSchemaMetadataEnabled. + # Overridable in a profile: no + enabled = true + + # The list of keyspaces for which schema and token metadata should be maintained. If this + # property is absent or empty, all existing keyspaces are processed. + # + # Required: no + # Modifiable at runtime: yes, the new value will be used for refreshes issued after the + # change. + # Overridable in a profile: no + // refreshed-keyspaces = [ "ks1", "ks2" ] + + # The timeout for the requests to the schema tables. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for refreshes issued after the + # change. + # Overridable in a profile: no + request-timeout = ${datastax-java-driver.basic.request.timeout} + + # The page size for the requests to the schema tables. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for refreshes issued after the + # change. + # Overridable in a profile: no + request-page-size = ${datastax-java-driver.basic.request.page-size} + + # Protects against bursts of schema updates (for example when a client issues a sequence of + # DDL queries), by coalescing them into a single update. + # Debouncing may be disabled by setting the window to 0 or max-events to 1 (this is highly + # discouraged for schema refreshes). + debouncer { + # How long the driver waits to apply a refresh. If another refresh is requested within that + # time, the window is reset and a single refresh will be triggered when it ends. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + window = 1 second + + # The maximum number of refreshes that can accumulate. If this count is reached, a refresh + # is done immediately and the window is reset. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + max-events = 20 + } + } + + # Whether token metadata (Cluster.getMetadata.getTokenMap) is enabled. + # This metadata is exposed by the driver for informational purposes, and is also necessary for + # token-aware routing. + # If this is false, it will remain empty, or to the last known value. Note that its computation + # requires information about the schema; therefore if schema metadata is disabled or filtered to + # a subset of keyspaces, the token map will be incomplete, regardless of the value of this + # property. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for refreshes issued after the change. + # Overridable in a profile: no + token-map.enabled = true + } + + advanced.control-connection { + # How long the driver waits for responses to control queries (e.g. fetching the list of nodes, + # refreshing the schema). + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + timeout = ${datastax-java-driver.advanced.connection.init-query-timeout} + + # Due to the distributed nature of Cassandra, schema changes made on one node might not be + # immediately visible to others. Under certain circumstances, the driver waits until all nodes + # agree on a common schema version (namely: before a schema refresh, before repreparing all + # queries on a newly up node, and before completing a successful schema-altering query). To do + # so, it queries system tables to find out the schema version of all nodes that are currently + # UP. If all the versions match, the check succeeds, otherwise it is retried periodically, until + # a given timeout. + # + # A schema agreement failure is not fatal, but it might produce unexpected results (for example, + # getting an "unconfigured table" error for a table that you created right before, just because + # the two queries went to different coordinators). + # + # Note that schema agreement never succeeds in a mixed-version cluster (it would be challenging + # because the way the schema version is computed varies across server versions); the assumption + # is that schema updates are unlikely to happen during a rolling upgrade anyway. + schema-agreement { + # The interval between each attempt. + # Required: yes + # Modifiable at runtime: yes, the new value will be used for checks issued after the change. + # Overridable in a profile: no + interval = 200 milliseconds + + # The timeout after which schema agreement fails. + # If this is set to 0, schema agreement is skipped and will always fail. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for checks issued after the change. + # Overridable in a profile: no + timeout = 10 seconds + + # Whether to log a warning if schema agreement fails. + # You might want to change this if you've set the timeout to 0. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for checks issued after the change. + # Overridable in a profile: no + warn-on-failure = true + } + } + + advanced.prepared-statements { + # Whether `Session.prepare` calls should be sent to all nodes in the cluster. + # + # A request to prepare is handled in two steps: + # 1) send to a single node first (to rule out simple errors like malformed queries). + # 2) if step 1 succeeds, re-send to all other active nodes (i.e. not ignored by the load + # balancing policy). + # This option controls whether step 2 is executed. + # + # The reason why you might want to disable it is to optimize network usage if you have a large + # number of clients preparing the same set of statements at startup. If your load balancing + # policy distributes queries randomly, each client will pick a different host to prepare its + # statements, and on the whole each host has a good chance of having been hit by at least one + # client for each statement. + # On the other hand, if that assumption turns out to be wrong and one host hasn't prepared a + # given statement, it needs to be re-prepared on the fly the first time it gets executed; this + # causes a performance penalty (one extra roundtrip to resend the query to prepare, and another + # to retry the execution). + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for prepares issued after the change. + # Overridable in a profile: yes + prepare-on-all-nodes = true + + # How the driver replicates prepared statements on a node that just came back up or joined the + # cluster. + reprepare-on-up { + # Whether the driver tries to prepare on new nodes at all. + # + # The reason why you might want to disable it is to optimize reconnection time when you + # believe nodes often get marked down because of temporary network issues, rather than the + # node really crashing. In that case, the node still has prepared statements in its cache when + # the driver reconnects, so re-preparing is redundant. + # + # On the other hand, if that assumption turns out to be wrong and the node had really + # restarted, its prepared statement cache is empty (before CASSANDRA-8831), and statements + # need to be re-prepared on the fly the first time they get executed; this causes a + # performance penalty (one extra roundtrip to resend the query to prepare, and another to + # retry the execution). + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for nodes that come back up after the + # change. + # Overridable in a profile: no + enabled = true + + # Whether to check `system.prepared_statements` on the target node before repreparing. + # + # This table exists since CASSANDRA-8831 (merged in 3.10). It stores the statements already + # prepared on the node, and preserves them across restarts. + # + # Checking the table first avoids repreparing unnecessarily, but the cost of the query is not + # always worth the improvement, especially if the number of statements is low. + # + # If the table does not exist, or the query fails for any other reason, the error is ignored + # and the driver proceeds to reprepare statements according to the other parameters. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for nodes that come back up after the + # change. + # Overridable in a profile: no + check-system-table = false + + # The maximum number of statements that should be reprepared. 0 or a negative value means no + # limit. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for nodes that come back up after the + # change. + # Overridable in a profile: no + max-statements = 0 + + # The maximum number of concurrent requests when repreparing. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for nodes that come back up after the + # change. + # Overridable in a profile: no + max-parallelism = 100 + + # The request timeout. This applies both to querying the system.prepared_statements table (if + # relevant), and the prepare requests themselves. + # + # Required: yes + # Modifiable at runtime: yes, the new value will be used for nodes that come back up after the + # change. + # Overridable in a profile: no + timeout = ${datastax-java-driver.advanced.connection.init-query-timeout} + } + } + + # Options related to the Netty event loop groups used internally by the driver. + advanced.netty { + + # Whether the threads created by the driver should be daemon threads. + # This will apply to the threads in io-group, admin-group, and the timer thread. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + daemon = false + + # The event loop group used for I/O operations (reading and writing to Cassandra nodes). + # By default, threads in this group are named after the session name, "-io-" and an incrementing + # counter, for example "s0-io-0". + io-group { + # The number of threads. + # If this is set to 0, the driver will use `Runtime.getRuntime().availableProcessors() * 2`. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + size = 0 + + # The options to shut down the event loop group gracefully when the driver closes. If a task + # gets submitted during the quiet period, it is accepted and the quiet period starts over. + # The timeout limits the overall shutdown time. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + shutdown {quiet-period = 2, timeout = 15, unit = SECONDS} + } + # The event loop group used for admin tasks not related to request I/O (handle cluster events, + # refresh metadata, schedule reconnections, etc.) + # By default, threads in this group are named after the session name, "-admin-" and an + # incrementing counter, for example "s0-admin-0". + admin-group { + size = 2 + + shutdown {quiet-period = 2, timeout = 15, unit = SECONDS} + } + # The timer used for scheduling request timeouts and speculative executions + # By default, this thread is named after the session name and "-timer-0", for example + # "s0-timer-0". + timer { + # The timer tick duration. + # This is how frequent the timer should wake up to check for timed-out tasks or speculative + # executions. Lower resolution (i.e. longer durations) will leave more CPU cycles for running + # I/O operations at the cost of precision of exactly when a request timeout will expire or a + # speculative execution will run. Higher resolution (i.e. shorter durations) will result in + # more precise request timeouts and speculative execution scheduling, but at the cost of CPU + # cycles taken from I/O operations, which could lead to lower overall I/O throughput. + # + # The default value is 100 milliseconds, which is a comfortable value for most use cases. + # However if you are using more agressive timeouts or speculative execution delays, then you + # should lower the timer tick duration as well, so that its value is always equal to or lesser + # than the timeout duration and/or speculative execution delay you intend to use. + # + # Note for Windows users: avoid setting this to aggressive values, that is, anything under 100 + # milliseconds; doing so is known to cause extreme CPU usage. Also, the tick duration must be + # a multiple of 10 under Windows; if that is not the case, it will be automatically rounded + # down to the nearest multiple of 10 (e.g. 99 milliseconds will be rounded down to 90 + # milliseconds). + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + tick-duration = 100 milliseconds + + # Number of ticks in a Timer wheel. The underlying implementation uses Netty's + # HashedWheelTimer, which uses hashes to arrange the timeouts. This effectively controls the + # size of the timer wheel. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + ticks-per-wheel = 2048 + } + } + + # The component that coalesces writes on the connections. + # This is exposed mainly to facilitate tuning during development. You shouldn't have to adjust + # this. + advanced.coalescer { + # How many times the coalescer is allowed to reschedule itself when it did no work. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + max-runs-with-no-work = 5 + + # The reschedule interval. + # + # Required: yes + # Modifiable at runtime: no + # Overridable in a profile: no + reschedule-interval = 10 microseconds + } + + profiles { + # This is where your custom profiles go, for example: + # olap { + # basic.request.timeout = 5 seconds + # } + } +}