-
Notifications
You must be signed in to change notification settings - Fork 1.1k
feat(Spanner): integrate SourceConfigParser to centralize shard configuration loading for SpannerToSourceDb pipelines #3840
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
| */ | ||
| package com.google.cloud.teleport.v2.templates; | ||
|
|
||
| import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.CASSANDRA_SOURCE_TYPE; | ||
| import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.MYSQL_SOURCE_TYPE; | ||
| import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.POSTGRES_SOURCE_TYPE; | ||
| import static com.google.cloud.teleport.v2.spanner.migrations.constants.Constants.RUN_MODE_REGULAR; | ||
|
|
@@ -38,12 +39,16 @@ | |
| import com.google.cloud.teleport.v2.spanner.ddl.Ddl; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.shard.CassandraShard; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.shard.Shard; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.source.config.CassandraConnectionConfig; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.source.config.JdbcShardConfig; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.source.config.SourceConfigParser; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.source.config.SourceConnectionConfig; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.transformation.CustomTransformation; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.CassandraConfigFileReader; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.CassandraDriverConfigLoader; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.DataflowWorkerMachineTypeUtils; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.ISecretManagerAccessor; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.SecretManagerAccessorImpl; | ||
| import com.google.cloud.teleport.v2.spanner.migrations.utils.ShardFileReader; | ||
| import com.google.cloud.teleport.v2.spanner.sourceddl.CassandraInformationSchemaScanner; | ||
| import com.google.cloud.teleport.v2.spanner.sourceddl.MySqlInformationSchemaScanner; | ||
| import com.google.cloud.teleport.v2.spanner.sourceddl.PostgreSQLInformationSchemaScanner; | ||
|
|
@@ -61,6 +66,7 @@ | |
| import com.google.cloud.teleport.v2.templates.transforms.UpdateDlqMetricsFn; | ||
| import com.google.cloud.teleport.v2.transforms.DLQWriteTransform; | ||
| import com.google.cloud.teleport.v2.values.FailsafeElement; | ||
| import com.google.common.base.Preconditions; | ||
| import com.google.common.base.Strings; | ||
| import com.zaxxer.hikari.HikariConfig; | ||
| import com.zaxxer.hikari.HikariDataSource; | ||
|
|
@@ -647,20 +653,14 @@ public static PipelineResult run(Options options) { | |
| .get(SpannerInformationSchemaProcessorTransform.SHADOW_TABLE_DDL_TAG) | ||
| .apply("View Shadow DDL", View.asSingleton()); | ||
|
|
||
| List<Shard> shards; | ||
| String shardingMode; | ||
| if (MYSQL_SOURCE_TYPE.equals(options.getSourceType()) | ||
| || POSTGRES_SOURCE_TYPE.equals(options.getSourceType())) { | ||
| ShardFileReader shardFileReader = new ShardFileReader(new SecretManagerAccessorImpl()); | ||
| shards = shardFileReader.getOrderedShardDetails(options.getSourceShardsFilePath()); | ||
| shardingMode = Constants.SHARDING_MODE_MULTI_SHARD; | ||
| List<Shard> shards = getShardList(options.getSourceType(), options.getSourceShardsFilePath()); | ||
|
|
||
| } else { | ||
| CassandraConfigFileReader cassandraConfigFileReader = new CassandraConfigFileReader(); | ||
| shards = cassandraConfigFileReader.getCassandraShard(options.getSourceShardsFilePath()); | ||
| LOG.info("Cassandra config is: {}", shards.get(0)); | ||
| shardingMode = Constants.SHARDING_MODE_SINGLE_SHARD; | ||
| } | ||
| // cassandra is always a single sharded migration. | ||
| // for JDBC, shards size and IsShardedMigration option is used below. | ||
| String shardingMode = | ||
| options.getSourceType().equals(CASSANDRA_SOURCE_TYPE) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be moved to the respective config parsing flow rather than having it as a top level decision
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does the non-sharded workflow work here ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shardingMode: this is used in AssignShardIdFn to identify if default shard id is needed to be assigned to stream events. I could have used the size of shards list to identify The primary concern in doing so was the logic below (at line 672) where this is also dependent on the user input
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can AND that flag here to get to the output. In any case, not a blocker to letting this through. Lets take it up as a follow up |
||
| ? Constants.SHARDING_MODE_SINGLE_SHARD | ||
| : Constants.SHARDING_MODE_MULTI_SHARD; | ||
|
|
||
| if (MYSQL_SOURCE_TYPE.equals(options.getSourceType())) { | ||
| validateMySQLNotReadOnly(shards); | ||
|
|
@@ -951,6 +951,53 @@ static void buildPipeline( | |
| .build()); | ||
| } | ||
|
|
||
| /** | ||
| * Returns a list of shards based on the source type and source shards file path. This should be | ||
| * removed in Phase 2 of Standardizing config. | ||
| * | ||
| * @param sourceType The type of the source database. | ||
| * @param sourceShardsFilePath The GCS path to the source shards configuration file. | ||
| * @return A list of shards. | ||
| */ | ||
| public static List<Shard> getShardList(String sourceType, String sourceShardsFilePath) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ideally this method should move to spanner-common
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic below is specific to Reverse replication. There is no synergy on this between reverse and bulk. Also, the plan is to remove this in phase 2. I have added a comment to reflect this. |
||
| ISecretManagerAccessor secretManagerAccessor = new SecretManagerAccessorImpl(); | ||
| SourceConfigParser sourceConfigParser = new SourceConfigParser(secretManagerAccessor); | ||
| SourceConnectionConfig sourceConnectionConfig; | ||
| try { | ||
| // Parse the source shards configuration file to respective | ||
| // SourceConnectionConfig. | ||
| sourceConnectionConfig = | ||
| sourceConfigParser.parseConfiguration(sourceType, sourceShardsFilePath); | ||
| } catch (Exception e) { | ||
| LOG.error("Error parsing source config", e); | ||
| throw new RuntimeException("Error parsing source config", e); | ||
| } | ||
| List<Shard> shards; | ||
| if (sourceConnectionConfig instanceof JdbcShardConfig) { | ||
| shards = ((JdbcShardConfig) sourceConnectionConfig).getShardConfigs(); | ||
| LOG.info("JDBC shard config is parsed."); | ||
| } else if (sourceConnectionConfig instanceof CassandraConnectionConfig) { | ||
| CassandraConfigFileReader cassandraConfigFileReader = new CassandraConfigFileReader(); | ||
| shards = | ||
| cassandraConfigFileReader.getCassandraShard( | ||
| ((CassandraConnectionConfig) sourceConnectionConfig).getOptionsMap()); | ||
| LOG.info("Cassandra shard config is parsed."); | ||
| } else { | ||
| String errorMessage = | ||
| "Invalid source config for source type: " | ||
| + sourceType | ||
| + ". Source config parsed to: " | ||
| + sourceConnectionConfig.getClass() | ||
| + ". Source config file path: " | ||
| + sourceShardsFilePath; | ||
| LOG.error(errorMessage); | ||
| throw new RuntimeException(errorMessage); | ||
| } | ||
| Preconditions.checkArgument( | ||
| shards != null && !shards.isEmpty(), "Shard list should have at least 1 element."); | ||
| return shards; | ||
| } | ||
|
|
||
| public static SpannerIO.ReadChangeStream getReadChangeStreamDoFn( | ||
| Options options, SpannerConfig spannerConfig) { | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this excluded ? Is it a change in beam ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without this exclusion, there is a version conflict with:
org.apache.cassandra
java-driver-core
${cassandra-java-driver-core.version}
This dependency is test scoped. It was not causing any issue earlier as we didn't have tests which was using this package.