-
Notifications
You must be signed in to change notification settings - Fork 2.1k
[Storage] Add tableIdentifier to UCClient getCommits #6788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
| import dev.failsafe.function.CheckedSupplier; | ||
| import io.delta.kernel.internal.types.DataTypeJsonSerDe; | ||
| import io.delta.kernel.types.*; | ||
| import io.delta.kernel.unitycatalog.UCTableIdentifier; | ||
| import io.unitycatalog.client.ApiClient; | ||
| import io.unitycatalog.client.ApiClientBuilder; | ||
| import io.unitycatalog.client.ApiException; | ||
|
|
@@ -231,6 +232,33 @@ public ApiClient getApiClient() { | |
| return apiClient; | ||
| } | ||
|
|
||
| /** | ||
| * Parses {@code schema.table} or {@code catalog.schema.table} into a {@link UCTableIdentifier}. | ||
| * In the 2-part form the catalog defaults to this catalog's name; in the 3-part form the leading | ||
| * segment must equal this catalog's name. | ||
| * | ||
| * <p>The name is split on every literal {@code '.'}; quoted or escaped dots are not handled here. | ||
| * NOTE(review): {@link String#split(String)} discards trailing empty segments, so an input such | ||
| * as {@code schema.table.} is treated as the 2-part form -- confirm whether it should be rejected. | ||
| * | ||
| * @param qualifiedTableName the dotted table name in 2-part or 3-part form | ||
| * @return the identifier built from the resolved catalog, schema and table segments | ||
| * @throws IllegalArgumentException presumably raised by {@code Preconditions.checkArgument} when | ||
| *     the name does not have 2 or 3 segments, or when the 3-part catalog segment differs from | ||
| *     {@code getName()} -- confirm against the Preconditions class imported by this file | ||
| */ | ||
| UCTableIdentifier toUcTableIdentifier(String qualifiedTableName) { | ||
| String[] namespaces = qualifiedTableName.split("\\."); | ||
| Preconditions.checkArgument(namespaces.length == 2 || namespaces.length == 3); | ||
| String catalogName; | ||
| String schemaName; | ||
| String tableName; | ||
| if (namespaces.length == 3) { | ||
| Preconditions.checkArgument( | ||
| namespaces[0].equals(getName()), | ||
| String.format( | ||
| "table's catalog name %s must match catalog's name %s", namespaces[0], getName())); | ||
| catalogName = namespaces[0]; | ||
| schemaName = namespaces[1]; | ||
| tableName = namespaces[2]; | ||
| } else { | ||
| catalogName = getName(); | ||
| schemaName = namespaces[0]; | ||
| tableName = namespaces[1]; | ||
| } | ||
| return new UCTableIdentifier(catalogName, schemaName, tableName); | ||
| } | ||
|
|
||
| @Override | ||
| public void open() { | ||
| if (apiClient == null) { | ||
|
|
@@ -329,22 +357,9 @@ public void createTable( | |
| () -> { | ||
| TablesApi tablesApi = new TablesApi(apiClient); | ||
| // Obtain names | ||
| String[] namespaces = tableId.split("\\."); | ||
| Preconditions.checkArgument(namespaces.length == 2 || namespaces.length == 3); | ||
| String schemaName; | ||
| String tableName; | ||
| if (namespaces.length == 3) { | ||
| Preconditions.checkArgument( | ||
| namespaces[0].equals(getName()), | ||
| String.format( | ||
| "table's catalog name %s must match catalog's name %s", | ||
| namespaces[0], getName())); | ||
| schemaName = namespaces[1]; | ||
| tableName = namespaces[2]; | ||
| } else { | ||
| schemaName = namespaces[0]; | ||
| tableName = namespaces[1]; | ||
| } | ||
| UCTableIdentifier tableIdentifier = toUcTableIdentifier(tableId); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: okay, so this is not a blocker for the PR, just a comment here to provide context. |
||
| String schemaName = tableIdentifier.getSchemaName(); | ||
| String tableName = tableIdentifier.getTableName(); | ||
| // Column Info | ||
| List<ColumnInfo> columnInfos = | ||
| IntStream.range(0, schema.fields().size()) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,7 @@ | |
| import io.delta.kernel.unitycatalog.metrics.UcLoadSnapshotTelemetry; | ||
| import io.delta.storage.commit.Commit; | ||
| import io.delta.storage.commit.GetCommitsResponse; | ||
| import io.delta.storage.commit.TableIdentifier; | ||
| import io.delta.storage.commit.uccommitcoordinator.UCClient; | ||
| import io.delta.storage.commit.uccommitcoordinator.UCCommitCoordinatorException; | ||
| import java.io.IOException; | ||
|
|
@@ -82,6 +83,7 @@ public UCCatalogManagedClient(UCClient ucClient) { | |
| * @param engine The Delta Kernel {@link Engine} to use for loading the table. | ||
| * @param ucTableId The Unity Catalog table ID, which is a unique identifier for the table in UC. | ||
| * @param tablePath The path to the Delta table in the underlying storage system. | ||
| * @param ucTableIdentifier The three-part Unity Catalog table identifier. | ||
| * @param versionOpt The optional version to time-travel to when loading the table. This must be | ||
| * mutually exclusive with timestampOpt. | ||
| * @param timestampOpt The optional timestamp to time-travel to when loading the table. This must | ||
|
|
@@ -93,11 +95,13 @@ public Snapshot loadSnapshot( | |
| Engine engine, | ||
| String ucTableId, | ||
| String tablePath, | ||
| UCTableIdentifier ucTableIdentifier, | ||
| Optional<Long> versionOpt, | ||
| Optional<Long> timestampOpt) { | ||
| Objects.requireNonNull(engine, "engine is null"); | ||
| Objects.requireNonNull(ucTableId, "ucTableId is null"); | ||
| Objects.requireNonNull(tablePath, "tablePath is null"); | ||
| Objects.requireNonNull(ucTableIdentifier, "ucTableIdentifier is null"); | ||
| Objects.requireNonNull(versionOpt, "versionOpt is null"); | ||
| Objects.requireNonNull(timestampOpt, "timestampOpt is null"); | ||
| versionOpt.ifPresent(version -> checkArgument(version >= 0, "version must be non-negative")); | ||
|
|
@@ -122,7 +126,13 @@ public Snapshot loadSnapshot( | |
| () -> { | ||
| final GetCommitsResponse response = | ||
| metricsCollector.getCommitsTimer.timeChecked( | ||
| () -> getRatifiedCommitsFromUC(ucTableId, tablePath, versionOpt)); | ||
| () -> | ||
| getRatifiedCommitsFromUC( | ||
| ucTableId, | ||
| tablePath, | ||
| versionOpt, | ||
| UCCatalogManagedCommitter.toStorageTableIdentifier( | ||
| ucTableIdentifier))); | ||
|
|
||
| metricsCollector.setNumCatalogCommits(response.getCommits().size()); | ||
|
|
||
|
|
@@ -249,6 +259,7 @@ public CreateTableTransactionBuilder buildCreateTableTransaction( | |
| * @param engine The Delta Kernel {@link Engine} to use for loading the table. | ||
| * @param ucTableId The Unity Catalog table ID, which is a unique identifier for the table in UC. | ||
| * @param tablePath The path to the Delta table in the underlying storage system. | ||
| * @param ucTableIdentifier The three-part Unity Catalog table identifier. | ||
| * @param startVersionOpt The optional start version boundary. This must be mutually exclusive | ||
| * with startTimestampOpt. Either this or startTimestampOpt must be provided. | ||
| * @param startTimestampOpt The optional start timestamp boundary. This must be mutually exclusive | ||
|
|
@@ -267,13 +278,15 @@ public CommitRange loadCommitRange( | |
| Engine engine, | ||
| String ucTableId, | ||
| String tablePath, | ||
| UCTableIdentifier ucTableIdentifier, | ||
| Optional<Long> startVersionOpt, | ||
| Optional<Long> startTimestampOpt, | ||
| Optional<Long> endVersionOpt, | ||
| Optional<Long> endTimestampOpt) { | ||
| Objects.requireNonNull(engine, "engine is null"); | ||
| Objects.requireNonNull(ucTableId, "ucTableId is null"); | ||
| Objects.requireNonNull(tablePath, "tablePath is null"); | ||
| Objects.requireNonNull(ucTableIdentifier, "ucTableIdentifier is null"); | ||
| Objects.requireNonNull(startVersionOpt, "startVersionOpt is null"); | ||
| Objects.requireNonNull(startTimestampOpt, "startTimestampOpt is null"); | ||
| Objects.requireNonNull(endVersionOpt, "endVersionOpt is null"); | ||
|
|
@@ -308,7 +321,11 @@ public CommitRange loadCommitRange( | |
| Optional<Long> endVersionOptForCommitQuery = | ||
| endVersionOpt.filter(v -> !startTimestampOpt.isPresent()); | ||
| final GetCommitsResponse response = | ||
| getRatifiedCommitsFromUC(ucTableId, tablePath, endVersionOptForCommitQuery); | ||
| getRatifiedCommitsFromUC( | ||
| ucTableId, | ||
| tablePath, | ||
| endVersionOptForCommitQuery, | ||
| UCCatalogManagedCommitter.toStorageTableIdentifier(ucTableIdentifier)); | ||
| final long ucTableVersion = response.getLatestTableVersion(); | ||
| validateVersionBoundariesExist(ucTableId, startVersionOpt, endVersionOpt, ucTableVersion); | ||
| final List<ParsedLogData> logData = | ||
|
|
@@ -413,7 +430,11 @@ private String getCommitRangeBoundariesString( | |
| } | ||
|
|
||
| private GetCommitsResponse getRatifiedCommitsFromUC( | ||
| String ucTableId, String tablePath, Optional<Long> versionOpt) { | ||
| String ucTableId, | ||
| String tablePath, | ||
| Optional<Long> versionOpt, | ||
| TableIdentifier tableIdentifier) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: if all the callers need to convert the |
||
| Objects.requireNonNull(tableIdentifier, "tableIdentifier is null"); | ||
| logger.info( | ||
| "[{}] Invoking the UCClient to get ratified commits at version {}", | ||
| ucTableId, | ||
|
|
@@ -430,6 +451,7 @@ private GetCommitsResponse getRatifiedCommitsFromUC( | |
| return ucClient.getCommits( | ||
| ucTableId, | ||
| new Path(tablePath).toUri(), | ||
| tableIdentifier, | ||
| Optional.empty() /* startVersion */, | ||
| versionOpt /* endVersion */); | ||
| } catch (IOException ex) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is needed because in Maven mode (
-DkernelVersion=...), sparkV2 can't reach kernel-unitycatalog's test helpers (InMemoryUCClient, UCCatalogManagedTestUtils) via the source-mode test->test dep, so we publish and consume them as a -tests classifier jar -- same pattern kernelApi already uses. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same as the change below.