diff --git a/build.sbt b/build.sbt index 27cf4cacd61..fba7537afe8 100644 --- a/build.sbt +++ b/build.sbt @@ -340,8 +340,22 @@ lazy val sparkV1 = (project in file("spark")) "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.apache.spark" %% "spark-catalyst" % sparkVersion.value % "provided", // For DynamoDBCommitStore - "com.amazonaws" % "aws-java-sdk" % "1.12.262" % "provided", - + "com.amazonaws" % "aws-java-sdk" % "1.12.262" % "provided" + ) ++ { + if (CrossVersion.partialVersion(unityCatalogVersion).exists { case (major, minor) => major > 0 || minor >= 5 }) { + Seq( + "io.unitycatalog" % "unitycatalog-hadoop" % unityCatalogVersion excludeAll( + ExclusionRule(organization = "org.openapitools"), + ExclusionRule(organization = "com.fasterxml.jackson.core"), + ExclusionRule(organization = "com.fasterxml.jackson.module"), + ExclusionRule(organization = "com.fasterxml.jackson.datatype"), + ExclusionRule(organization = "com.fasterxml.jackson.dataformat") + ) + ) + } else { + Seq.empty + } + } ++ Seq( // Test deps "org.scalatest" %% "scalatest" % scalaTestVersion % "test", "org.scalatestplus" %% "scalacheck-1-15" % "3.2.9.0" % "test", @@ -660,8 +674,22 @@ lazy val spark = (project in file("spark-unified")) "org.apache.spark" %% "spark-sql" % sparkVersion.value % "provided", "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.apache.spark" %% "spark-catalyst" % sparkVersion.value % "provided", - "com.amazonaws" % "aws-java-sdk" % "1.12.262" % "provided", - + "com.amazonaws" % "aws-java-sdk" % "1.12.262" % "provided" + ) ++ { + if (CrossVersion.partialVersion(unityCatalogVersion).exists { case (major, minor) => major > 0 || minor >= 5 }) { + Seq( + "io.unitycatalog" % "unitycatalog-hadoop" % unityCatalogVersion excludeAll( + ExclusionRule(organization = "org.openapitools"), + ExclusionRule(organization = "com.fasterxml.jackson.core"), + ExclusionRule(organization = "com.fasterxml.jackson.module"), + ExclusionRule(organization = "com.fasterxml.jackson.datatype"), + ExclusionRule(organization = "com.fasterxml.jackson.dataformat") + ) + ) + } else { + Seq.empty + } + } ++ Seq( 
"org.scalatest" %% "scalatest" % scalaTestVersion % "test", "org.scalatestplus" %% "scalacheck-1-15" % "3.2.9.0" % "test", "junit" % "junit" % "4.13.2" % "test", @@ -669,7 +697,17 @@ lazy val spark = (project in file("spark-unified")) "org.apache.spark" %% "spark-catalyst" % sparkVersion.value % "test" classifier "tests", "org.apache.spark" %% "spark-core" % sparkVersion.value % "test" classifier "tests", "org.apache.spark" %% "spark-sql" % sparkVersion.value % "test" classifier "tests", - "org.apache.spark" %% "spark-hive" % sparkVersion.value % "test" classifier "tests", + "org.apache.spark" %% "spark-hive" % sparkVersion.value % "test" classifier "tests" + ) ++ { + if (CrossVersion.partialVersion(unityCatalogVersion).exists { case (major, minor) => major > 0 || minor >= 5 }) { + Seq( + // unitycatalog-hadoop references the ABFS token-provider interface during classloading. + "org.apache.hadoop" % "hadoop-azure" % hadoopVersion % "test" + ) + } else { + Seq.empty + } + } ++ Seq( "org.mockito" % "mockito-inline" % "4.11.0" % "test", ), @@ -843,13 +881,19 @@ Global / ensurePinnedUnityCatalog := { val home = file(sys.props("user.home")) // Check both layouts: a restored sbt cache can pre-populate ivy alone, leaving m2 empty - // checking only ivy would silently skip the slow publish and break mvn-based consumers. 
- val ivy2Canary = home / ".ivy2" / "local" / "io.unitycatalog" / + val ivy2ClientCanary = home / ".ivy2" / "local" / "io.unitycatalog" / "unitycatalog-client" / unityCatalogVersion / "ivys" / "ivy.xml" - val m2Canary = home / ".m2" / "repository" / "io" / "unitycatalog" / + val m2ClientCanary = home / ".m2" / "repository" / "io" / "unitycatalog" / "unitycatalog-client" / unityCatalogVersion / s"unitycatalog-client-$unityCatalogVersion.pom" - if (!ivy2Canary.exists || !m2Canary.exists) { - publishPinnedUnityCatalog(log, ivy2Canary) + val ivy2HadoopCanary = home / ".ivy2" / "local" / "io.unitycatalog" / + "unitycatalog-hadoop" / unityCatalogVersion / "ivys" / "ivy.xml" + val m2HadoopCanary = home / ".m2" / "repository" / "io" / "unitycatalog" / + "unitycatalog-hadoop" / unityCatalogVersion / + s"unitycatalog-hadoop-$unityCatalogVersion.pom" + if (!Seq(ivy2ClientCanary, m2ClientCanary, ivy2HadoopCanary, m2HadoopCanary) + .forall(_.exists)) { + publishPinnedUnityCatalog(log, ivy2ClientCanary) } } } @@ -1197,6 +1241,7 @@ lazy val storage = (project in file("storage")) commonSettings, exportJars := true, javaOnlyReleaseSettings, + libraryDependencies ++= Seq( // User can provide any 2.x or 3.x version. We don't use any new fancy APIs. Watch out for // versions with known vulnerabilities. diff --git a/project/scripts/setup_unitycatalog_main.sh b/project/scripts/setup_unitycatalog_main.sh index d66896365a7..8c9b442ebcf 100755 --- a/project/scripts/setup_unitycatalog_main.sh +++ b/project/scripts/setup_unitycatalog_main.sh @@ -57,7 +57,7 @@ set -euo pipefail # The pin. Bump both lines together if UC's version.sbt changed at the new SHA. build.sbt's # `unityCatalogVersion` is obtained by running this script with `--print-version`, so these two # values are the single source of truth. 
-UC_PIN_SHA=e6deb37e890a0a6fb8ae495b5bec52326731f6a6 +UC_PIN_SHA=9844a3002d7fdf41e8ad65ff3c07117fc2a9eba0 UC_BASE_VERSION=0.5.0-SNAPSHOT # --------------------------------------------------------------------------------------------- @@ -89,7 +89,7 @@ if [[ "${1:-}" == "--print-version" ]]; then exit 0 fi -# Canonical Ivy + Maven artifact paths. Delta depends on all three UC modules; sbt resolves from +# Canonical Ivy + Maven artifact paths. Delta depends on these UC modules; sbt resolves from # ~/.ivy2/local, mvn (kernel-examples integration tests) resolves from ~/.m2/repository. If any # is missing in either layout we must re-publish. IVY_LOCAL="$HOME/.ivy2/local/io.unitycatalog" @@ -118,24 +118,26 @@ if [[ "$UC_FORCE" != "1" ]] && all_canaries_present; then exit 0 fi -echo ">>> Fetching Unity Catalog main from $UC_REPO" +echo ">>> Fetching Unity Catalog from $UC_REPO" rm -rf "$UC_DIR" mkdir -p "$UC_DIR" -# Fetch main's full history so we can run `git merge-base --is-ancestor` below to verify the -# pinned SHA is actually on main. UC's repo is small; full fetch of one branch is cheap. +# Fetch main (when UC_REF=main) or just the pinned commit, so the pin can be verified locally below. git -C "$UC_DIR" init --quiet git -C "$UC_DIR" remote add origin "$UC_REPO" -git -C "$UC_DIR" fetch --quiet origin main +if [[ "$UC_REF" == "main" ]]; then + git -C "$UC_DIR" fetch --quiet origin main +else + git -C "$UC_DIR" fetch --quiet origin "$UC_PIN_SHA" +fi cd "$UC_DIR" -# Safety check: the pinned SHA must be reachable from UC main. Local `merge-base --is-ancestor` +# Safety check: the pinned SHA must resolve to a commit we fetched. Local `rev-parse --verify` # on the history we just fetched - no GitHub API, no token needed. Only applies when UC_REF is # the pinned SHA; UC_REF=main is trivially on main. if [[ "$UC_REF" == "$UC_PIN_SHA" ]]; then - if ! 
git merge-base --is-ancestor "$UC_PIN_SHA" origin/main 2>/dev/null; then - echo "ERROR: UC_PIN_SHA=$UC_PIN_SHA is not reachable from unitycatalog/unitycatalog main." >&2 - echo " Pin must reference a commit on https://github.com/unitycatalog/unitycatalog/commits/main" >&2 + if ! git rev-parse --verify "$UC_PIN_SHA^{commit}" >/dev/null 2>&1; then + echo "ERROR: UC_PIN_SHA=$UC_PIN_SHA could not be fetched from $UC_REPO." >&2 exit 1 fi fi @@ -161,7 +163,7 @@ fi # coordinate. Applied as a persistent setting so it sticks across the two sbt invocations below. SET_VERSION_CMD="set ThisBuild / version := \"$UC_VERSION\"" -echo ">>> Building and publishing UC client + server to local Maven repo" +echo ">>> Building and publishing UC client + server + hadoop to local Maven repo" ./build/sbt \ "$SET_VERSION_CMD" \ "set client / Compile / packageDoc / publishArtifact := false" \ diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/AbstractDeltaCatalog.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/AbstractDeltaCatalog.scala index 86a0f9b4ee9..1d724beefb4 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/AbstractDeltaCatalog.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/AbstractDeltaCatalog.scala @@ -83,6 +83,9 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension val spark = SparkSession.active + private lazy val deltaCatalogClient: Option[DeltaCatalogClient] = + Some(UCDeltaCatalogClient(delegate, spark)) + private lazy val isUnityCatalog: Boolean = { val delegateField = classOf[DelegatingCatalogExtension].getDeclaredField("delegate") delegateField.setAccessible(true) @@ -178,7 +181,6 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension base } } - var locUriOpt = location.map(CatalogUtils.stringToURI) val existingTableOpt = getExistingTableIfExists(id, Some(ident), operation) // PROP_IS_MANAGED_LOCATION indicates that the table location is not user-specified but // 
system-generated. The table should be created as managed table in this case. @@ -193,10 +195,24 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension } else { CatalogTableType.EXTERNAL } + // operation.isCreate covers CREATE and CREATE OR REPLACE when no existing table was found. + val ucDeltaApiCreate = if (isUnityCatalog && existingTableOpt.isEmpty && operation.isCreate) { + deltaCatalogClient.flatMap(_.prepareCreateTable( + ident, + tableType, + location.map(CatalogUtils.stringToURI))) + } else { + None + } + val locUriOpt = ucDeltaApiCreate.map(_.location).orElse(location.map(CatalogUtils.stringToURI)) + val tablePropertiesWithUCDeltaApi = + tableProperties ++ ucDeltaApiCreate.map(_.tableProperties).getOrElse(Map.empty) + val writeOptionsWithUCDeltaApi = + writeOptions ++ ucDeltaApiCreate.map(_.storageProperties).getOrElse(Map.empty) val loc = locUriOpt .orElse(existingTableOpt.flatMap(_.storage.locationUri)) .getOrElse(spark.sessionState.catalog.defaultTablePath(id)) - val storage = DataSource.buildStorageFormatFromOptions(writeOptions) + val storage = DataSource.buildStorageFormatFromOptions(writeOptionsWithUCDeltaApi) .copy(locationUri = Option(loc)) val commentOpt = Option(allTableProperties.get("comment")) @@ -209,7 +225,7 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension provider = Some(DeltaSourceUtils.ALT_NAME), partitionColumnNames = newPartitionColumns, bucketSpec = newBucketSpec, - properties = tableProperties, + properties = tablePropertiesWithUCDeltaApi, comment = commentOpt ) @@ -223,7 +239,7 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension val writer = sourceQuery.map { df => val catalogTbl = Some(tableDesc) // For safety, only extract the file system options here, to create deltaLog. 
- val fileSystemOptions = writeOptions.filter { case (k, _) => + val fileSystemOptions = writeOptionsWithUCDeltaApi.filter { case (k, _) => DeltaTableUtils.validDeltaTableHadoopPrefixes.exists(k.startsWith) } val deltaOptions = new DeltaOptions( @@ -277,9 +293,14 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension // Before this bug is fixed, we should only call the catalog plugin API to create tables // if UC is enabled to replace `V2SessionCatalog`. createTableFunc = Option.when(isUnityCatalog) { - v1Table => { - val t = V1Table(v1Table) - super.createTable(ident, t.columns(), t.partitioning, t.properties) + (v1Table, snapshot) => { + ucDeltaApiCreate match { + case Some(_) => + deltaCatalogClient.foreach(_.createTable(ident, v1Table, snapshot)) + case None => + val t = V1Table(v1Table) + super.createTable(ident, t.columns(), t.partitioning, t.properties) + } } }).run(spark) @@ -290,7 +311,14 @@ class AbstractDeltaCatalog extends DelegatingCatalogExtension "DeltaCatalog", "loadTable") { setVariantBlockingConfigIfUC() try { - val table = super.loadTable(ident) + val table = + if (isPathIdentifier(ident)) { + loadPathTable(ident) + } else if (isIcebergPathIdentifier(ident)) { + newIcebergPathTable(ident) + } else { + deltaCatalogClient.flatMap(_.loadTable(ident)).getOrElse(super.loadTable(ident)) + } ServerSidePlannedTable.tryCreate(spark, ident, table, isUnityCatalog).foreach { sspt => return sspt diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClient.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClient.scala new file mode 100644 index 00000000000..43af9ff31fc --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClient.scala @@ -0,0 +1,87 @@ +/* + * Copyright (2026) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.catalog + +import java.net.URI + +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.connector.catalog.{Identifier, Table} +import org.apache.spark.sql.delta.Snapshot + +/** + * Values returned by the UC Delta Rest Catalog API prepare-create step. + * + * @param location UC-chosen location where Delta should write the initial log. + * @param tableProperties properties added to the CatalogTable so the Delta commit uses the + * server-required protocol/features and UC table id. + * @param storageProperties Hadoop storage options, usually UC-vended credentials, added to the + * write options for the initial Delta commit. + */ +private[catalog] case class PreparedUCDeltaRestCatalogApiCreate( + location: URI, + tableProperties: Map[String, String], + storageProperties: Map[String, String]) + +/** + * Spark-facing Delta catalog API hook. + * + *

The interface is intentionally free of UC SDK and Hadoop credential dependencies so the shared + * catalog path does not depend on a specific UC client implementation. + */ +private[catalog] trait DeltaCatalogClient { + def loadTable(ident: Identifier): Option[Table] + + def prepareCreateTable( + ident: Identifier, + tableType: CatalogTableType, + location: Option[URI]): Option[PreparedUCDeltaRestCatalogApiCreate] + + def createTable( + ident: Identifier, + table: CatalogTable, + snapshot: Snapshot): Unit +} + +private[delta] object DeltaCatalogClient { + private[catalog] val UCDeltaRestCatalogApiEnabledKey = + UCDeltaCatalogClient.UCDeltaRestCatalogApiEnabledKey + private[catalog] val RenewCredentialEnabledKey = + UCDeltaCatalogClient.RenewCredentialEnabledKey + private[catalog] val CredScopedFsEnabledKey = + UCDeltaCatalogClient.CredScopedFsEnabledKey + + private[catalog] def deltaRestApiEnabledConf(catalogName: String): String = { + UCDeltaCatalogClient.deltaRestApiEnabledConf(catalogName) + } + + private[catalog] def renewCredentialEnabledConf(catalogName: String): String = { + UCDeltaCatalogClient.renewCredentialEnabledConf(catalogName) + } + + private[catalog] def credScopedFsEnabledConf(catalogName: String): String = { + UCDeltaCatalogClient.credScopedFsEnabledConf(catalogName) + } + + private[delta] def pathCredentialOptions( + spark: SparkSession, + path: Path): Map[String, String] = { + UCDeltaCatalogClient.pathCredentialOptions(spark, path) + } +} diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala index 9508477ad28..796ce96c633 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/DeltaTableV2.scala @@ -86,11 +86,19 @@ class DeltaTableV2 private( PathInfo(new Path(catalogTable.get.location), Seq.empty, None) } else { val (rootPath, filters, 
timeTravel) = - DeltaDataSource.parsePathIdentifier(spark, path.toString, options) + DeltaDataSource.parsePathIdentifier(spark, path.toString, pathBasedOptions) PathInfo(rootPath, filters, timeTravel) } } + private lazy val pathBasedOptions: Map[String, String] = { + if (catalogTable.isDefined) { + options + } else { + DeltaCatalogClient.pathCredentialOptions(spark, path) ++ options + } + } + private def rootPath = pathInfo.rootPath private def partitionFilters = pathInfo.partitionFilters @@ -122,7 +130,7 @@ class DeltaTableV2 private( } fileSystemOptions ++ options } else { - options + pathBasedOptions } DeltaLog.forTable( spark, diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaCatalogClient.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaCatalogClient.scala new file mode 100644 index 00000000000..dd5d40d1bc2 --- /dev/null +++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaCatalogClient.scala @@ -0,0 +1,558 @@ +/* + * Copyright (2026) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.catalog + +import java.io.IOException +import java.lang.reflect.InvocationTargetException +import java.net.URI +import java.util.{Locale, Map => JMap} + +import scala.collection.JavaConverters._ + +import io.delta.storage.commit.actions.AbstractMetadata +import io.delta.storage.commit.uccommitcoordinator.UCCommitCoordinatorClient.UC_TABLE_ID_KEY +import io.delta.storage.commit.uccommitcoordinator.UCDeltaClient +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.{ + CreateTableRequest, + DataSourceFormat => DeltaDataSourceFormat, + DeltaProtocol => UCDeltaRestCatalogApiProtocol, + StagingTableResponse, + TableType => DeltaTableType +} +import io.unitycatalog.client.ApiException +import io.unitycatalog.client.auth.TokenProvider +import io.unitycatalog.hadoop.UCCredentialHadoopConfs +import io.unitycatalog.hadoop.UCCredentialHadoopConfs.{PathOperation, TableOperation} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog, V1Table} +import org.apache.spark.sql.delta.Snapshot +import org.apache.spark.sql.delta.actions.Protocol +import org.apache.spark.sql.delta.coordinatedcommits.{ + UCCommitCoordinatorBuilder, + UCTokenBasedRestClientFactory +} +import org.apache.spark.sql.delta.sources.DeltaSourceUtils + +/** + * Spark-side client for using UC Delta Rest Catalog API responses inside `DeltaCatalog`. 
+ * + * This class owns the UC-specific implementation behind the Spark-facing [[DeltaCatalogClient]] + * interface: choosing when to try the Delta API path, translating Delta API metadata into Spark + * catalog objects, fetching credentials for Spark/Hadoop, and returning `None` when + * `DeltaCatalog` should fall back to the legacy catalog path. + */ +private class UCDeltaCatalogClient private ( + private val ucDeltaClient: Option[UCDeltaClient], + catalogName: String, + credentialContext: Option[UCDeltaRestCatalogApiCredentialContext]) extends DeltaCatalogClient { + + import UCDeltaCatalogClient._ + + def loadTable(ident: Identifier): Option[Table] = { + ucDeltaClient match { + case Some(client) if isNamedTableIdentifier(ident) => + val schemaName = ident.namespace().head + val tableName = ident.name() + val metadata = try { + client.loadTable(catalogName, schemaName, tableName) + } catch { + case e: IOException if isUnsupportedTableFormat(e) => + return None + case e: IOException => + throw translateLoadTableException(ident, e) + } + val location = reflectedString(metadata, "getLocation") + val locationUri = CatalogUtils.stringToURI(location) + Some(V1Table(buildCatalogTableFromUCDeltaMetadata( + ident, + metadata, + location, + locationUri))) + case _ => + // UC Delta Rest Catalog API only supports catalog.schema.table identifiers for named + // tables. + None + } + } + + /** + * Prepares a UC Delta Rest Catalog API-backed CREATE TABLE before Delta writes the initial log. 
+ */ + def prepareCreateTable( + ident: Identifier, + tableType: CatalogTableType, + location: Option[URI]): Option[PreparedUCDeltaRestCatalogApiCreate] = { + ucDeltaClient match { + case Some(client) if isNamedTableIdentifier(ident) => + val schemaName = ident.namespace().head + val tableName = ident.name() + (tableType, location) match { + case (CatalogTableType.MANAGED, None) => + val staging = client.createStagingTable(catalogName, schemaName, tableName) + val stagingLocation = CatalogUtils.stringToURI(staging.getLocation) + Some(PreparedUCDeltaRestCatalogApiCreate( + location = stagingLocation, + tableProperties = toTableProperties(staging), + storageProperties = buildHadoopCredentialPropertiesForTable( + staging.getLocation, + stagingLocation.getScheme, + schemaName, + tableName))) + case (CatalogTableType.EXTERNAL, Some(externalLocation)) + if isCloudScheme(externalLocation.getScheme) => + val locationText = externalLocation.toString + // External create must write the initial _delta_log, so READ fallback would be wrong. + Some(PreparedUCDeltaRestCatalogApiCreate( + location = externalLocation, + tableProperties = Map.empty, + storageProperties = buildHadoopCredentialPropertiesForPath( + locationText, + externalLocation.getScheme, + PathOperation.PATH_CREATE_TABLE, + credentialContext))) + case _ => + None + } + case _ => + // UC Delta Rest Catalog API only supports catalog.schema.table identifiers for create. + None + } + } + + /** + * Finalizes a UC Delta Rest Catalog API-backed CREATE TABLE after Delta has written the + * initial log. + */ + def createTable( + ident: Identifier, + table: CatalogTable, + snapshot: Snapshot): Unit = { + ucDeltaClient match { + case Some(client) if isNamedTableIdentifier(ident) => + client.createTable( + catalogName, + ident.namespace().head, + toCreateTableRequest(ident, table, snapshot)) + case _ => + // Safety net: AbstractDeltaCatalog only calls this after prepareCreateTable returned Some. 
+ throw new IllegalStateException( + s"UC Delta Rest Catalog API createTable is not available for $ident.") + } + } + + private def translateLoadTableException(ident: Identifier, e: IOException): Throwable = { + e.getCause match { + case api: ApiException if api.getCode == 404 => + new NoSuchTableException(ident) + case _ => + e + } + } + + /** + * UC Delta APIs use this explicit 501 response when a table exists in UC but cannot be served + * through the Delta endpoint, such as a metric view or another non-Delta table format. + */ + private def isUnsupportedTableFormat(e: IOException): Boolean = e.getCause match { + case api: ApiException => + api.getCode == 501 && + Option(api.getResponseBody).exists(_.contains(UnsupportedTableFormatExceptionType)) + case _ => false + } + + private def isNamedTableIdentifier(ident: Identifier): Boolean = { + ident.namespace().length == 1 && !isDeltaPathIdentifier(ident) + } + + private def isDeltaPathIdentifier(ident: Identifier): Boolean = { + try { + ident.namespace().length == 1 && + DeltaSourceUtils.isDeltaDataSourceName(ident.namespace().head) && + new Path(ident.name()).isAbsolute + } catch { + case _: IllegalArgumentException => false + } + } + + /** + * Builds the Spark V1 catalog table returned from UC Delta Rest Catalog API metadata. + * The UC Delta response supplies the Spark table type, schema, provider, and storage metadata. 
+ */ + private def buildCatalogTableFromUCDeltaMetadata( + ident: Identifier, + metadata: AbstractMetadata, + location: String, + locationUri: URI): CatalogTable = { + val schemaName = ident.namespace().head + val tableName = ident.name() + CatalogTable( + identifier = + TableIdentifier(ident.name(), ident.namespace().lastOption, Some(catalogName)), + tableType = reflectedString(metadata, "getTableType") match { + case ManagedTableType => + CatalogTableType.MANAGED + case ExternalTableType => + CatalogTableType.EXTERNAL + case other => + throw new IllegalArgumentException( + s"Unsupported UC Delta Rest Catalog API table type for " + + s"$catalogName.${ident.namespace().mkString(".")}.${ident.name()}: $other") + }, + storage = CatalogStorageFormat.empty.copy( + locationUri = Some(locationUri), + properties = buildCatalogStorageProperties( + metadata, + location, + locationUri.getScheme, + schemaName, + tableName)), + schema = UCDeltaRestCatalogApiSchemaConverter.toSparkType(metadata.getSchemaString), + provider = Option(metadata.getProvider), + partitionColumnNames = Option(metadata.getPartitionColumns) + .map(_.asScala.toSeq) + .getOrElse(Nil)) + } + + /** + * Builds CatalogStorageFormat.properties for the Spark V1 table. + * V1Table later exposes these to Delta as option.* table properties. + */ + private def buildCatalogStorageProperties( + metadata: AbstractMetadata, + location: String, + locationScheme: String, + schemaName: String, + tableName: String): Map[String, String] = { + val credentialProperties = buildHadoopCredentialPropertiesForTable( + location, + locationScheme, + schemaName, + tableName) + // V1Table exposes storage properties as option.* table properties. Keep UC Delta Rest Catalog + // API table features here so the Delta load path receives them with the same option.* shape as + // other storage-level UC properties, while CatalogTable.properties stays reserved for Spark + // metadata. 
+ Option(metadata.getConfiguration).map(_.asScala.toMap).getOrElse(Map.empty) ++ + credentialProperties + } + + private def buildHadoopCredentialPropertiesForTable( + location: String, + locationScheme: String, + schemaName: String, + tableName: String): Map[String, String] = { + if (!isCloudScheme(locationScheme)) { + Map.empty[String, String] + } else { + val context = credentialContext.getOrElse { + throw new IllegalStateException( + "UC Delta Rest Catalog API credential context is missing for cloud location " + location) + } + val builder = UCCredentialHadoopConfs.builder( + context.uri, + locationScheme.toLowerCase(Locale.ROOT)) + .tokenProvider(context.tokenProvider) + .enableCredentialRenewal(context.renewCredentialEnabled) + .enableCredentialScopedFs(context.credScopedFsEnabled) + .hadoopConf(context.hadoopConf) + .addAppVersions(context.appVersions) + try { + // Prefer READ_WRITE so a loaded table can be used for writes without reloading + // credentials; read-only principals fall back to READ below. 
+ builder.buildForTable( + catalogName, + schemaName, + tableName, + TableOperation.READ_WRITE, + location) + .asScala + .toMap + } catch { + case e: ApiException if e.getCode == 401 || e.getCode == 403 => + builder.buildForTable(catalogName, schemaName, tableName, TableOperation.READ, location) + .asScala + .toMap + } + } + } +} + +private case class UCDeltaRestCatalogApiCredentialContext( + uri: String, + tokenProvider: TokenProvider, + renewCredentialEnabled: Boolean, + credScopedFsEnabled: Boolean, + hadoopConf: Configuration, + appVersions: JMap[String, String]) + +private[catalog] object UCDeltaCatalogClient { + private[catalog] val UCDeltaRestCatalogApiEnabledKey = "deltaRestApi.enabled" + private[catalog] val RenewCredentialEnabledKey = "renewCredential.enabled" + private[catalog] val CredScopedFsEnabledKey = "credScopedFs.enabled" + private val ManagedTableType = "MANAGED" + private val ExternalTableType = "EXTERNAL" + private val UnsupportedTableFormatExceptionType = "UnsupportedTableFormatException" + private val DefaultCatalogConf = "spark.sql.defaultCatalog" + private val DefaultRenewCredentialEnabled = true + private val DefaultCredScopedFsEnabled = false + private val CloudSchemes = Set("s3", "s3a", "gs", "abfs", "abfss") + + private[catalog] def deltaRestApiEnabledConf(catalogName: String): String = { + s"spark.sql.catalog.$catalogName.$UCDeltaRestCatalogApiEnabledKey" + } + + private[catalog] def renewCredentialEnabledConf(catalogName: String): String = { + s"spark.sql.catalog.$catalogName.$RenewCredentialEnabledKey" + } + + private[catalog] def credScopedFsEnabledConf(catalogName: String): String = { + s"spark.sql.catalog.$catalogName.$CredScopedFsEnabledKey" + } + + private def isCloudScheme(scheme: String): Boolean = { + Option(scheme).exists(s => CloudSchemes.contains(s.toLowerCase(Locale.ROOT))) + } + + /** + * Returns UC Delta Rest Catalog API path credential options for raw path-based Delta access. 
+ * + * Path-based access has no catalog identifier, so this uses the UC Delta Rest Catalog API-enabled + * default catalog as the credential authority. If the session has no such default catalog, path + * reads keep their original options. + */ + private[delta] def pathCredentialOptions( + spark: SparkSession, + path: Path): Map[String, String] = { + val location = path.toString + val locationScheme = path.toUri.getScheme + if (!isCloudScheme(locationScheme)) { + return Map.empty[String, String] + } + + selectedUCDeltaRestCatalogApiConfigForPathCredentials(spark) + .map { context => + try { + buildHadoopCredentialPropertiesForPath( + location, + locationScheme, + PathOperation.PATH_READ, + context) + } catch { + case e: ApiException if e.getCode == 404 => + Map.empty[String, String] + } + } + .getOrElse(Map.empty[String, String]) + } + + private def selectedUCDeltaRestCatalogApiConfigForPathCredentials( + spark: SparkSession): Option[UCDeltaRestCatalogApiCredentialContext] = { + spark.conf.getOption(DefaultCatalogConf) + .filter(_.nonEmpty) + .filter(catalogName => + spark.conf.get(deltaRestApiEnabledConf(catalogName), "false").toBoolean) + .flatMap(catalogName => ucDeltaRestCatalogApiCredentialContext(spark, catalogName)) + } + + private def reflectedString(metadata: AbstractMetadata, methodName: String): String = { + try { + metadata.getClass.getMethod(methodName).invoke(metadata).asInstanceOf[String] + } catch { + case e: NoSuchMethodException => + throw new IllegalStateException( + s"UC Delta metadata is missing required method $methodName.", e) + case e: InvocationTargetException => + e.getCause match { + case runtime: RuntimeException => throw runtime + case error: Error => throw error + case cause => throw new IllegalStateException( + s"Failed to read $methodName from UC Delta metadata.", cause) + } + } + } + + private def ucDeltaRestCatalogApiCredentialContext( + spark: SparkSession, + catalogName: String): Option[UCDeltaRestCatalogApiCredentialContext] = 
{ + if (!spark.conf.get(deltaRestApiEnabledConf(catalogName), "false").toBoolean) { + return None + } + + val (_, uri, authConfig) = UCCommitCoordinatorBuilder.getCatalogConfigs(spark) + .collectFirst { case (`catalogName`, configuredUri, configuredAuthConfig) => + (catalogName, configuredUri, configuredAuthConfig) + } + .getOrElse { + throw new IllegalArgumentException( + "UC Delta Rest Catalog API is enabled for catalog " + + s"$catalogName, but its Unity Catalog " + + "configuration is missing or incomplete.") + } + val tokenProvider = TokenProvider.create(authConfig.asJava) + // Catalog load has no DeltaLog yet, so pass the Spark session Hadoop conf to the UC + // credential builder. + // scalastyle:off deltahadoopconfiguration + val hadoopConf = spark.sessionState.newHadoopConf() + // scalastyle:on deltahadoopconfiguration + Some(UCDeltaRestCatalogApiCredentialContext( + uri.toString, + tokenProvider, + spark.conf.get( + renewCredentialEnabledConf(catalogName), + DefaultRenewCredentialEnabled.toString).toBoolean, + spark.conf.get( + credScopedFsEnabledConf(catalogName), + DefaultCredScopedFsEnabled.toString).toBoolean, + hadoopConf, + UCTokenBasedRestClientFactory.defaultAppVersionsAsJava)) + } + + private def buildHadoopCredentialPropertiesForPath( + location: String, + locationScheme: String, + pathOperation: PathOperation, + credentialContext: UCDeltaRestCatalogApiCredentialContext): Map[String, String] = { + UCCredentialHadoopConfs.builder( + credentialContext.uri, + locationScheme.toLowerCase(Locale.ROOT)) + .tokenProvider(credentialContext.tokenProvider) + .addAppVersions(credentialContext.appVersions) + .enableCredentialRenewal(credentialContext.renewCredentialEnabled) + .enableCredentialScopedFs(credentialContext.credScopedFsEnabled) + .hadoopConf(credentialContext.hadoopConf) + .buildForPath(location, pathOperation) + .asScala + .toMap + } + + private def buildHadoopCredentialPropertiesForPath( + location: String, + locationScheme: String, + 
pathOperation: PathOperation,
+      credentialContext: Option[UCDeltaRestCatalogApiCredentialContext]): Map[String, String] = {
+    credentialContext match {
+      case Some(resolvedContext) =>
+        buildHadoopCredentialPropertiesForPath(
+          location,
+          locationScheme,
+          pathOperation,
+          resolvedContext)
+      case None =>
+        throw new IllegalStateException(
+          "UC Delta Rest Catalog API credential context is missing for cloud path location " +
+            s"$location.")
+    }
+  }
+
+  /**
+   * Derives the table properties for a staged table from the staging response.
+   *
+   * Null-valued required properties are dropped, and a required table id that disagrees with
+   * the staging table id is rejected with an IllegalArgumentException.
+   */
+  private def toTableProperties(staging: StagingTableResponse): Map[String, String] = {
+    val stagingTableId = staging.getTableId.toString
+    val requiredProperties = Option(staging.getRequiredProperties)
+      .map(_.asScala.filter { case (_, value) => value != null }.toMap)
+      .getOrElse(Map.empty)
+    requiredProperties.get(UC_TABLE_ID_KEY) match {
+      case Some(requiredTableId) if requiredTableId != stagingTableId =>
+        throw new IllegalArgumentException(
+          s"UC Delta Rest Catalog API staging response table id $stagingTableId does not match " +
+            s"required property $UC_TABLE_ID_KEY=$requiredTableId.")
+      case _ => // Absent or matching id: nothing to reject.
+    }
+    val featureProperties = protocolFeatureProperties(staging.getRequiredProtocol)
+    val ucOwnedProperties = Map(
+      TableCatalog.PROP_IS_MANAGED_LOCATION -> "true",
+      UC_TABLE_ID_KEY -> stagingTableId)
+    // Later maps win so UC stays authoritative for table identity and managed-location markers.
+    featureProperties ++ requiredProperties ++ ucOwnedProperties
+  }
+
+  /**
+   * Maps every reader and writer feature of `protocol` to a `delta.feature.<name>` property
+   * with the value `supported`. A null protocol or a null feature list contributes nothing.
+   */
+  private def protocolFeatureProperties(
+      protocol: UCDeltaRestCatalogApiProtocol): Map[String, String] = {
+    Option(protocol) match {
+      case Some(required) =>
+        val readerFeatures = Option(required.getReaderFeatures).map(_.asScala).getOrElse(Nil)
+        val writerFeatures = Option(required.getWriterFeatures).map(_.asScala).getOrElse(Nil)
+        (readerFeatures ++ writerFeatures)
+          .map(feature => s"delta.feature.$feature" -> "supported")
+          .toMap
+      case None => Map.empty
+    }
+  }
+
+  /**
+   * Builds the final UC Delta Rest Catalog API createTable request from the post-commit Delta
+   * state.
+   */
+  private def toCreateTableRequest(
+      ident: Identifier,
+      table: CatalogTable,
+      snapshot: Snapshot): CreateTableRequest = {
+    // A request cannot be built without a storage location.
+    val tableLocation = table.storage.locationUri match {
+      case Some(locationUri) => locationUri.toString
+      case None =>
+        throw new IllegalArgumentException(
+          "UC Delta Rest Catalog API createTable requires a location for " +
+            s"${ident.toString}.")
+    }
+    new CreateTableRequest()
+      .name(ident.name())
+      .location(tableLocation)
+      .tableType(toDeltaTableType(table.tableType))
+      .dataSourceFormat(DeltaDataSourceFormat.DELTA)
+      .comment(table.comment.orNull)
+      .schemaString(snapshot.schema.json)
+      .partitionColumns(snapshot.metadata.partitionColumns.asJava)
+      .protocol(toDeltaProtocol(snapshot.protocol))
+      .properties(snapshot.metadata.configuration.asJava)
+      .lastCommitTimestampMs(snapshot.timestamp)
+  }
+
+  /**
+   * Translates a Spark catalog table type into the UC Delta Rest Catalog API table type.
+   * Only MANAGED and EXTERNAL are accepted; any other value raises IllegalArgumentException.
+   */
+  private def toDeltaTableType(tableType: CatalogTableType): DeltaTableType = {
+    if (tableType == CatalogTableType.MANAGED) {
+      DeltaTableType.MANAGED
+    } else if (tableType == CatalogTableType.EXTERNAL) {
+      DeltaTableType.EXTERNAL
+    } else {
+      throw new IllegalArgumentException(
+        s"Unsupported UC Delta Rest Catalog API table type: $tableType")
+    }
+  }
+
+  private def toDeltaProtocol(protocol: Protocol): UCDeltaRestCatalogApiProtocol = {
+    new UCDeltaRestCatalogApiProtocol()
+      .minReaderVersion(protocol.minReaderVersion)
+      .minWriterVersion(protocol.minWriterVersion)
+      // Keep wire JSON deterministic even though Protocol stores features as sets.
+      .readerFeatures(protocol.readerFeatureNames.toSeq.sorted.asJava)
+      .writerFeatures(protocol.writerFeatureNames.toSeq.sorted.asJava)
+  }
+
+  /**
+   * Builds a [[UCDeltaCatalogClient]] for the catalog wrapped by `delegatePlugin`.
+   *
+   * A UC Delta client is only created when a credential context is available for the catalog
+   * and the client implementation can be instantiated. The client is probed for UC Delta Rest
+   * Catalog API support before it is handed out, and it is closed on every path that does not
+   * hand it out, so neither a failed nor a negative probe leaks it.
+   *
+   * @param delegatePlugin catalog plugin whose name selects the catalog configuration
+   * @param spark session used to read the catalog configuration
+   * @return the catalog client; its UC Delta client is empty when none could be created
+   * @throws IllegalArgumentException if a client was created but it reports that the required
+   *                                  UC Delta Rest Catalog API endpoints are not supported
+   */
+  def apply(delegatePlugin: CatalogPlugin, spark: SparkSession): UCDeltaCatalogClient = {
+    val catalogName = delegatePlugin.name()
+    val credentialContext = ucDeltaRestCatalogApiCredentialContext(spark, catalogName)
+    val ucDeltaClient = credentialContext.flatMap { context =>
+      UCTokenBasedRestClientFactory.createUCDeltaClient(
+        context.uri,
+        context.tokenProvider,
+        context.appVersions,
+        catalogName).map { client =>
+        val supported = try {
+          client.supportsUCDeltaRestCatalogApi()
+        } catch {
+          case scala.util.control.NonFatal(probeFailure) =>
+            // The probe failed before the client was handed out; release it first and keep
+            // the probe failure as the primary error.
+            try {
+              client.close()
+            } catch {
+              case scala.util.control.NonFatal(closeFailure) =>
+                probeFailure.addSuppressed(closeFailure)
+            }
+            throw probeFailure
+        }
+        if (!supported) {
+          client.close()
+          throw new IllegalArgumentException(
+            s"UC Delta Rest Catalog API is enabled for catalog $catalogName, but the Unity " +
+              "Catalog server does not support the required UC Delta Rest Catalog API endpoints.")
+        }
+        client
+      }
+    }
+    new UCDeltaCatalogClient(ucDeltaClient, catalogName, credentialContext)
+  }
+
+}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverter.scala b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverter.scala
new file mode 100644
index 00000000000..809ce876acc
--- /dev/null
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverter.scala
@@ -0,0 +1,29 @@
+/*
+ * Copyright (2026) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.delta.catalog
+
+import org.apache.spark.sql.types.{DataType, StructType}
+
+private[catalog] object UCDeltaRestCatalogApiSchemaConverter {
+
+  /**
+   * Parses a JSON schema string into a Spark [[StructType]].
+   *
+   * @param schemaString JSON representation of the table schema
+   * @return the parsed struct schema
+   * @throws IllegalArgumentException if `schemaString` is null or does not describe a struct
+   */
+  def toSparkType(schemaString: String): StructType = {
+    if (schemaString == null) {
+      throw new IllegalArgumentException("UC Delta Rest Catalog API table schema is missing.")
+    }
+    DataType.fromJson(schemaString) match {
+      case struct: StructType => struct
+      case other =>
+        // Valid JSON can still describe a non-struct type; report that explicitly instead of
+        // failing with a ClassCastException from an unchecked cast.
+        throw new IllegalArgumentException(
+          "UC Delta Rest Catalog API table schema must be a struct type, but found " +
+            s"${other.typeName}.")
+    }
+  }
+}
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala
index fa40034cc4e..2d44f1641fc 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala
@@ -67,8 +67,9 @@ import org.apache.spark.util.Utils
  * @param output SQL output of the command
  * @param protocol This is used to create a table with specific protocol version
  * @param allowCatalogManaged This is used to create UC managed table with catalogManaged feature
- * @param createTableFunc If specified, call this function to create the table, instead of
- *                        Spark `SessionCatalog#createTable` which is backed by Hive Metastore.
+ * @param createTableFunc If specified, call this function with the post-commit snapshot to create
+ *                        the table, instead of Spark `SessionCatalog#createTable` which is backed
+ *                        by Hive Metastore.
*/ case class CreateDeltaTableCommand( override val table: CatalogTable, @@ -80,7 +81,7 @@ case class CreateDeltaTableCommand( override val output: Seq[Attribute] = Nil, protocol: Option[Protocol] = None, override val allowCatalogManaged: Boolean = false, - createTableFunc: Option[CatalogTable => Unit] = None) + createTableFunc: Option[(CatalogTable, Snapshot) => Unit] = None) extends LeafRunnableCommand with DeltaCommand with DeltaLogging diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala index d0e38f95e65..512339e1ddc 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableLike.scala @@ -91,14 +91,14 @@ trait CreateDeltaTableLike extends SQLConfHelper { snapshot: Snapshot, query: Option[LogicalPlan], didNotChangeMetadata: Boolean, - createTableFunc: Option[CatalogTable => Unit] = None + createTableFunc: Option[(CatalogTable, Snapshot) => Unit] = None ): Unit = { val cleaned = cleanupTableDefinition(spark, table, snapshot) operation match { case _ if tableByPath => // do nothing with the metastore if this is by path case TableCreationModes.Create => if (createTableFunc.isDefined) { - createTableFunc.get.apply(cleaned) + createTableFunc.get.apply(cleaned, snapshot) } else { spark.sessionState.catalog.createTable( cleaned, @@ -120,7 +120,7 @@ trait CreateDeltaTableLike extends SQLConfHelper { case Some(createFunc) => // This is the new missing-table path where creation is delegated through the V2 // catalog plugin (for example Unity Catalog) instead of SessionCatalog.createTable(). 
- createFunc(cleaned) + createFunc(cleaned, snapshot) case None => spark.sessionState.catalog.createTable( cleaned, diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/UCCommitCoordinatorBuilder.scala b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/UCCommitCoordinatorBuilder.scala index 0f92e15f870..e15ac7cb692 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/UCCommitCoordinatorBuilder.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/coordinatedcommits/UCCommitCoordinatorBuilder.scala @@ -23,7 +23,12 @@ import scala.collection.JavaConverters._ import scala.util.control.NonFatal import io.delta.storage.commit.CommitCoordinatorClient -import io.delta.storage.commit.uccommitcoordinator.{UCClient, UCCommitCoordinatorClient, UCTokenBasedRestClient} +import io.delta.storage.commit.uccommitcoordinator.{ + UCClient, + UCCommitCoordinatorClient, + UCDeltaClient, + UCTokenBasedRestClient +} import org.apache.spark.sql.delta.logging.DeltaLogKeys import org.apache.spark.sql.delta.metering.DeltaLogging @@ -31,6 +36,7 @@ import org.apache.spark.sql.delta.metering.DeltaLogging import io.unitycatalog.client.auth.TokenProvider import org.apache.spark.internal.MDC import org.apache.spark.sql.SparkSession +import org.apache.spark.util.Utils /** * Builder for Unity Catalog Commit Coordinator Clients. 
@@ -291,6 +297,9 @@ trait UCClientFactory {
 }
 
 object UCTokenBasedRestClientFactory extends UCClientFactory {
+  private val UCDeltaTokenBasedRestClientClassName =
+    "io.delta.storage.commit.uccommitcoordinator.UCDeltaTokenBasedRestClient"
+
   override def createUCClient(uri: String, authConfig: Map[String, String]): UCClient = {
     createUCClientWithVersions(uri, authConfig, defaultAppVersions)
   }
@@ -311,6 +320,40 @@ object UCTokenBasedRestClientFactory extends UCClientFactory {
     new UCTokenBasedRestClient(uri, tokenProvider, appVersions.asJava)
   }
 
+  /**
+   * Reflectively creates the UC Delta client when its implementation class can be loaded.
+   *
+   * @param uri server URI passed to the client constructor
+   * @param tokenProvider token provider passed to the client constructor
+   * @param appVersions application versions passed to the client constructor
+   * @param catalogName catalog name passed to the client constructor
+   * @return the client, or `None` when the implementation class cannot be found
+   */
+  private[delta] def createUCDeltaClient(
+      uri: String,
+      tokenProvider: TokenProvider,
+      appVersions: java.util.Map[String, String],
+      catalogName: String): Option[UCDeltaClient] = {
+    try {
+      val clientClass = Utils.classForName(UCDeltaTokenBasedRestClientClassName)
+      val constructor = clientClass.getConstructor(
+        classOf[String],
+        classOf[TokenProvider],
+        classOf[java.util.Map[String, String]],
+        classOf[String])
+      Some(constructor
+        .newInstance(uri, tokenProvider, appVersions, catalogName)
+        .asInstanceOf[UCDeltaClient])
+    } catch {
+      case _: ClassNotFoundException =>
+        None
+      case e: java.lang.reflect.InvocationTargetException if e.getCause != null =>
+        // Reflection wraps constructor failures; rethrow the real cause so callers see the
+        // same exception they would get from a direct constructor call.
+        throw e.getCause
+    }
+  }
+
   private[coordinatedcommits] def defaultAppVersions: Map[String, String] = {
     Map(
       "Delta" -> io.delta.VERSION,
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTable.scala b/spark/src/main/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTable.scala
index e6e0057fcaf..1da1ee756bd 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTable.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTable.scala
@@ -95,6 +95,10 @@ object ServerSidePlannedTable extends DeltaLogging {
     // Check if we should enable server-side planning (for testing)
     val enableServerSidePlanning =
       spark.conf.get(DeltaSQLConf.ENABLE_SERVER_SIDE_PLANNING.key, "false").toBoolean
+    if (!enableServerSidePlanning) {
+      return
None + } + val hasTableCredentials = hasCredentials(table) // Check if we should use server-side planning diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala index 96c222d1c85..d275d37b290 100644 --- a/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala +++ b/spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaDataSource.scala @@ -24,7 +24,7 @@ import scala.util.{Failure, Success, Try} import com.databricks.spark.util.DatabricksLogging import org.apache.spark.internal.MDC import org.apache.spark.sql.delta._ -import org.apache.spark.sql.delta.catalog.DeltaTableV2 +import org.apache.spark.sql.delta.catalog.{DeltaCatalogClient, DeltaTableV2} import org.apache.spark.sql.delta.commands.{ DeltaInsertReplaceOnOrUsingCommand, InsertReplaceOnOrUsingAPIOrigin, @@ -95,7 +95,12 @@ class DeltaDataSource catalogTableOpt .map(catalogTable => DeltaLog.forTableWithSnapshot( sparkSession, catalogTable, options)) - .getOrElse(DeltaLog.forTableWithSnapshot(sparkSession, path, options))._2 + .getOrElse { + DeltaLog.forTableWithSnapshot( + sparkSession, + path, + DeltaCatalogClient.pathCredentialOptions(sparkSession, path) ++ options) + }._2 } def inferSchema: StructType = new StructType() // empty diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala index 396822e2068..b6dcd717d7f 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala @@ -380,7 +380,7 @@ class DeltaCreateTableLikeSuite extends QueryTest snapshot, query = None, didNotChangeMetadata = true, - createTableFunc = Some((_: CatalogTable) => { + createTableFunc = Some((_: CatalogTable, _: Snapshot) => { createCallbackCalls += 1 })) } 
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClientSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClientSuite.scala new file mode 100644 index 00000000000..ed3913a831a --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/catalog/DeltaCatalogClientSuite.scala @@ -0,0 +1,975 @@ +/* + * Copyright (2026) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.catalog + +import java.io.IOException +import java.net.{InetSocketAddress, URI, URLDecoder} +import java.nio.charset.StandardCharsets + +import scala.collection.JavaConverters._ + +import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper} +import com.sun.net.httpserver.{HttpExchange, HttpServer} +import io.delta.storage.commit.uccommitcoordinator.UCCommitCoordinatorClient.UC_TABLE_ID_KEY +import org.apache.hadoop.fs.Path + +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.connector.catalog.{ + Identifier, + Table, + TableCatalog, + TableChange, + V1Table +} +import org.apache.spark.sql.delta.{CatalogOwnedTableFeature, DeltaLog, DummySnapshot} +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import 
org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.types.{ArrayType, IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class DeltaCatalogClientSuite + extends QueryTest + with DeltaSQLCommandTest + with BeforeAndAfterAll + with BeforeAndAfterEach { + + private var server: HttpServer = _ + private var serverUri: String = _ + private var configHandler: HttpExchange => Unit = _ + private var handler: HttpExchange => Unit = _ + private var pathCredentialsHandler: HttpExchange => Unit = _ + private var credentialRequestCount: Int = _ + private val objectMapper = new ObjectMapper() + + private val AwsVendedTokenProviderClass = + "io.unitycatalog.hadoop.internal.auth.AwsVendedTokenProvider" + private val S3ACredentialsProviderKey = "fs.s3a.aws.credentials.provider" + private val S3AInitAccessKey = "fs.s3a.init.access.key" + private val UCTableOperationKey = "fs.unitycatalog.table.operation" + private val UCCredentialsTypeKey = "fs.unitycatalog.credentials.type" + private val UCCredentialsTypePathValue = "path" + private val UCPathKey = "fs.unitycatalog.path" + private val UCPathOperationKey = "fs.unitycatalog.path.operation" + + override def beforeAll(): Unit = { + super.beforeAll() + server = HttpServer.create(new InetSocketAddress("localhost", 0), 0) + server.createContext("/api/2.1/unity-catalog/delta/v1/config", exchange => { + try { + if (configHandler != null) { + configHandler(exchange) + } else { + sendJson(exchange, 200, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) + } + } finally { + exchange.close() + } + }) + server.createContext("/api/2.1/unity-catalog/delta/v1/catalogs", exchange => { + try { + if (handler != null) handler(exchange) else sendJson(exchange, 404, "{}") + } finally { + exchange.close() + } + }) + 
server.createContext("/api/2.1/unity-catalog/temporary-path-credentials", exchange => { + try { + if (pathCredentialsHandler != null) { + pathCredentialsHandler(exchange) + } else { + sendJson(exchange, 404, "{}") + } + } finally { + exchange.close() + } + }) + server.start() + serverUri = s"http://localhost:${server.getAddress.getPort}" + } + + override def afterAll(): Unit = { + if (server != null) server.stop(0) + super.afterAll() + } + + override def beforeEach(): Unit = { + super.beforeEach() + configHandler = null + handler = null + pathCredentialsHandler = null + credentialRequestCount = 0 + } + + test("loadTable skips credentials for local Delta locations") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 200, loadTableResponseJson("file:/tmp/uc/table")) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialRequestCount += 1 + sendJson(exchange, 500, "{}") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val table = loadWithUCDeltaRestCatalogApi() + val properties = table.catalogTable.storage.properties + val idMetadata = table.catalogTable.schema("id").metadata + + assert(credentialRequestCount === 0) + assert(properties === Map( + "delta.feature.catalogManaged" -> "supported", + UC_TABLE_ID_KEY -> "11111111-1111-1111-1111-111111111111")) + assert(idMetadata.getLong("delta.columnMapping.id") === 1L) + assert(idMetadata.getString("delta.columnMapping.physicalName") === "col-123") + } + + test("loadTable fails loudly when cloud credentials are empty") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 200, loadTableResponseJson("s3://bucket/table")) + case 
"/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialRequestCount += 1 + sendJson(exchange, 200, """{"storage-credentials": []}""") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val error = intercept[IllegalArgumentException] { + loadWithUCDeltaRestCatalogApi() + } + + assert(credentialRequestCount === 1) + assert(error.getMessage.contains("no storage credentials")) + } + + test("loadTable accepts trailing-slash cloud credential prefixes") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 200, loadTableResponseJson("s3://bucket/path/to/table")) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialRequestCount += 1 + assert(exchange.getRequestURI.getQuery === "operation=READ_WRITE") + sendJson(exchange, 200, s3CredentialsResponseJson( + "s3://bucket/path/to/table/", + "READ_WRITE")) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val table = loadWithUCDeltaRestCatalogApi() + val storageProperties = table.catalogTable.storage.properties + val tableProperties = table.properties.asScala + + assert(credentialRequestCount === 1) + assert(storageProperties(S3ACredentialsProviderKey) === + AwsVendedTokenProviderClass) + assert(storageProperties(S3AInitAccessKey) === "ak") + assert(tableProperties( + s"option.${S3ACredentialsProviderKey}") === + AwsVendedTokenProviderClass) + assert(tableProperties( + s"option.${S3AInitAccessKey}") === "ak") + assert(storageProperties("delta.feature.catalogManaged") === "supported") + assert(!tableProperties.contains("delta.feature.catalogManaged")) + assert(!tableProperties.contains( + s"option.option.${S3AInitAccessKey}")) + } + + test("loadTable falls back to READ credentials when READ_WRITE is denied") { + var credentialQueries = 
Seq.empty[String] + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 200, loadTableResponseJson("s3://bucket/path/to/table")) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialQueries :+= exchange.getRequestURI.getQuery + exchange.getRequestURI.getQuery match { + case "operation=READ_WRITE" => + sendJson(exchange, 403, """{"error_code": "PERMISSION_DENIED"}""") + case "operation=READ" => + sendJson(exchange, 200, s3CredentialsResponseJson("s3://bucket/path/to/table", "READ")) + case other => + fail(s"Unexpected credential query: $other") + } + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val table = loadWithUCDeltaRestCatalogApi() + + assert(credentialQueries === Seq("operation=READ_WRITE", "operation=READ")) + assert(table.catalogTable.storage.properties( + UCTableOperationKey) === "READ") + } + + test("loadTable uses static credential properties when renewal is disabled") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 200, loadTableResponseJson("s3://bucket/path/to/table")) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialRequestCount += 1 + sendJson( + exchange, + 200, + s3CredentialsResponseJson("s3://bucket/path/to/table", "READ_WRITE")) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val table = withUCDeltaRestCatalogApiRenewalDisabled { catalog => + catalog.loadTable(Identifier.of(Array("default"), "tbl")).get.asInstanceOf[V1Table] + } + + assert(credentialRequestCount === 1) + assert(table.catalogTable.storage.properties("fs.s3a.access.key") === "ak") + assert(!table.catalogTable.storage.properties.contains( + 
S3ACredentialsProviderKey)) + } + + test("loadTable maps missing provider to None") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson( + exchange, + 200, + loadTableResponseJson("file:/tmp/uc/table") + .replace("\"data-source-format\": \"DELTA\"", "\"data-source-format\": null")) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + fail("Unexpected credentials request for local path") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val table = loadWithUCDeltaRestCatalogApi() + + assert(table.catalogTable.provider.isEmpty) + } + + test("loadTable falls back when UC Delta Rest Catalog API reports unsupported table format") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 501, + """{ + | "error": { + | "message": "Table exists but is not supported by the Delta endpoint.", + | "type": "UnsupportedTableFormatException", + | "code": 501 + | } + |}""".stripMargin) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + fail("Unexpected credentials request after unsupported table format") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val loaded = withUCDeltaRestCatalogApi { catalog => + catalog.loadTable(Identifier.of(Array("default"), "tbl")) + } + + assert(loaded.isEmpty) + } + + test("loadTable propagates generic UC Delta Rest Catalog API 501 errors") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 501, + """{ + | "error": { + | "message": "Not implemented.", + | "type": "NotImplementedException", + | "code": 501 + | } + |}""".stripMargin) + case 
"/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + fail("Unexpected credentials request after loadTable failure") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val error = intercept[IOException] { + loadWithUCDeltaRestCatalogApi() + } + + assert(error.getMessage.contains("Failed to load table uc.default.tbl")) + assert(error.getMessage.contains("HTTP 501")) + assert(error.getMessage.contains("NotImplementedException")) + } + + test("loadTable propagates UC Delta Rest Catalog API server errors") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl" => + sendJson(exchange, 500, """{"error_code":"INTERNAL_ERROR"}""") + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + fail("Unexpected credentials request after loadTable failure") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val error = intercept[IOException] { + loadWithUCDeltaRestCatalogApi() + } + + assert(error.getMessage.contains("Failed to load table uc.default.tbl")) + assert(error.getMessage.contains("HTTP 500")) + } + + test("apply fails when UC Delta Rest Catalog API is enabled but unsupported") { + configHandler = exchange => sendJson(exchange, 200, + """{ + | "endpoints": [], + | "protocol-version": "1.0" + |}""".stripMargin) + handler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API request path: ${exchange.getRequestURI.getPath}") + + val error = intercept[IllegalArgumentException] { + withUCDeltaRestCatalogApi { catalog => + catalog.loadTable(Identifier.of(Array("default"), "tbl")) + } + } + + assert(error.getMessage.contains("UC Delta Rest Catalog API is enabled for catalog uc")) + assert(error.getMessage.contains( + "does not support the required UC Delta Rest Catalog API endpoints")) + } + + test("loadTable does not probe UC 
Delta Rest Catalog API when disabled") { + configHandler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API config request: ${exchange.getRequestURI}") + handler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API request path: ${exchange.getRequestURI.getPath}") + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token") { + val catalog = UCDeltaCatalogClient(new TestDelegateCatalog, spark) + assert(catalog.loadTable(Identifier.of(Array("default"), "tbl")).isEmpty) + } + } + + test("apply fails when UC Delta Rest Catalog API is enabled without UC config") { + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + UCDeltaCatalogClient.deltaRestApiEnabledConf("uc") -> "true") { + val error = intercept[IllegalArgumentException] { + UCDeltaCatalogClient(new TestDelegateCatalog, spark) + } + assert(error.getMessage.contains("configuration is missing or incomplete")) + } + } + + test("loadTable skips UC Delta Rest Catalog API for delta path identifiers") { + handler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API table request: ${exchange.getRequestURI}") + + withUCDeltaRestCatalogApi { catalog => + assert(catalog.loadTable( + Identifier.of(Array("delta"), "s3://bucket/path/to/table")).isEmpty) + } + } + + test( + "pathCredentialOptions returns UC Delta Rest Catalog API path credential properties " + + "for cloud paths") { + configHandler = exchange => { + assert(queryParams(exchange)("catalog") === "uc") + sendJson(exchange, 200, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) + } + pathCredentialsHandler = exchange => { + assert(exchange.getRequestMethod === "POST") + assertJsonContains(exchange, Seq( + "\"url\":\"s3://bucket/path/to/table\"", + "\"operation\":\"PATH_READ\"")) + sendJson(exchange, 
200, s3TemporaryCredentialsResponseJson()) + } + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token", + "spark.sql.defaultCatalog" -> "uc", + DeltaCatalogClient.deltaRestApiEnabledConf("uc") -> "true") { + val props = DeltaCatalogClient.pathCredentialOptions( + spark, + new Path("s3://bucket/path/to/table")) + + assert(props(S3ACredentialsProviderKey) === + AwsVendedTokenProviderClass) + assert(props(S3AInitAccessKey) === "ak") + assert(props(UCCredentialsTypeKey) === UCCredentialsTypePathValue) + assert(props(UCPathKey) === + "s3://bucket/path/to/table") + assert(props(UCPathOperationKey) === "PATH_READ") + } + } + + test("pathCredentialOptions returns empty when path credentials are unavailable") { + configHandler = exchange => { + assert(queryParams(exchange)("catalog") === "uc") + sendJson(exchange, 200, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) + } + pathCredentialsHandler = exchange => sendJson(exchange, 404, "{}") + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token", + "spark.sql.defaultCatalog" -> "uc", + DeltaCatalogClient.deltaRestApiEnabledConf("uc") -> "true") { + val props = DeltaCatalogClient.pathCredentialOptions( + spark, + new Path("s3://bucket/path/to/table")) + + assert(props.isEmpty) + } + } + + test("pathCredentialOptions returns empty when path is not governed by UC") { + configHandler = exchange => { + assert(queryParams(exchange)("catalog") === "uc") + sendJson(exchange, 200, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) + } + pathCredentialsHandler = exchange => { + 
assert(exchange.getRequestMethod === "POST") + assertJsonContains(exchange, Seq("\"url\":\"s3://other-bucket/path/to/table\"")) + sendJson(exchange, 404, """{"error_code":"NOT_FOUND"}""") + } + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token", + "spark.sql.defaultCatalog" -> "uc", + DeltaCatalogClient.deltaRestApiEnabledConf("uc") -> "true") { + val props = DeltaCatalogClient.pathCredentialOptions( + spark, + new Path("s3://other-bucket/path/to/table")) + + assert(props.isEmpty) + } + } + + test( + "pathCredentialOptions returns empty when no UC Delta Rest Catalog API catalog is " + + "configured") { + configHandler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API config request: ${exchange.getRequestURI}") + pathCredentialsHandler = exchange => + fail(s"Unexpected temporary path credentials request: ${exchange.getRequestURI}") + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token") { + val props = DeltaCatalogClient.pathCredentialOptions( + spark, + new Path("s3://bucket/path/to/table")) + + assert(props.isEmpty) + } + } + + test("prepareCreateTable uses UC Delta Rest Catalog API staging response for managed tables") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/staging-tables" => + sendJson(exchange, 200, + """{ + | "table-id": "11111111-1111-1111-1111-111111111111", + | "table-type": "MANAGED", + | "location": "s3://bucket/table", + | "storage-credentials": [], + | "required-protocol": { + | "min-reader-version": 3, + | "min-writer-version": 7, + | "reader-features": ["catalogManaged"], + | "writer-features": ["catalogManaged"] + | }, + | "required-properties": { + | "delta.enableDeletionVectors": "true", + | 
"io.unitycatalog.tableId": "11111111-1111-1111-1111-111111111111" + | } + |}""".stripMargin) + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables/tbl/credentials" => + credentialRequestCount += 1 + assert(exchange.getRequestURI.getQuery === "operation=READ_WRITE") + sendJson(exchange, 200, s3CredentialsResponseJson("s3://bucket/table", "READ_WRITE")) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val prepared = withUCDeltaRestCatalogApi { catalog => + catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + location = None).get + } + + assert(prepared.location.toString === "s3://bucket/table") + assert(prepared.tableProperties(TableCatalog.PROP_IS_MANAGED_LOCATION) === "true") + assert(prepared.tableProperties("delta.feature.catalogManaged") === "supported") + assert(prepared.tableProperties("delta.enableDeletionVectors") === "true") + assert(prepared.tableProperties("io.unitycatalog.tableId") === + "11111111-1111-1111-1111-111111111111") + assert(credentialRequestCount === 1) + assert(prepared.storageProperties(S3ACredentialsProviderKey) === + AwsVendedTokenProviderClass) + assert(prepared.storageProperties(S3AInitAccessKey) === "ak") + } + + test("prepareCreateTable allows local managed staging without credentials") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/staging-tables" => + sendJson(exchange, 200, + """{ + | "table-id": "11111111-1111-1111-1111-111111111111", + | "table-type": "MANAGED", + | "location": "/tmp/uc-managed-tables/default/tbl", + | "storage-credentials": [], + | "required-properties": {} + |}""".stripMargin) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val prepared = withUCDeltaRestCatalogApi { catalog => + catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + 
location = None).get + } + + assert(prepared.location.toString === "/tmp/uc-managed-tables/default/tbl") + assert(prepared.storageProperties.isEmpty) + } + + test("prepareCreateTable does not intercept unsupported create shapes") { + handler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API request path: ${exchange.getRequestURI.getPath}") + + withUCDeltaRestCatalogApi { catalog => + assert(catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + location = Some(new URI("file:/tmp/user-location"))).isEmpty) + assert(catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.EXTERNAL, + location = None).isEmpty) + assert(catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.EXTERNAL, + location = Some(new URI("file:/tmp/external"))).isEmpty) + assert(catalog.prepareCreateTable( + Identifier.of(Array("nested", "default"), "tbl"), + CatalogTableType.MANAGED, + location = None).isEmpty) + } + } + + test("prepareCreateTable returns None when UC Delta Rest Catalog API is disabled") { + handler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API request path: ${exchange.getRequestURI.getPath}") + + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token") { + val catalog = UCDeltaCatalogClient(new TestDelegateCatalog, spark) + assert(catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + location = None).isEmpty) + } + } + + test("prepareCreateTable propagates staging errors and rejects table id mismatches") { + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/staging-tables" => + sendJson(exchange, 500, """{"error_code":"INTERNAL_ERROR"}""") + case path => + fail(s"Unexpected UC Delta Rest Catalog API request 
path: $path") + } + + val stagingError = intercept[IOException] { + withUCDeltaRestCatalogApi { catalog => + catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + location = None) + } + } + assert(stagingError.getMessage.contains("Failed to create staging table")) + assert(stagingError.getMessage.contains("HTTP 500")) + + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/staging-tables" => + sendJson(exchange, 200, + """{ + | "table-id": "11111111-1111-1111-1111-111111111111", + | "table-type": "MANAGED", + | "location": "/tmp/uc-managed-tables/default/tbl", + | "storage-credentials": [], + | "required-properties": { + | "io.unitycatalog.tableId": "22222222-2222-2222-2222-222222222222" + | } + |}""".stripMargin) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + val tableIdError = intercept[IllegalArgumentException] { + withUCDeltaRestCatalogApi { catalog => + catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.MANAGED, + location = None) + } + } + assert(tableIdError.getMessage.contains("does not match")) + assert(tableIdError.getMessage.contains("io.unitycatalog.tableId")) + } + + test("createTable fails when called without a prepared UC Delta Rest Catalog API create") { + val error = intercept[IllegalStateException] { + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", + "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token") { + val catalog = UCDeltaCatalogClient(new TestDelegateCatalog, spark) + catalog.createTable(Identifier.of(Array("default"), "tbl"), null, null) + } + } + + assert(error.getMessage.contains("UC Delta Rest Catalog API createTable is not available")) + } + + test("createTable posts Delta metadata to the expected UC namespace") { + var requestJson: JsonNode = null + var 
expectedLocation: String = null + val tableLocation = "file:/tmp/uc-created-table" + handler = exchange => exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/uc/schemas/default/tables" => + assert(exchange.getRequestMethod === "POST") + requestJson = objectMapper.readTree(readRequestBody(exchange)) + sendJson(exchange, 200, loadTableResponseJson(tableLocation)) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") + } + + withTempDir { dir => + val schema = new StructType() + .add("id", LongType) + .add("payload", new StructType() + .add("name", StringType, nullable = true) + .add("scores", ArrayType(IntegerType))) + .add("p", StringType) + val metadata = Metadata( + schemaString = schema.json, + partitionColumns = Seq("p"), + configuration = Map("user.prop" -> "kept")) + val tablePath = new Path(dir.getCanonicalPath) + expectedLocation = dir.toURI.toString + val snapshot = new DummySnapshot( + new Path(tablePath, "_delta_log"), + DeltaLog.forTable(spark, tablePath), + metadata, + Some(Protocol.forTableFeature(CatalogOwnedTableFeature))) + val table = CatalogTable( + identifier = TableIdentifier("tbl", Some("default"), Some("uc")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty.copy(locationUri = Some(dir.toURI)), + schema = schema, + provider = Some("delta"), + partitionColumnNames = Seq("p"), + properties = Map( + TableCatalog.PROP_PROVIDER -> "delta", + TableCatalog.PROP_COMMENT -> "ignored-property-comment", + TableCatalog.PROP_LOCATION -> "ignored-location", + TableCatalog.PROP_IS_MANAGED_LOCATION -> "true", + "path" -> "ignored-path", + "option.path" -> "ignored-option-path"), + comment = Some("table comment")) + + withUCDeltaRestCatalogApi { catalog => + catalog.createTable(Identifier.of(Array("default"), "tbl"), table, snapshot) + } + } + + assert(requestJson.get("name").asText === "tbl") + assert(requestJson.get("location").asText === expectedLocation) + 
assert(requestJson.get("table-type").asText === "MANAGED") + assert(requestJson.get("data-source-format").asText === "DELTA") + assert(requestJson.get("comment").asText === "table comment") + + val protocol = requestJson.get("protocol") + assert(protocol.get("min-reader-version").asInt === 3) + assert(protocol.get("min-writer-version").asInt === 7) + assert(protocol.get("reader-features").elements().asScala.map(_.asText).toSeq === + Seq("catalogManaged", "vacuumProtocolCheck")) + assert(protocol.get("writer-features").elements().asScala.map(_.asText).toSeq === + Seq("catalogManaged", "inCommitTimestamp", "vacuumProtocolCheck")) + + assert(requestJson.get("partition-columns").elements().asScala.map(_.asText).toSeq === Seq("p")) + + val fields = requestJson.get("columns").get("fields") + assert(fields.size() === 3) + assert(fields.get(0).get("name").asText === "id") + assert(deltaTypeName(fields.get(0).get("type")) === "long") + assert(fields.get(1).get("name").asText === "payload") + assert(deltaTypeName(fields.get(1).get("type")) === "struct") + assert(deltaTypeName(fields.get(1).get("type").get("fields").get(1).get("type")) === "array") + + val properties = requestJson.get("properties") + assert(properties.get("user.prop").asText === "kept") + Seq( + TableCatalog.PROP_PROVIDER, + TableCatalog.PROP_COMMENT, + TableCatalog.PROP_LOCATION, + TableCatalog.PROP_IS_MANAGED_LOCATION, + "path", + "option.path").foreach { key => + assert(!properties.has(key), s"CreateTableRequest should not include $key") + } + } + + test("prepareCreateTable uses temporary path credentials for external cloud tables") { + val location = "s3://bucket/external/tbl" + configHandler = exchange => { + assert(queryParams(exchange)("catalog") === "uc") + sendJson(exchange, 200, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}", + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}/credentials", + | "GET /v1/temporary-path-credentials" + | ], + | 
"protocol-version": "1.0" + |}""".stripMargin) + } + pathCredentialsHandler = exchange => { + credentialRequestCount += 1 + assert(exchange.getRequestMethod === "POST") + assertJsonContains(exchange, Seq( + s""""url":"$location"""", + """"operation":"PATH_CREATE_TABLE"""")) + sendJson(exchange, 200, s3TemporaryCredentialsResponseJson()) + } + + val prepared = withUCDeltaRestCatalogApi { catalog => + catalog.prepareCreateTable( + Identifier.of(Array("default"), "tbl"), + CatalogTableType.EXTERNAL, + location = Some(java.net.URI.create(location))).get + } + + assert(credentialRequestCount === 1) + assert(prepared.location.toString === location) + assert(prepared.tableProperties.isEmpty) + assert(prepared.storageProperties(S3ACredentialsProviderKey) === + AwsVendedTokenProviderClass) + assert(prepared.storageProperties(UCCredentialsTypeKey) === UCCredentialsTypePathValue) + assert(prepared.storageProperties(UCPathOperationKey) === + "PATH_CREATE_TABLE") + assert(prepared.storageProperties(UCPathKey) === location) + } + + private def loadWithUCDeltaRestCatalogApi(): V1Table = { + withUCDeltaRestCatalogApi { catalog => + catalog.loadTable(Identifier.of(Array("default"), "tbl")).get.asInstanceOf[V1Table] + } + } + + private def withUCDeltaRestCatalogApi[T](f: DeltaCatalogClient => T): T = { + withUCDeltaRestCatalogApi(new TestDelegateCatalog, renewCredentialEnabled = true)(f) + } + + private def withUCDeltaRestCatalogApiRenewalDisabled[T](f: DeltaCatalogClient => T): T = { + withUCDeltaRestCatalogApi(new TestDelegateCatalog, renewCredentialEnabled = false)(f) + } + + private def withUCDeltaRestCatalogApi[T]( + delegate: TableCatalog)( + f: DeltaCatalogClient => T): T = { + withUCDeltaRestCatalogApi(delegate, renewCredentialEnabled = true)(f) + } + + private def withUCDeltaRestCatalogApi[T]( + delegate: TableCatalog, + renewCredentialEnabled: Boolean)( + f: DeltaCatalogClient => T): T = { + withSQLConf( + "spark.sql.catalog.uc" -> "io.unitycatalog.spark.UCSingleCatalog", 
+ "spark.sql.catalog.uc.uri" -> serverUri, + "spark.sql.catalog.uc.token" -> "mock-token", + UCDeltaCatalogClient.renewCredentialEnabledConf("uc") -> renewCredentialEnabled.toString, + UCDeltaCatalogClient.deltaRestApiEnabledConf("uc") -> "true") { + val catalog = UCDeltaCatalogClient(delegate, spark) + f(catalog) + } + } + + private def loadTableResponseJson( + location: String, + dataSourceFormat: String = "DELTA"): String = + s"""{ + | "metadata": { + | "data-source-format": "$dataSourceFormat", + | "table-type": "MANAGED", + | "table-uuid": "11111111-1111-1111-1111-111111111111", + | "location": "$location", + | "columns": { + | "type": "struct", + | "fields": [ + | { + | "name": "id", + | "type": "long", + | "nullable": false, + | "metadata": { + | "delta.columnMapping.id": 1, + | "delta.columnMapping.physicalName": "col-123" + | } + | } + | ] + | }, + | "partition-columns": [], + | "properties": { + | "delta.feature.catalogManaged": "supported", + | "$UC_TABLE_ID_KEY": "11111111-1111-1111-1111-111111111111" + | } + | }, + | "commits": [] + |}""".stripMargin + + private def s3CredentialsResponseJson(prefix: String, operation: String): String = + s"""{ + | "storage-credentials": [ + | { + | "prefix": "$prefix", + | "operation": "$operation", + | "config": { + | "s3.access-key-id": "ak", + | "s3.secret-access-key": "sk", + | "s3.session-token": "st" + | } + | } + | ] + |}""".stripMargin + + private def s3TemporaryCredentialsResponseJson(): String = + """{ + | "aws_temp_credentials": { + | "access_key_id": "ak", + | "secret_access_key": "sk", + | "session_token": "st" + | }, + | "expiration_time": 1710000000000 + |}""".stripMargin + + private def assertJsonContains(exchange: HttpExchange, expectedSnippets: Seq[String]): Unit = { + val body = new String(exchange.getRequestBody.readAllBytes(), StandardCharsets.UTF_8) + .replaceAll("\\s+", "") + expectedSnippets.foreach { snippet => + assert(body.contains(snippet), s"Expected request body $body to contain $snippet") 
+ } + } + + private def queryParams(exchange: HttpExchange): Map[String, String] = { + Option(exchange.getRequestURI.getRawQuery).toSeq + .flatMap(_.split("&")) + .filter(_.nonEmpty) + .map { kv => + val pair = kv.split("=", 2) + val key = URLDecoder.decode(pair(0), StandardCharsets.UTF_8) + val value = if (pair.length == 2) { + URLDecoder.decode(pair(1), StandardCharsets.UTF_8) + } else { + "" + } + key -> value + }.toMap + } + + private def readRequestBody(exchange: HttpExchange): String = { + val input = exchange.getRequestBody + try { + new String(input.readAllBytes(), StandardCharsets.UTF_8) + } finally { + input.close() + } + } + + private def deltaTypeName(deltaType: JsonNode): String = { + if (deltaType.isTextual) deltaType.asText else deltaType.get("type").asText + } + + private def sendJson(exchange: HttpExchange, status: Int, body: String): Unit = { + val bytes = body.getBytes(StandardCharsets.UTF_8) + exchange.getResponseHeaders.add("Content-Type", "application/json") + exchange.sendResponseHeaders(status, bytes.length) + exchange.getResponseBody.write(bytes) + exchange.getResponseBody.close() + } + + private class TestDelegateCatalog extends TableCatalog { + override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {} + override def name(): String = "uc" + override def listTables(namespace: Array[String]): Array[Identifier] = Array.empty + override def loadTable(ident: Identifier): Table = + throw new IllegalStateException("unexpected loadTable call") + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[org.apache.spark.sql.connector.expressions.Transform], + properties: java.util.Map[String, String]): Table = + throw new UnsupportedOperationException("not needed in this test") + override def alterTable(ident: Identifier, changes: TableChange*): Table = + throw new UnsupportedOperationException("not needed in this test") + override def dropTable(ident: Identifier): Boolean = + throw new 
UnsupportedOperationException("not needed in this test") + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = + throw new UnsupportedOperationException("not needed in this test") + } + +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverterSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverterSuite.scala new file mode 100644 index 00000000000..e115e8d5856 --- /dev/null +++ b/spark/src/test/scala/org/apache/spark/sql/delta/catalog/UCDeltaRestCatalogApiSchemaConverterSuite.scala @@ -0,0 +1,63 @@ +/* + * Copyright (2026) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta.catalog + +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.spark.sql.types.{ + ArrayType, + BooleanType, + DecimalType, + IntegerType, + LongType, + MapType, + MetadataBuilder, + StringType, + StructField, + StructType +} + +class UCDeltaRestCatalogApiSchemaConverterSuite extends AnyFunSuite { + + test("converts Delta schema JSON to Spark schema") { + val fieldMetadata = new MetadataBuilder() + .putLong("delta.columnMapping.id", 1L) + .putString("delta.columnMapping.physicalName", "col-1") + .build() + val inputSchema = StructType(Seq( + StructField("id", LongType, nullable = false, fieldMetadata), + StructField("amount", DecimalType(10, 2)), + StructField("values", ArrayType(IntegerType, containsNull = true)), + StructField("tags", MapType(StringType, BooleanType, valueContainsNull = false)), + StructField("nested", StructType(Seq(StructField("name", StringType)))))) + + val schema = UCDeltaRestCatalogApiSchemaConverter.toSparkType(inputSchema.json) + + assert(schema === inputSchema) + assert(!schema("id").nullable) + assert(schema("id").metadata.getLong("delta.columnMapping.id") === 1L) + assert(schema("id").metadata.getString("delta.columnMapping.physicalName") === "col-1") + } + + test("rejects missing schema JSON") { + val e = intercept[IllegalArgumentException] { + UCDeltaRestCatalogApiSchemaConverter.toSparkType(null) + } + + assert(e.getMessage === "UC Delta Rest Catalog API table schema is missing.") + } +} diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTableSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTableSuite.scala index 231ea7493ed..dcf94676748 100644 --- a/spark/src/test/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTableSuite.scala +++ b/spark/src/test/scala/org/apache/spark/sql/delta/serverSidePlanning/ServerSidePlannedTableSuite.scala @@ -17,9 +17,11 @@ package 
org.apache.spark.sql.delta.serverSidePlanning import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCapability} import org.apache.spark.sql.delta.sources.DeltaSQLConf import org.apache.spark.sql.delta.test.DeltaSQLCommandTest import org.apache.spark.sql.sources.{And, EqualTo, Filter, GreaterThan, LessThan} +import org.apache.spark.sql.types.StructType /** * Tests for server-side planning with a mock client. @@ -142,6 +144,33 @@ class ServerSidePlannedTableSuite extends QueryTest with DeltaSQLCommandTest { s"Expected normal table but got ServerSidePlannedTable when config is disabled") } + test("disabled server-side planning does not inspect table credentials") { + val throwingTable = new Table { + override def name(): String = "throwing_table" + override def schema(): StructType = new StructType() + override def properties(): java.util.Map[String, String] = + throw new IllegalStateException("properties should not be called") + override def capabilities(): java.util.Set[TableCapability] = + java.util.Collections.emptySet() + } + + val originalConfig = spark.conf.getOption(DeltaSQLConf.ENABLE_SERVER_SIDE_PLANNING.key) + spark.conf.set(DeltaSQLConf.ENABLE_SERVER_SIDE_PLANNING.key, "false") + try { + val plannedTable = ServerSidePlannedTable.tryCreate( + spark, + Identifier.of(Array("db"), "throwing_table"), + throwingTable, + isUnityCatalog = true) + assert(plannedTable.isEmpty) + } finally { + originalConfig match { + case Some(value) => spark.conf.set(DeltaSQLConf.ENABLE_SERVER_SIDE_PLANNING.key, value) + case None => spark.conf.unset(DeltaSQLConf.ENABLE_SERVER_SIDE_PLANNING.key) + } + } + } + test("shouldUseServerSidePlanning() decision logic") { // ============================================================ // Production mode: skipUCRequirementForTests = false diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/S3CredentialFileSystem.java 
b/spark/unitycatalog/src/test/java/io/sparkuctest/S3CredentialFileSystem.java index c4e6d9c1501..6b2dec556d9 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/S3CredentialFileSystem.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/S3CredentialFileSystem.java @@ -149,14 +149,28 @@ private void assertCredentials() { private synchronized AwsCredentialsProvider resolveProvider(Configuration conf) { if (provider != null) return provider; - String clazz = conf.get(S3A_CREDENTIALS_PROVIDER); - if (clazz == null) return null; - try { - provider = - (AwsCredentialsProvider) - Class.forName(clazz).getConstructor(Configuration.class).newInstance(conf); - } catch (Exception e) { - throw new RuntimeException("Failed to instantiate credential provider: " + clazz, e); + String classes = conf.get(S3A_CREDENTIALS_PROVIDER); + if (classes == null) return null; + + // S3A accepts a comma-separated provider chain. This fake filesystem only understands + // AWS SDK v2 providers; if none are present, assert static UC-vended Hadoop keys instead. + for (String clazz : classes.split(",")) { + String trimmed = clazz.trim(); + if (trimmed.isEmpty()) continue; + try { + Class candidate = Class.forName(trimmed); + if (!AwsCredentialsProvider.class.isAssignableFrom(candidate)) continue; + provider = + (AwsCredentialsProvider) + candidate.getConstructor(Configuration.class).newInstance(conf); + return provider; + } catch (ClassNotFoundException e) { + // Ignore providers that are valid for real S3A but absent from this test classpath. + } catch (NoSuchMethodException e) { + // Ignore AWS providers that real S3A can construct without a Hadoop Configuration. 
+ } catch (ReflectiveOperationException e) { + throw new RuntimeException("Failed to instantiate credential provider: " + trimmed, e); + } } return provider; } diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableCreationTest.java b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableCreationTest.java index bc819ecff22..cace5f3f48d 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableCreationTest.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableCreationTest.java @@ -24,6 +24,7 @@ import io.unitycatalog.client.ApiException; import io.unitycatalog.client.api.TablesApi; import io.unitycatalog.client.model.ColumnInfo; +import io.unitycatalog.client.model.ColumnTypeName; import io.unitycatalog.client.model.DataSourceFormat; import io.unitycatalog.client.model.TableInfo; import java.util.ArrayList; @@ -556,6 +557,9 @@ public void testTableWithComplexTypes(TableType tableType) throws Exception { Map.of(), null, null); + if (tableType == TableType.MANAGED) { + assertComplexColumnMetadata(tableName); + } // Verify data can be queried check( @@ -625,8 +629,7 @@ private void assertUCTableInfo( String schemaName = uc.schemaName(); // Verify that properties are set on server. This can not be done by DESC EXTENDED. 
- TablesApi tablesApi = new TablesApi(uc.createApiClient()); - TableInfo tableInfo = tablesApi.getTable(fullTableName, false, false); + TableInfo tableInfo = loadUCTableInfo(fullTableName); assertThat(tableInfo.getCatalogName()).isEqualTo(catalogName); assertThat(tableInfo.getName()).isEqualTo(parseTableName(fullTableName)); assertThat(tableInfo.getSchemaName()).isEqualTo(schemaName); @@ -640,21 +643,22 @@ private void assertUCTableInfo( List columns = tableInfo.getColumns(); assertThat(columns).isNotNull(); + assertThat(columns).isNotEmpty(); + List columnNamesFromServer = + columns.stream().map(ColumnInfo::getName).collect(Collectors.toList()); + assertThat(columnNamesFromServer).containsExactlyInAnyOrderElementsOf(expectedColumns); + if (partitionColumn.isPresent()) { + List matchingColumns = + columns.stream() + .filter(c -> c.getName().equals(partitionColumn.get())) + .collect(Collectors.toList()); + assertThat(matchingColumns).hasSize(1); + assertThat(matchingColumns.get(0).getPartitionIndex()).isEqualTo(0); + } else { + assertThat(columns.stream().anyMatch(c -> c.getPartitionIndex() != null)).isFalse(); + } + if (tableType == TableType.MANAGED) { - assertThat(columns).isNotEmpty(); - List columnNamesFromServer = - columns.stream().map(ColumnInfo::getName).collect(Collectors.toList()); - assertThat(columnNamesFromServer).containsExactlyInAnyOrderElementsOf(expectedColumns); - if (partitionColumn.isPresent()) { - List matchingColumns = - columns.stream() - .filter(c -> c.getName().equals(partitionColumn.get())) - .collect(Collectors.toList()); - assertThat(matchingColumns).hasSize(1); - assertThat(matchingColumns.get(0).getPartitionIndex()).isEqualTo(0); - } else { - assertThat(columns.stream().anyMatch(c -> c.getPartitionIndex() != null)).isFalse(); - } // Delta sent properties of managed tables to server Map tablePropertiesFromServer = tableInfo.getProperties(); tablePropertiesFromServer.remove("table_type", "MANAGED"); // New property by Spark 4.1 @@ -712,8 
+716,6 @@ private void assertUCTableInfo( && !expectedPropertiesWithVariableValue.contains(entry.getKey())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); assertThat(unexpectedTablePropertiesFromServer).isEmpty(); - } else { - assertThat(columns).isEmpty(); } // Also verify table using DESC EXTENDED @@ -749,6 +751,42 @@ private void assertUCTableInfo( } } + private void assertComplexColumnMetadata(String fullTableName) throws ApiException { + Map columnsByName = + loadUCTableInfo(fullTableName).getColumns().stream() + .collect(Collectors.toMap(ColumnInfo::getName, Function.identity())); + + ColumnInfo arr = columnsByName.get("arr"); + assertColumnInfo(arr, ColumnTypeName.ARRAY, "array"); + assertThat(arr.getTypeJson()) + .contains("\"elementType\":\"integer\"", "\"containsNull\":true") + .doesNotContain("\"element-type\"", "\"contains-null\""); + + ColumnInfo map = columnsByName.get("map_col"); + assertColumnInfo(map, ColumnTypeName.MAP, "map"); + assertThat(map.getTypeJson()) + .contains( + "\"keyType\":\"string\"", "\"valueType\":\"integer\"", "\"valueContainsNull\":true") + .doesNotContain("\"key-type\"", "\"value-type\"", "\"value-contains-null\""); + + ColumnInfo struct = columnsByName.get("struct_col"); + assertColumnInfo(struct, ColumnTypeName.STRUCT, "struct"); + assertThat(struct.getTypeJson()) + .contains( + "\"name\":\"a\"", "\"type\":\"integer\"", "\"name\":\"b\"", "\"type\":\"string\""); + } + + private void assertColumnInfo(ColumnInfo column, ColumnTypeName typeName, String typeText) { + assertThat(column).isNotNull(); + assertThat(column.getTypeName()).isEqualTo(typeName); + assertThat(column.getTypeText()).isEqualTo(typeText); + } + + private TableInfo loadUCTableInfo(String fullTableName) throws ApiException { + return new TablesApi(unityCatalogInfo().createApiClient()) + .getTable(fullTableName, false, false); + } + private static String parseTableName(String fullTableName) { String[] splits = fullTableName.split("\\."); 
assertThat(splits.length).isEqualTo(3); diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableIntegrationBaseTest.java b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableIntegrationBaseTest.java index 0139e56c093..fb7dac55a41 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableIntegrationBaseTest.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableIntegrationBaseTest.java @@ -142,9 +142,17 @@ private SparkConf configureSparkWithUnityCatalog(SparkConf conf) { // Set the catalog specific configs. UnityCatalogInfo uc = unityCatalogInfo(); String catalogName = uc.catalogName(); - return conf.set("spark.sql.catalog." + catalogName, "io.unitycatalog.spark.UCSingleCatalog") - .set("spark.sql.catalog." + catalogName + ".uri", uc.serverUri()) - .set("spark.sql.catalog." + catalogName + ".token", uc.serverToken()); + conf = + conf.set("spark.sql.catalog." + catalogName, "io.unitycatalog.spark.UCSingleCatalog") + .set("spark.sql.catalog." + catalogName + ".uri", uc.serverUri()) + .set("spark.sql.catalog." + catalogName + ".token", uc.serverToken()) + .set( + "spark.sql.catalog." + catalogName + ".deltaRestApi.enabled", + String.valueOf(isUCDeltaRestCatalogApiEnabled())); + if (isUCRemoteConfigured()) { + conf.set("spark.sql.defaultCatalog", catalogName); + } + return conf; } /** Stop the SparkSession after all tests. 
*/ diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableReadTest.java b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableReadTest.java index 66a9169ff35..8b17c275f2f 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableReadTest.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/UCDeltaTableReadTest.java @@ -122,8 +122,11 @@ public void testDeltaTableForPath(TableType tableType) throws Exception { () -> sql("SELECT * FROM delta.`%s`", tablePath), "For managed tables, path-based access should fail"); } else { - // For EXTERNAL tables, path-based access should work - S3CredentialFileSystem.credentialCheckEnabled = false; + // Local UC OSS does not implement the UC Delta Rest Catalog API path credentials + // handler yet. Remote runs keep credential checks enabled and validate UC Delta Rest + // Catalog API path credential + // propagation. + S3CredentialFileSystem.credentialCheckEnabled = isUCRemoteConfigured(); try { check( sql("SELECT * FROM delta.`%s` ORDER BY id", tablePath), diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupport.java b/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupport.java index c6633febd78..22a32975eaa 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupport.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupport.java @@ -129,12 +129,19 @@ public ApiClient createApiClient() { public static final String UC_CATALOG_NAME = "UC_CATALOG_NAME"; public static final String UC_SCHEMA_NAME = "UC_SCHEMA_NAME"; public static final String UC_BASE_TABLE_LOCATION = "UC_BASE_TABLE_LOCATION"; + public static final String UC_DELTA_REST_CATALOG_API_ENABLED = + "UC_DELTA_REST_CATALOG_API_ENABLED"; protected static boolean isUCRemoteConfigured() { String ucRemote = System.getenv(UC_REMOTE); return ucRemote != null && ucRemote.equalsIgnoreCase("true"); } + protected static boolean isUCDeltaRestCatalogApiEnabled() { + 
String deltaRestApiEnabled = System.getenv(UC_DELTA_REST_CATALOG_API_ENABLED); + return deltaRestApiEnabled == null || deltaRestApiEnabled.equalsIgnoreCase("true"); + } + /** The Unity Catalog info instance for subclasses access */ private UnityCatalogInfo ucInfo = null; diff --git a/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupportTest.java b/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupportTest.java index 5ad795cae0d..cd1b62aaae5 100644 --- a/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupportTest.java +++ b/spark/unitycatalog/src/test/java/io/sparkuctest/UnityCatalogSupportTest.java @@ -18,6 +18,7 @@ import static io.sparkuctest.UnityCatalogSupport.UC_BASE_TABLE_LOCATION; import static io.sparkuctest.UnityCatalogSupport.UC_CATALOG_NAME; +import static io.sparkuctest.UnityCatalogSupport.UC_DELTA_REST_CATALOG_API_ENABLED; import static io.sparkuctest.UnityCatalogSupport.UC_REMOTE; import static io.sparkuctest.UnityCatalogSupport.UC_SCHEMA_NAME; import static io.sparkuctest.UnityCatalogSupport.UC_TOKEN; @@ -37,7 +38,13 @@ public class UnityCatalogSupportTest { private static final List ALL_ENVS = ImmutableList.of( - UC_REMOTE, UC_URI, UC_TOKEN, UC_CATALOG_NAME, UC_SCHEMA_NAME, UC_BASE_TABLE_LOCATION); + UC_REMOTE, + UC_URI, + UC_TOKEN, + UC_CATALOG_NAME, + UC_SCHEMA_NAME, + UC_BASE_TABLE_LOCATION, + UC_DELTA_REST_CATALOG_API_ENABLED); @Test public void testUnityCatalogInfo() throws Exception { @@ -176,6 +183,30 @@ public void testNoBaseTableLocation() throws Exception { }); } + @Test + public void testUCDeltaRestCatalogApiEnabledFromEnv() throws Exception { + withEnvTesting( + ImmutableMap.of(), + () -> { + TestingUCSupport uc = new TestingUCSupport(); + assertThat(uc.isUCDeltaRestCatalogApiEnabledForTest()).isTrue(); + }); + + withEnvTesting( + ImmutableMap.of(UC_DELTA_REST_CATALOG_API_ENABLED, "false"), + () -> { + TestingUCSupport uc = new TestingUCSupport(); + 
assertThat(uc.isUCDeltaRestCatalogApiEnabledForTest()).isFalse(); + }); + + withEnvTesting( + ImmutableMap.of(UC_DELTA_REST_CATALOG_API_ENABLED, "unexpected"), + () -> { + TestingUCSupport uc = new TestingUCSupport(); + assertThat(uc.isUCDeltaRestCatalogApiEnabledForTest()).isFalse(); + }); + } + public interface TestCall { void call() throws Exception; @@ -225,5 +256,9 @@ public UnityCatalogInfo accessUnityCatalogInfo() throws Exception { setupServer(); return unityCatalogInfo(); } + + public boolean isUCDeltaRestCatalogApiEnabledForTest() { + return isUCDeltaRestCatalogApiEnabled(); + } } } diff --git a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaClient.java b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaClient.java index 0cb061b920b..9ef408e23dc 100644 --- a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaClient.java +++ b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaClient.java @@ -17,65 +17,71 @@ package io.delta.storage.commit.uccommitcoordinator; import io.delta.storage.commit.actions.AbstractMetadata; -import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.StagingTableInfo; +import io.delta.storage.commit.CommitFailedException; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.CreateTableRequest; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.StagingTableResponse; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.UpdateTableRequest; + import java.io.IOException; -import java.util.List; -import java.util.Map; /** - * Extends {@link UCClient} with Delta table lifecycle operations backed by the UC Delta REST - * Catalog API (load, stage, and create tables). + * Interface for Unity Catalog Delta APIs. + * + *

<p>This keeps UC Delta Rest Catalog API operations separate from the legacy UC client. + * Implementations that do not support these APIs should use the default methods, which fail loudly. */ public interface UCDeltaClient extends UCClient { /** - * Loads a table's metadata from Unity Catalog. - * - * @param catalog the catalog name - * @param schema the schema name - * @param table the table name - * @return the table's {@link AbstractMetadata} - * @throws IOException on network or API errors + * Returns whether this client can use UC Delta Rest Catalog API. + */ + default boolean supportsUCDeltaRestCatalogApi() { + return false; + } + + /** + * Loads a Delta table from Unity Catalog through the UC Delta Rest Catalog API. + */ + default AbstractMetadata loadTable( + String catalog, + String schema, + String table) throws IOException { + throw new UnsupportedOperationException( + "loadTable requires UC Delta Rest Catalog API support."); + } + + /** + * Creates a Delta staging table in Unity Catalog through the UC Delta Rest Catalog API. */ - AbstractMetadata loadTable(String catalog, String schema, String table) throws IOException; + default StagingTableResponse createStagingTable( + String catalog, + String schema, + String table) throws IOException { + throw new UnsupportedOperationException( + "createStagingTable requires UC Delta Rest Catalog API support."); + } /** - * Reserves a staging slot for a new Delta table. The returned response contains the table ID, - * storage location, and protocol/property requirements that the caller must honor when - * finalizing the table with {@link #createTable}. - * - * @param catalog the catalog name - * @param schema the schema name - * @param table the table name - * @return a {@link StagingTableInfo} with the reserved table details - * @throws IOException on network or API errors + * Finalizes a staged Delta table in Unity Catalog through the UC Delta Rest Catalog API.
*/ - StagingTableInfo createStagingTable(String catalog, String schema, String table) - throws IOException; + default AbstractMetadata createTable( + String catalog, + String schema, + CreateTableRequest request) throws IOException { + throw new UnsupportedOperationException( + "createTable requires UC Delta Rest Catalog API support."); + } /** - * Finalizes a previously staged Delta table, making it visible in the catalog. - * - * @param catalog the catalog name - * @param schema the schema name - * @param name the table name - * @param location the storage location - * @param tableType the table type (MANAGED or EXTERNAL), or {@code null} - * @param comment the table comment, or {@code null} - * @param partitionColumns the partition column names, or {@code null} - * @param protocol the required Delta protocol, or {@code null} - * @param properties the table properties, or {@code null} - * @return the newly created table's {@link AbstractMetadata} - * @throws IOException on network or API errors + * Updates a Delta table in Unity Catalog through the UC Delta Rest Catalog API. 
*/ - AbstractMetadata createTable( + default AbstractMetadata updateTable( String catalog, String schema, - String name, - String location, - UCDeltaModels.TableType tableType, - String comment, - List partitionColumns, - UCDeltaModels.DeltaProtocol protocol, - Map properties) throws IOException; + String table, + UpdateTableRequest request) + throws IOException, CommitFailedException, UCCommitCoordinatorException { + throw new UnsupportedOperationException( + "updateTable requires UC Delta Rest Catalog API support."); + } } diff --git a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaModels.java b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaModels.java index 083b9796a5d..e72a1b9b6ff 100644 --- a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaModels.java +++ b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaModels.java @@ -16,20 +16,17 @@ package io.delta.storage.commit.uccommitcoordinator; -import io.delta.storage.commit.actions.AbstractProtocol; -import java.util.Collection; +import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.Set; +import java.util.UUID; -/** - * Delta-owned models for the UC Delta REST Catalog API. These decouple the {@link UCDeltaClient} - * interface from any generated SDK types. - */ -public final class UCDeltaModels { +import io.delta.storage.commit.uniform.UniformMetadata; +/** Delta-owned models for UC Delta APIs. 
*/ +public final class UCDeltaModels { private UCDeltaModels() {} public enum TableType { @@ -37,73 +34,663 @@ public enum TableType { EXTERNAL } - public static class DeltaProtocol implements AbstractProtocol { + public enum DataSourceFormat { + DELTA("DELTA"), + ICEBERG("ICEBERG"); - private int minReaderVersion; - private int minWriterVersion; - private final Set readerFeatures = new HashSet<>(); - private final Set writerFeatures = new HashSet<>(); + private final String value; - @Override - public int getMinReaderVersion() { - return minReaderVersion; + DataSourceFormat(String value) { + this.value = value; } - @Override - public int getMinWriterVersion() { - return minWriterVersion; + public String getValue() { + return value; } + } + + public static class DeltaProtocol { + private Integer minReaderVersion; + private Integer minWriterVersion; + private List readerFeatures; + private List writerFeatures; - @Override - public Set getReaderFeatures() { - return readerFeatures; + public DeltaProtocol minReaderVersion(Integer minReaderVersion) { + this.minReaderVersion = minReaderVersion; + return this; } - @Override - public Set getWriterFeatures() { - return writerFeatures; + public Integer getMinReaderVersion() { + return minReaderVersion; } - public DeltaProtocol minReaderVersion(int minReaderVersion) { + public void setMinReaderVersion(Integer minReaderVersion) { this.minReaderVersion = minReaderVersion; + } + + public DeltaProtocol minWriterVersion(Integer minWriterVersion) { + this.minWriterVersion = minWriterVersion; return this; } - public DeltaProtocol minWriterVersion(int minWriterVersion) { + public Integer getMinWriterVersion() { + return minWriterVersion; + } + + public void setMinWriterVersion(Integer minWriterVersion) { this.minWriterVersion = minWriterVersion; + } + + public DeltaProtocol readerFeatures(List readerFeatures) { + this.readerFeatures = readerFeatures; + return this; + } + + public DeltaProtocol addReaderFeaturesItem(String 
readerFeaturesItem) { + if (readerFeatures == null) { + readerFeatures = new ArrayList<>(); + } + readerFeatures.add(readerFeaturesItem); + return this; + } + + public List getReaderFeatures() { + return readerFeatures == null ? Collections.emptyList() : readerFeatures; + } + + public void setReaderFeatures(List readerFeatures) { + this.readerFeatures = readerFeatures; + } + + public DeltaProtocol writerFeatures(List writerFeatures) { + this.writerFeatures = writerFeatures; + return this; + } + + public DeltaProtocol addWriterFeaturesItem(String writerFeaturesItem) { + if (writerFeatures == null) { + writerFeatures = new ArrayList<>(); + } + writerFeatures.add(writerFeaturesItem); + return this; + } + + public List getWriterFeatures() { + return writerFeatures == null ? Collections.emptyList() : writerFeatures; + } + + public void setWriterFeatures(List writerFeatures) { + this.writerFeatures = writerFeatures; + } + } + + public static final class CreateTableRequest { + private String name; + private String location; + private TableType tableType; + private DataSourceFormat dataSourceFormat; + private String comment; + private String schemaString; + private List partitionColumns; + private DeltaProtocol protocol; + private Map properties; + private Long lastCommitTimestampMs; + + public CreateTableRequest name(String name) { + this.name = name; + return this; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public CreateTableRequest location(String location) { + this.location = location; + return this; + } + + public String getLocation() { + return location; + } + + public void setLocation(String location) { + this.location = location; + } + + public CreateTableRequest tableType(TableType tableType) { + this.tableType = tableType; + return this; + } + + public TableType getTableType() { + return tableType; + } + + public void setTableType(TableType tableType) { + this.tableType = tableType; + } 
+ + public CreateTableRequest dataSourceFormat(DataSourceFormat dataSourceFormat) { + this.dataSourceFormat = dataSourceFormat; + return this; + } + + public DataSourceFormat getDataSourceFormat() { + return dataSourceFormat; + } + + public void setDataSourceFormat(DataSourceFormat dataSourceFormat) { + this.dataSourceFormat = dataSourceFormat; + } + + public CreateTableRequest comment(String comment) { + this.comment = comment; + return this; + } + + public String getComment() { + return comment; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public CreateTableRequest schemaString(String schemaString) { + this.schemaString = schemaString; + return this; + } + + public String getSchemaString() { + return schemaString; + } + + public void setSchemaString(String schemaString) { + this.schemaString = schemaString; + } + + public CreateTableRequest partitionColumns(List partitionColumns) { + this.partitionColumns = partitionColumns; + return this; + } + + public CreateTableRequest addPartitionColumnsItem(String partitionColumnsItem) { + if (partitionColumns == null) { + partitionColumns = new ArrayList<>(); + } + partitionColumns.add(partitionColumnsItem); + return this; + } + + public List getPartitionColumns() { + return partitionColumns == null ? 
Collections.emptyList() : partitionColumns; + } + + public void setPartitionColumns(List partitionColumns) { + this.partitionColumns = partitionColumns; + } + + public CreateTableRequest protocol(DeltaProtocol protocol) { + this.protocol = protocol; + return this; + } + + public DeltaProtocol getProtocol() { + return protocol; + } + + public void setProtocol(DeltaProtocol protocol) { + this.protocol = protocol; + } + + public CreateTableRequest properties(Map properties) { + this.properties = properties; + return this; + } + + public CreateTableRequest putPropertiesItem(String key, String propertiesItem) { + if (properties == null) { + properties = new LinkedHashMap<>(); + } + properties.put(key, propertiesItem); + return this; + } + + public Map getProperties() { + return properties == null ? Collections.emptyMap() : properties; + } + + public void setProperties(Map properties) { + this.properties = properties; + } + + public CreateTableRequest lastCommitTimestampMs(Long lastCommitTimestampMs) { + this.lastCommitTimestampMs = lastCommitTimestampMs; + return this; + } + + public Long getLastCommitTimestampMs() { + return lastCommitTimestampMs; + } + + public void setLastCommitTimestampMs(Long lastCommitTimestampMs) { + this.lastCommitTimestampMs = lastCommitTimestampMs; + } + } + + public static final class UpdateTableRequest { + private List requirements; + private List updates; + + public UpdateTableRequest requirements(List requirements) { + this.requirements = requirements; + return this; + } + + public UpdateTableRequest addRequirementsItem(TableRequirement requirement) { + if (requirements == null) { + requirements = new ArrayList<>(); + } + requirements.add(requirement); + return this; + } + + public List getRequirements() { + return requirements == null ? 
Collections.emptyList() : requirements; + } + + public void setRequirements(List requirements) { + this.requirements = requirements; + } + + public UpdateTableRequest updates(List updates) { + this.updates = updates; + return this; + } + + public UpdateTableRequest addUpdatesItem(TableUpdate update) { + if (updates == null) { + updates = new ArrayList<>(); + } + updates.add(update); + return this; + } + + public List getUpdates() { + return updates == null ? Collections.emptyList() : updates; + } + + public void setUpdates(List updates) { + this.updates = updates; + } + } + + public static final class TableRequirement { + public enum Type { + ASSERT_TABLE_UUID, + ASSERT_ETAG + } + + private Type type; + private UUID uuid; + private String etag; + + public static TableRequirement assertTableUuid(UUID uuid) { + return new TableRequirement().type(Type.ASSERT_TABLE_UUID).uuid(uuid); + } + + public static TableRequirement assertEtag(String etag) { + return new TableRequirement().type(Type.ASSERT_ETAG).etag(etag); + } + + public TableRequirement type(Type type) { + this.type = type; + return this; + } + + public Type getType() { + return type; + } + + public void setType(Type type) { + this.type = type; + } + + public TableRequirement uuid(UUID uuid) { + this.uuid = uuid; + return this; + } + + public UUID getUuid() { + return uuid; + } + + public void setUuid(UUID uuid) { + this.uuid = uuid; + } + + public TableRequirement etag(String etag) { + this.etag = etag; + return this; + } + + public String getEtag() { + return etag; + } + + public void setEtag(String etag) { + this.etag = etag; + } + } + + public static final class DeltaCommit { + private Long version; + private Long timestamp; + private String fileName; + private Long fileSize; + private Long fileModificationTimestamp; + + public DeltaCommit version(Long version) { + this.version = version; + return this; + } + + public Long getVersion() { + return version; + } + + public void setVersion(Long version) { + 
this.version = version; + } + + public DeltaCommit timestamp(Long timestamp) { + this.timestamp = timestamp; + return this; + } + + public Long getTimestamp() { + return timestamp; + } + + public void setTimestamp(Long timestamp) { + this.timestamp = timestamp; + } + + public DeltaCommit fileName(String fileName) { + this.fileName = fileName; return this; } - public DeltaProtocol readerFeatures(Collection readerFeatures) { - this.readerFeatures.addAll(readerFeatures); + public String getFileName() { + return fileName; + } + + public void setFileName(String fileName) { + this.fileName = fileName; + } + + public DeltaCommit fileSize(Long fileSize) { + this.fileSize = fileSize; return this; } - public DeltaProtocol writerFeatures(Collection writerFeatures) { - this.writerFeatures.addAll(writerFeatures); + public Long getFileSize() { + return fileSize; + } + + public void setFileSize(Long fileSize) { + this.fileSize = fileSize; + } + + public DeltaCommit fileModificationTimestamp(Long fileModificationTimestamp) { + this.fileModificationTimestamp = fileModificationTimestamp; return this; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof DeltaProtocol)) return false; - DeltaProtocol that = (DeltaProtocol) o; - return minReaderVersion == that.minReaderVersion - && minWriterVersion == that.minWriterVersion - && Objects.equals(readerFeatures, that.readerFeatures) - && Objects.equals(writerFeatures, that.writerFeatures); + public Long getFileModificationTimestamp() { + return fileModificationTimestamp; } - @Override - public int hashCode() { - return Objects.hash(minReaderVersion, minWriterVersion, readerFeatures, writerFeatures); + public void setFileModificationTimestamp(Long fileModificationTimestamp) { + this.fileModificationTimestamp = fileModificationTimestamp; } } - public static final class StagingTableInfo { + public static final class TableUpdate { + public enum Action { + SET_PROPERTIES, + REMOVE_PROPERTIES, + 
SET_PROTOCOL, + SET_COLUMNS, + SET_PARTITION_COLUMNS, + SET_TABLE_COMMENT, + ADD_COMMIT, + SET_LATEST_BACKFILLED_VERSION, + UPDATE_METADATA_SNAPSHOT_VERSION + } + + private Action action; + private Map propertyUpdates; + private List propertyRemovals; + private DeltaProtocol protocol; + private String schemaString; + private List partitionColumns; + private String comment; + private DeltaCommit commit; + private UniformMetadata uniform; + private Long latestPublishedVersion; + private Long lastCommitVersion; + private Long lastCommitTimestampMs; + + public static TableUpdate setProperties(Map updates) { + return new TableUpdate().action(Action.SET_PROPERTIES).propertyUpdates(updates); + } + + public static TableUpdate removeProperties(List removals) { + return new TableUpdate().action(Action.REMOVE_PROPERTIES).propertyRemovals(removals); + } + + public static TableUpdate setProtocolUpdate(DeltaProtocol protocol) { + return new TableUpdate().action(Action.SET_PROTOCOL).protocol(protocol); + } + + public static TableUpdate setColumns(String schemaString) { + return new TableUpdate().action(Action.SET_COLUMNS).schemaString(schemaString); + } + + public static TableUpdate setPartitionColumnsUpdate(List partitionColumns) { + return new TableUpdate() + .action(Action.SET_PARTITION_COLUMNS) + .partitionColumns(partitionColumns); + } + + public static TableUpdate setTableComment(String comment) { + return new TableUpdate().action(Action.SET_TABLE_COMMENT).comment(comment); + } + + public static TableUpdate addCommit(DeltaCommit commit, UniformMetadata uniform) { + return new TableUpdate().action(Action.ADD_COMMIT).commit(commit).uniform(uniform); + } + + public static TableUpdate setLatestBackfilledVersion(Long latestPublishedVersion) { + return new TableUpdate() + .action(Action.SET_LATEST_BACKFILLED_VERSION) + .latestPublishedVersion(latestPublishedVersion); + } + + public static TableUpdate updateMetadataSnapshotVersion( + Long lastCommitVersion, + Long 
lastCommitTimestampMs) { + return new TableUpdate() + .action(Action.UPDATE_METADATA_SNAPSHOT_VERSION) + .lastCommitVersion(lastCommitVersion) + .lastCommitTimestampMs(lastCommitTimestampMs); + } + + public TableUpdate action(Action action) { + this.action = action; + return this; + } + + public Action getAction() { + return action; + } + + public void setAction(Action action) { + this.action = action; + } + + public TableUpdate propertyUpdates(Map propertyUpdates) { + this.propertyUpdates = propertyUpdates; + return this; + } + + public Map getPropertyUpdates() { + return propertyUpdates == null ? Collections.emptyMap() : propertyUpdates; + } + + public void setPropertyUpdates(Map propertyUpdates) { + this.propertyUpdates = propertyUpdates; + } + + public TableUpdate propertyRemovals(List propertyRemovals) { + this.propertyRemovals = propertyRemovals; + return this; + } + + public List getPropertyRemovals() { + return propertyRemovals == null ? Collections.emptyList() : propertyRemovals; + } + + public void setPropertyRemovals(List propertyRemovals) { + this.propertyRemovals = propertyRemovals; + } + + public TableUpdate protocol(DeltaProtocol protocol) { + this.protocol = protocol; + return this; + } + + public DeltaProtocol getProtocol() { + return protocol; + } + + public void setProtocol(DeltaProtocol protocol) { + this.protocol = protocol; + } + + public TableUpdate schemaString(String schemaString) { + this.schemaString = schemaString; + return this; + } + + public String getSchemaString() { + return schemaString; + } + + public void setSchemaString(String schemaString) { + this.schemaString = schemaString; + } + + public TableUpdate partitionColumns(List partitionColumns) { + this.partitionColumns = partitionColumns; + return this; + } + + public List getPartitionColumns() { + return partitionColumns == null ? 
Collections.emptyList() : partitionColumns; + } + + public void setPartitionColumns(List partitionColumns) { + this.partitionColumns = partitionColumns; + } + + public TableUpdate comment(String comment) { + this.comment = comment; + return this; + } + + public String getComment() { + return comment; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public TableUpdate commit(DeltaCommit commit) { + this.commit = commit; + return this; + } + + public DeltaCommit getCommit() { + return commit; + } + + public void setCommit(DeltaCommit commit) { + this.commit = commit; + } + + public TableUpdate uniform(UniformMetadata uniform) { + this.uniform = uniform; + return this; + } + + public UniformMetadata getUniform() { + return uniform; + } + + public void setUniform(UniformMetadata uniform) { + this.uniform = uniform; + } + + public TableUpdate latestPublishedVersion(Long latestPublishedVersion) { + this.latestPublishedVersion = latestPublishedVersion; + return this; + } + + public Long getLatestPublishedVersion() { + return latestPublishedVersion; + } + + public void setLatestPublishedVersion(Long latestPublishedVersion) { + this.latestPublishedVersion = latestPublishedVersion; + } + + public TableUpdate lastCommitVersion(Long lastCommitVersion) { + this.lastCommitVersion = lastCommitVersion; + return this; + } + + public Long getLastCommitVersion() { + return lastCommitVersion; + } + + public void setLastCommitVersion(Long lastCommitVersion) { + this.lastCommitVersion = lastCommitVersion; + } + + public TableUpdate lastCommitTimestampMs(Long lastCommitTimestampMs) { + this.lastCommitTimestampMs = lastCommitTimestampMs; + return this; + } + + public Long getLastCommitTimestampMs() { + return lastCommitTimestampMs; + } + + public void setLastCommitTimestampMs(Long lastCommitTimestampMs) { + this.lastCommitTimestampMs = lastCommitTimestampMs; + } + } - private final String tableId; + public static final class StagingTableResponse { + 
private final UUID tableId; private final TableType tableType; private final String location; private final DeltaProtocol requiredProtocol; @@ -111,8 +698,8 @@ public static final class StagingTableInfo { private final Map requiredProperties; private final Map suggestedProperties; - public StagingTableInfo( - String tableId, + public StagingTableResponse( + UUID tableId, TableType tableType, String location, DeltaProtocol requiredProtocol, @@ -128,7 +715,7 @@ public StagingTableInfo( this.suggestedProperties = suggestedProperties; } - public String getTableId() { + public UUID getTableId() { return tableId; } diff --git a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClient.java b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClient.java index 1f19840106f..73869734468 100644 --- a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClient.java +++ b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClient.java @@ -16,577 +16,699 @@ package io.delta.storage.commit.uccommitcoordinator; -import io.delta.storage.commit.Commit; -import io.delta.storage.commit.CommitFailedException; -import io.delta.storage.commit.GetCommitsResponse; -import io.delta.storage.commit.TableIdentifier; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; + import io.delta.storage.commit.actions.AbstractMetadata; -import io.delta.storage.commit.actions.AbstractProtocol; +import io.delta.storage.commit.CommitFailedException; +import 
io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.CreateTableRequest; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.DataSourceFormat; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.DeltaProtocol; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.StagingTableResponse; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.TableRequirement; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.TableUpdate; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.TableType; +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.UpdateTableRequest; +import io.delta.storage.commit.uniform.IcebergMetadata; +import io.delta.storage.commit.uniform.UniformMetadata; import io.unitycatalog.client.ApiClient; -import io.unitycatalog.client.ApiClientBuilder; import io.unitycatalog.client.ApiException; -import io.unitycatalog.client.api.MetastoresApi; import io.unitycatalog.client.auth.TokenProvider; -import io.unitycatalog.client.delta.api.TablesApi; -import io.unitycatalog.client.delta.model.AddCommitUpdate; -import io.unitycatalog.client.delta.model.AssertTableUUID; +import io.unitycatalog.client.delta.api.ConfigurationApi; +import io.unitycatalog.client.delta.model.ArrayType; +import io.unitycatalog.client.delta.model.CatalogConfig; import io.unitycatalog.client.delta.model.CreateStagingTableRequest; -import io.unitycatalog.client.delta.model.CreateTableRequest; -import io.unitycatalog.client.delta.model.DeltaCommit; -import io.unitycatalog.client.delta.model.DeltaProtocol; -import io.unitycatalog.client.delta.model.LoadTableResponse; -import io.unitycatalog.client.delta.model.PrimitiveType; -import io.unitycatalog.client.delta.model.RemovePropertiesUpdate; -import io.unitycatalog.client.delta.model.SetLatestBackfilledVersionUpdate; -import io.unitycatalog.client.delta.model.SetPartitionColumnsUpdate; -import io.unitycatalog.client.delta.model.SetPropertiesUpdate; -import 
io.unitycatalog.client.delta.model.SetProtocolUpdate; -import io.unitycatalog.client.delta.model.SetSchemaUpdate; -import io.unitycatalog.client.delta.model.SetTableCommentUpdate; -import io.unitycatalog.client.delta.model.StagingTableResponse; -import io.unitycatalog.client.delta.model.StagingTableResponseRequiredProtocol; -import io.unitycatalog.client.delta.model.StagingTableResponseSuggestedProtocol; -import io.unitycatalog.client.delta.model.StructField; -import io.unitycatalog.client.delta.model.StructType; +import io.unitycatalog.client.delta.model.DeltaType; +import io.unitycatalog.client.delta.model.MapType; import io.unitycatalog.client.delta.model.TableMetadata; -import io.unitycatalog.client.delta.model.UniformMetadata; -import io.unitycatalog.client.delta.model.UniformMetadataIceberg; -import io.unitycatalog.client.delta.model.UpdateTableRequest; -import io.unitycatalog.client.model.GetMetastoreSummaryResponse; +import io.unitycatalog.client.delta.serde.DeltaTypeModule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URI; import java.util.ArrayList; import java.util.Collections; -import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; -import java.util.Optional; -import java.util.Set; import java.util.UUID; /** - * A REST client implementation of {@link UCDeltaClient} that uses the UC Delta REST Catalog API for - * all table lifecycle and commit coordination operations. - * - *

<p>This client uses {@code io.unitycatalog.client.delta.api.TablesApi} for Delta-specific - * table operations (load, create, update) and {@link MetastoresApi} for metastore queries. - * - * @see UCDeltaClient */ -public class UCDeltaTokenBasedRestClient implements UCDeltaClient { +public class UCDeltaTokenBasedRestClient + extends UCTokenBasedRestClient + implements UCDeltaClient { + + private static final Logger LOG = LoggerFactory.getLogger(UCDeltaTokenBasedRestClient.class); + private static final ObjectMapper DELTA_TYPE_OBJECT_MAPPER = + JsonMapper.builder().serializationInclusion(JsonInclude.Include.NON_NULL).build() + .registerModule(new DeltaTypeModule()); + private static final ObjectMapper DELTA_SCHEMA_OBJECT_MAPPER = + createDeltaSchemaObjectMapper(); + + private static ObjectMapper createDeltaSchemaObjectMapper() { + ObjectMapper mapper = + JsonMapper.builder().serializationInclusion(JsonInclude.Include.NON_NULL).build(); + mapper.registerModule(new DeltaTypeModule()); + mapper.addMixIn(ArrayType.class, CamelCaseArrayMixin.class); + mapper.addMixIn(MapType.class, CamelCaseMapMixin.class); + return mapper; + } - private static final int HTTP_CONFLICT = 409; - private static final int HTTP_NOT_FOUND = 404; + private abstract static class CamelCaseArrayMixin { + @JsonProperty("elementType") + abstract DeltaType getElementType(); - private static final Set PRIMITIVE_TYPE_NAMES = Set.of( - "BOOLEAN", "BYTE", "SHORT", "INT", "LONG", "FLOAT", "DOUBLE", - "DATE", "TIMESTAMP", "TIMESTAMP_NTZ", "STRING", "BINARY", "DECIMAL"); + @JsonSetter("elementType") + abstract void setElementType(DeltaType value); - private TablesApi deltaTablesApi; - private MetastoresApi metastoresApi; + @JsonProperty("containsNull") + abstract Boolean getContainsNull(); - /** - * Constructs a new UCDeltaTokenBasedRestClient.
- * - * @param baseUri The base URI of the Unity Catalog server - * @param tokenProvider The TokenProvider to use for authentication - * @param appVersions A map of application name to version string for telemetry - */ - public UCDeltaTokenBasedRestClient( - String baseUri, - TokenProvider tokenProvider, - Map appVersions) { - Objects.requireNonNull(baseUri, "baseUri must not be null"); - Objects.requireNonNull(tokenProvider, "tokenProvider must not be null"); - Objects.requireNonNull(appVersions, "appVersions must not be null"); + @JsonSetter("containsNull") + abstract void setContainsNull(Boolean value); + } - ApiClientBuilder builder = ApiClientBuilder.create() - .uri(baseUri) - .tokenProvider(tokenProvider); + private abstract static class CamelCaseMapMixin { + @JsonProperty("keyType") + abstract DeltaType getKeyType(); - appVersions.forEach((name, version) -> { - if (version != null) { - builder.addAppVersion(name, version); - } - }); + @JsonSetter("keyType") + abstract void setKeyType(DeltaType value); - ApiClient apiClient = builder.build(); - this.deltaTablesApi = new TablesApi(apiClient); - this.metastoresApi = new MetastoresApi(apiClient); - } + @JsonProperty("valueType") + abstract DeltaType getValueType(); - private void ensureOpen() { - if (deltaTablesApi == null || metastoresApi == null) { - throw new IllegalStateException("UCDeltaTokenBasedRestClient has been closed."); - } + @JsonSetter("valueType") + abstract void setValueType(DeltaType value); + + @JsonProperty("valueContainsNull") + abstract Boolean getValueContainsNull(); + + @JsonSetter("valueContainsNull") + abstract void setValueContainsNull(Boolean value); } - // =========================== - // UCClient Implementation - // =========================== + private boolean supportsUCDeltaRestCatalogApi; + private volatile boolean closed; + private io.unitycatalog.client.delta.api.TablesApi deltaTablesApi; - @Override - public String getMetastoreId() throws IOException { - ensureOpen(); - try { - 
GetMetastoreSummaryResponse response = metastoresApi.summary(); - return response.getMetastoreId(); - } catch (ApiException e) { - throw new IOException( - String.format("Failed to get metastore ID (HTTP %s): %s", - e.getCode(), e.getResponseBody()), e); + private static final int HTTP_NOT_FOUND = 404; + private static final int HTTP_BAD_REQUEST = 400; + private static final int HTTP_CONFLICT = 409; + private static final int HTTP_TOO_MANY_REQUESTS = 429; + private static final String UC_DELTA_API_PROTOCOL_VERSION = "1.0"; + // Endpoint identifiers advertised by the UC Delta Rest Catalog API /config endpoint, not + // concrete URLs. + private static final List REQUIRED_UC_DELTA_API_ENDPOINT_IDS = + Collections.singletonList("GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}"); + + protected static class UCDeltaRestCatalogApiSupport { + private final boolean supportsTableApis; + + UCDeltaRestCatalogApiSupport(boolean supportsTableApis) { + this.supportsTableApis = supportsTableApis; } } - @Override - public void commit( - String tableId, - URI tableUri, - TableIdentifier tableIdentifier, - Optional commit, - Optional lastKnownBackfilledVersion, - Optional oldMetadata, - Optional newMetadata, - Optional oldProtocol, - Optional newProtocol, - Optional uniform) - throws IOException, CommitFailedException, UCCommitCoordinatorException { - ensureOpen(); - Objects.requireNonNull(tableId, "tableId must not be null"); - Objects.requireNonNull(tableIdentifier, "tableIdentifier must not be null"); - - UpdateTableRequest request = new UpdateTableRequest(); - request.addRequirementsItem(new AssertTableUUID() - .type("assert-table-uuid") - .uuid(UUID.fromString(tableId))); - - commit.ifPresent(c -> { - AddCommitUpdate addCommit = new AddCommitUpdate() - .action("add-commit") - .commit(toSDKDeltaCommit(c)); - uniform.ifPresent(u -> addCommit.uniform(toSDKUniformMetadata(u))); - request.addUpdatesItem(addCommit); - }); - - lastKnownBackfilledVersion.ifPresent(v -> - 
request.addUpdatesItem(new SetLatestBackfilledVersionUpdate() - .action("set-latest-backfilled-version") - .latestPublishedVersion(v))); - - if (oldMetadata.isPresent() - && newMetadata.isPresent() - && !Objects.equals(oldMetadata.get(), newMetadata.get())) { - addMetadataUpdates(request, oldMetadata.get(), newMetadata.get()); - } - if (oldProtocol.isPresent() - && newProtocol.isPresent() - && !Objects.equals(oldProtocol.get(), newProtocol.get())) { - request.addUpdatesItem(new SetProtocolUpdate() - .action("set-protocol") - .protocol(toSDKDeltaProtocol(newProtocol.get()))); - } - - String catalog = tableIdentifier.getNamespace()[0]; - String schema = tableIdentifier.getNamespace()[1]; - String table = tableIdentifier.getName(); + public UCDeltaTokenBasedRestClient( + String baseUri, + TokenProvider tokenProvider, + Map appVersions) { + super(baseUri, tokenProvider, appVersions); + } - try { - deltaTablesApi.updateTable(catalog, schema, table, request); - } catch (ApiException e) { - handleUpdateTableException(e, catalog, schema, table); - } + public UCDeltaTokenBasedRestClient( + String baseUri, + TokenProvider tokenProvider, + Map appVersions, + String catalog) { + super(baseUri, tokenProvider, appVersions, catalog); + Objects.requireNonNull(catalog, "catalog must not be null"); + configureUCDeltaRestCatalogApi(catalog); } - @Override - public GetCommitsResponse getCommits( - String tableId, - URI tableUri, - Optional startVersion, - Optional endVersion) throws IOException, UCCommitCoordinatorException { - throw new UnsupportedOperationException( - "getCommits is not yet supported by UCDeltaTokenBasedRestClient. 
" + - "A separate PR will add this once the tableIdentifier mapping is available."); + private void configureUCDeltaRestCatalogApi(String catalog) { + initializeUCDeltaRestCatalogApi( + getUCDeltaRestCatalogApiSupport(getApiClient(), catalog)); } - @Override - public void finalizeCreate( - String tableName, - String catalogName, - String schemaName, - String storageLocation, - List columns, - Map properties) throws CommitFailedException { - ensureOpen(); - Objects.requireNonNull(tableName, "tableName must not be null"); - Objects.requireNonNull(catalogName, "catalogName must not be null"); - Objects.requireNonNull(schemaName, "schemaName must not be null"); - Objects.requireNonNull(storageLocation, "storageLocation must not be null"); - Objects.requireNonNull(columns, "columns must not be null"); - Objects.requireNonNull(properties, "properties must not be null"); - - CreateTableRequest sdkRequest = new CreateTableRequest() - .name(tableName) - .location(storageLocation) - .properties(properties); - - if (!columns.isEmpty()) { - sdkRequest.columns(toSDKStructType(columns)); - } + private void initializeUCDeltaRestCatalogApi( + UCDeltaRestCatalogApiSupport ucDeltaRestCatalogApiSupport) { + Objects.requireNonNull( + ucDeltaRestCatalogApiSupport, "ucDeltaRestCatalogApiSupport must not be null"); + this.supportsUCDeltaRestCatalogApi = ucDeltaRestCatalogApiSupport.supportsTableApis; + this.deltaTablesApi = ucDeltaRestCatalogApiSupport.supportsTableApis + ? 
new io.unitycatalog.client.delta.api.TablesApi(getApiClient()) + : null; + } + protected static UCDeltaRestCatalogApiSupport getUCDeltaRestCatalogApiSupport( + ApiClient apiClient, + String catalog) { + Objects.requireNonNull(apiClient, "apiClient must not be null"); + Objects.requireNonNull(catalog, "catalog must not be null"); try { - deltaTablesApi.createTable(catalogName, schemaName, sdkRequest); + CatalogConfig config = + new ConfigurationApi(apiClient).getConfig(catalog, UC_DELTA_API_PROTOCOL_VERSION); + List endpoints = config == null ? null : config.getEndpoints(); + return new UCDeltaRestCatalogApiSupport( + endpoints != null && endpoints.containsAll(REQUIRED_UC_DELTA_API_ENDPOINT_IDS)); } catch (ApiException e) { - throw new CommitFailedException( - true /* retryable */, - false /* conflict */, - String.format("Failed to finalize table %s.%s.%s (HTTP %s): %s", - catalogName, schemaName, tableName, e.getCode(), e.getResponseBody()), + if (e.getCode() == HTTP_NOT_FOUND) { + LOG.warn( + "UC Delta Rest Catalog API config endpoint is unavailable for catalog {}. " + + "UC Delta Rest Catalog API will be disabled.", + catalog, + e); + return new UCDeltaRestCatalogApiSupport(false); + } + throw new IllegalArgumentException( + String.format( + "Failed to determine UC Delta Rest Catalog API support for catalog %s (HTTP %s): %s", + catalog, + e.getCode(), + e.getResponseBody()), e); } } @Override - public void close() throws IOException { - this.deltaTablesApi = null; - this.metastoresApi = null; + public boolean supportsUCDeltaRestCatalogApi() { + return supportsUCDeltaRestCatalogApi; } - // =========================== - // UCDeltaClient Implementation - // =========================== + /** + * Ensures the client has not been closed. Must be called before any API operation. 
+ */ + protected final void ensureUCDeltaClientOpen() { + if (closed) { + throw new IllegalStateException("UCDeltaTokenBasedRestClient has been closed."); + } + } + private void ensureUCDeltaRestCatalogApiSupported(String operation) { + ensureUCDeltaClientOpen(); + if (!supportsUCDeltaRestCatalogApi) { + throw new UnsupportedOperationException( + operation + " requires UC Delta Rest Catalog API support."); + } + } + + /** + * Loads one table from Unity Catalog. + * + *

This uses the UC Delta Rest Catalog API. Callers that need legacy UC loadTable behavior + * should use the existing catalog path instead of this API-only method. + */ @Override public AbstractMetadata loadTable( - String catalog, String schema, String table) throws IOException { - ensureOpen(); - Objects.requireNonNull(catalog, "catalog must not be null"); - Objects.requireNonNull(schema, "schema must not be null"); - Objects.requireNonNull(table, "table must not be null"); + String catalog, + String schema, + String table) throws IOException { + ensureUCDeltaRestCatalogApiSupported("loadTable"); + Objects.requireNonNull(catalog, "catalog must not be null."); + Objects.requireNonNull(schema, "schema must not be null."); + Objects.requireNonNull(table, "table must not be null."); try { - LoadTableResponse response = deltaTablesApi.loadTable(catalog, schema, table); - return new DeltaTableMetadata(table, response.getMetadata()); + io.unitycatalog.client.delta.model.LoadTableResponse response = + deltaTablesApi.loadTable(catalog, schema, table); + if (response == null || response.getMetadata() == null) { + throw new IOException( + String.format( + "Malformed UC Delta Rest Catalog API loadTable response for table %s.%s.%s: " + + "missing table metadata.", + catalog, + schema, + table)); + } + if (response.getMetadata().getColumns() == null) { + throw new IOException( + String.format( + "Malformed UC Delta Rest Catalog API loadTable response for table %s.%s.%s: " + + "missing table schema columns.", + catalog, + schema, + table)); + } + return toTableMetadata(table, response.getMetadata()); } catch (ApiException e) { throw new IOException( - String.format("Failed to load table %s.%s.%s (HTTP %s): %s", - catalog, schema, table, e.getCode(), e.getResponseBody()), e); + String.format( + "Failed to load table %s.%s.%s via UC Delta Rest Catalog API (HTTP %s): %s", + catalog, + schema, + table, + e.getCode(), + e.getResponseBody()), + e); } } + /** + * Creates a Delta staging 
table in Unity Catalog through the UC Delta Rest Catalog API. + */ @Override - public UCDeltaModels.StagingTableInfo createStagingTable( - String catalog, String schema, String table) throws IOException { - ensureOpen(); - Objects.requireNonNull(catalog, "catalog must not be null"); - Objects.requireNonNull(schema, "schema must not be null"); - Objects.requireNonNull(table, "table must not be null"); + public StagingTableResponse createStagingTable( + String catalog, + String schema, + String table) throws IOException { + ensureUCDeltaRestCatalogApiSupported("createStagingTable"); + Objects.requireNonNull(catalog, "catalog must not be null."); + Objects.requireNonNull(schema, "schema must not be null."); + Objects.requireNonNull(table, "table must not be null."); try { - CreateStagingTableRequest request = new CreateStagingTableRequest().name(table); - StagingTableResponse response = - deltaTablesApi.createStagingTable(catalog, schema, request); - return toStagingTableInfo(response); + return toStagingTableResponse(deltaTablesApi.createStagingTable( + catalog, + schema, + new CreateStagingTableRequest().name(table))); } catch (ApiException e) { throw new IOException( - String.format("Failed to create staging table %s.%s.%s (HTTP %s): %s", - catalog, schema, table, e.getCode(), e.getResponseBody()), e); + String.format( + "Failed to create staging table %s.%s.%s via UC Delta Rest Catalog API (HTTP %s): %s", + catalog, + schema, + table, + e.getCode(), + e.getResponseBody()), + e); } } + /** + * Finalizes a Delta table in Unity Catalog through the UC Delta Rest Catalog API. 
+ */ @Override public AbstractMetadata createTable( String catalog, String schema, - String name, - String location, - UCDeltaModels.TableType tableType, - String comment, - List partitionColumns, - UCDeltaModels.DeltaProtocol protocol, - Map properties) throws IOException { - ensureOpen(); - Objects.requireNonNull(catalog, "catalog must not be null"); - Objects.requireNonNull(schema, "schema must not be null"); - Objects.requireNonNull(name, "name must not be null"); + CreateTableRequest request) throws IOException { + ensureUCDeltaRestCatalogApiSupported("createTable"); + Objects.requireNonNull(catalog, "catalog must not be null."); + Objects.requireNonNull(schema, "schema must not be null."); + Objects.requireNonNull(request, "request must not be null."); try { - CreateTableRequest sdkRequest = new CreateTableRequest() - .name(name) - .location(location); - if (tableType != null) { - sdkRequest.tableType( - io.unitycatalog.client.delta.model.TableType.fromValue(tableType.name())); - } - if (comment != null) { - sdkRequest.comment(comment); - } - if (partitionColumns != null && !partitionColumns.isEmpty()) { - sdkRequest.partitionColumns(partitionColumns); - } - if (protocol != null) { - sdkRequest.protocol(toSDKDeltaProtocol(protocol)); - } - if (properties != null && !properties.isEmpty()) { - sdkRequest.properties(properties); - } - - LoadTableResponse response = - deltaTablesApi.createTable(catalog, schema, sdkRequest); - return new DeltaTableMetadata(name, response.getMetadata()); + io.unitycatalog.client.delta.model.LoadTableResponse response = + deltaTablesApi.createTable(catalog, schema, toSdkCreateTableRequest(request)); + return response == null ? null : toTableMetadata(request.getName(), response.getMetadata()); } catch (ApiException e) { + String table = request.getName() != null ? 
request.getName() : ""; throw new IOException( - String.format("Failed to create table %s.%s.%s (HTTP %s): %s", - catalog, schema, name, e.getCode(), e.getResponseBody()), e); + String.format( + "Failed to create table %s.%s.%s via UC Delta Rest Catalog API (HTTP %s): %s", + catalog, + schema, + table, + e.getCode(), + e.getResponseBody()), + e); } } - // =========================== - // Response Conversion Methods - // =========================== + /** + * Updates a Delta table in Unity Catalog through the UC Delta Rest Catalog API. + */ + @Override + public AbstractMetadata updateTable( + String catalog, + String schema, + String table, + UpdateTableRequest request) + throws IOException, CommitFailedException, UCCommitCoordinatorException { + ensureUCDeltaRestCatalogApiSupported("updateTable"); + Objects.requireNonNull(catalog, "catalog must not be null."); + Objects.requireNonNull(schema, "schema must not be null."); + Objects.requireNonNull(table, "table must not be null."); + Objects.requireNonNull(request, "request must not be null."); - private UCDeltaModels.StagingTableInfo toStagingTableInfo(StagingTableResponse r) { - UCDeltaModels.TableType tableType = null; - if (r.getTableType() != null) { - tableType = UCDeltaModels.TableType.valueOf(r.getTableType().getValue()); + try { + io.unitycatalog.client.delta.model.LoadTableResponse response = + deltaTablesApi.updateTable(catalog, schema, table, toSdkUpdateTableRequest(request)); + return response == null ? null : toTableMetadata(table, response.getMetadata()); + } catch (ApiException e) { + handleUpdateTableException(catalog, schema, table, e); + throw new IllegalStateException("unreachable"); } - - return new UCDeltaModels.StagingTableInfo( - r.getTableId() != null ? 
r.getTableId().toString() : null, - tableType, - r.getLocation(), - toDeltaProtocol(r.getRequiredProtocol()), - toDeltaProtocol(r.getSuggestedProtocol()), - r.getRequiredProperties(), - r.getSuggestedProperties()); } - private UCDeltaModels.DeltaProtocol toDeltaProtocol(StagingTableResponseRequiredProtocol p) { - if (p == null) { + private static TableMetadataAdapter toTableMetadata(String tableName, TableMetadata metadata) { + if (metadata == null) { return null; } - UCDeltaModels.DeltaProtocol protocol = new UCDeltaModels.DeltaProtocol() - .minReaderVersion(p.getMinReaderVersion()) - .minWriterVersion(p.getMinWriterVersion()); - if (p.getReaderFeatures() != null) { - protocol.readerFeatures(p.getReaderFeatures()); + return new TableMetadataAdapter(tableName, metadata); + } + + private static StagingTableResponse toStagingTableResponse( + io.unitycatalog.client.delta.model.StagingTableResponse response) { + if (response == null) { + return null; } - if (p.getWriterFeatures() != null) { - protocol.writerFeatures(p.getWriterFeatures()); + return new StagingTableResponse( + response.getTableId(), + toTableType(response.getTableType()), + response.getLocation(), + toProtocol(response.getRequiredProtocol()), + toProtocol(response.getSuggestedProtocol()), + response.getRequiredProperties(), + response.getSuggestedProperties()); + } + + private static DeltaProtocol toProtocol( + io.unitycatalog.client.delta.model.StagingTableResponseRequiredProtocol protocol) { + if (protocol == null) { + return null; } - return protocol; + return new DeltaProtocol() + .minReaderVersion(protocol.getMinReaderVersion()) + .minWriterVersion(protocol.getMinWriterVersion()) + .readerFeatures(protocol.getReaderFeatures()) + .writerFeatures(protocol.getWriterFeatures()); } - private UCDeltaModels.DeltaProtocol toDeltaProtocol(StagingTableResponseSuggestedProtocol p) { - if (p == null) { + private static DeltaProtocol toProtocol( + 
io.unitycatalog.client.delta.model.StagingTableResponseSuggestedProtocol protocol) { + if (protocol == null) { return null; } - UCDeltaModels.DeltaProtocol protocol = new UCDeltaModels.DeltaProtocol(); - if (p.getReaderFeatures() != null) { - protocol.readerFeatures(p.getReaderFeatures()); + return new DeltaProtocol() + .readerFeatures(protocol.getReaderFeatures()) + .writerFeatures(protocol.getWriterFeatures()); + } + + private static io.unitycatalog.client.delta.model.CreateTableRequest toSdkCreateTableRequest( + CreateTableRequest request) { + io.unitycatalog.client.delta.model.CreateTableRequest sdkRequest = + new io.unitycatalog.client.delta.model.CreateTableRequest(); + sdkRequest + .name(request.getName()) + .location(request.getLocation()) + .tableType(toSdkTableType(request.getTableType())) + .dataSourceFormat(toSdkDataSourceFormat(request.getDataSourceFormat())) + .comment(request.getComment()) + .columns(toSdkStructType(request.getSchemaString())) + .partitionColumns(request.getPartitionColumns()) + .protocol(toSdkDeltaProtocol(request.getProtocol())) + .properties(request.getProperties()); + sdkRequest.lastCommitTimestampMs(request.getLastCommitTimestampMs()); + return sdkRequest; + } + + private static io.unitycatalog.client.delta.model.UpdateTableRequest toSdkUpdateTableRequest( + UpdateTableRequest request) { + List requirements = new ArrayList<>(); + for (TableRequirement requirement : request.getRequirements()) { + requirements.add(toSdkTableRequirement(requirement)); } - if (p.getWriterFeatures() != null) { - protocol.writerFeatures(p.getWriterFeatures()); + List updates = new ArrayList<>(); + for (TableUpdate update : request.getUpdates()) { + updates.add(toSdkTableUpdate(update)); } - return protocol; + return new io.unitycatalog.client.delta.model.UpdateTableRequest() + .requirements(requirements) + .updates(updates); } - // =========================== - // SDK Conversion Methods - // =========================== + private static 
io.unitycatalog.client.delta.model.TableRequirement toSdkTableRequirement( + TableRequirement requirement) { + Objects.requireNonNull(requirement, "requirement must not be null."); + switch (requirement.getType()) { + case ASSERT_TABLE_UUID: + return new io.unitycatalog.client.delta.model.AssertTableUUID() + .uuid(requirement.getUuid()); + case ASSERT_ETAG: + return new io.unitycatalog.client.delta.model.AssertEtag() + .etag(requirement.getEtag()); + default: + throw new IllegalArgumentException("Unsupported UC Delta table requirement: " + + requirement.getType()); + } + } - private DeltaCommit toSDKDeltaCommit(Commit c) { - Objects.requireNonNull(c, "commit must not be null"); - Objects.requireNonNull(c.getFileStatus(), "commit fileStatus must not be null"); - return new DeltaCommit() - .version(c.getVersion()) - .timestamp(c.getCommitTimestamp()) - .fileName(c.getFileStatus().getPath().getName()) - .fileSize(c.getFileStatus().getLen()) - .fileModificationTimestamp(c.getFileStatus().getModificationTime()); + private static io.unitycatalog.client.delta.model.TableUpdate toSdkTableUpdate( + TableUpdate update) { + Objects.requireNonNull(update, "update must not be null."); + switch (update.getAction()) { + case SET_PROPERTIES: + return new io.unitycatalog.client.delta.model.SetPropertiesUpdate() + .updates(update.getPropertyUpdates()); + case REMOVE_PROPERTIES: + return new io.unitycatalog.client.delta.model.RemovePropertiesUpdate() + .removals(update.getPropertyRemovals()); + case SET_PROTOCOL: + return new io.unitycatalog.client.delta.model.SetProtocolUpdate() + .protocol(toSdkDeltaProtocol(update.getProtocol())); + case SET_COLUMNS: + return new io.unitycatalog.client.delta.model.SetSchemaUpdate() + .columns(toSdkStructType(update.getSchemaString())); + case SET_PARTITION_COLUMNS: + return new io.unitycatalog.client.delta.model.SetPartitionColumnsUpdate() + .partitionColumns(update.getPartitionColumns()); + case SET_TABLE_COMMENT: + return new 
io.unitycatalog.client.delta.model.SetTableCommentUpdate() + .comment(update.getComment()); + case ADD_COMMIT: + return new io.unitycatalog.client.delta.model.AddCommitUpdate() + .commit(toSdkDeltaCommit(update.getCommit())) + .uniform(toSdkUniformMetadata(update.getUniform())); + case SET_LATEST_BACKFILLED_VERSION: + return new io.unitycatalog.client.delta.model.SetLatestBackfilledVersionUpdate() + .latestPublishedVersion(update.getLatestPublishedVersion()); + case UPDATE_METADATA_SNAPSHOT_VERSION: + return new io.unitycatalog.client.delta.model.UpdateSnapshotVersionUpdate() + .lastCommitVersion(update.getLastCommitVersion()) + .lastCommitTimestampMs(update.getLastCommitTimestampMs()); + default: + throw new IllegalArgumentException("Unsupported UC Delta table update: " + + update.getAction()); + } } - private DeltaProtocol toSDKDeltaProtocol(AbstractProtocol p) { - DeltaProtocol protocol = new DeltaProtocol() - .minReaderVersion(p.getMinReaderVersion()) - .minWriterVersion(p.getMinWriterVersion()); - if (p.getReaderFeatures() != null && !p.getReaderFeatures().isEmpty()) { - protocol.readerFeatures(new ArrayList<>(p.getReaderFeatures())); + private static TableType toTableType( + io.unitycatalog.client.delta.model.TableType tableType) { + if (tableType == null) { + return null; } - if (p.getWriterFeatures() != null && !p.getWriterFeatures().isEmpty()) { - protocol.writerFeatures(new ArrayList<>(p.getWriterFeatures())); + switch (tableType) { + case MANAGED: + return TableType.MANAGED; + case EXTERNAL: + return TableType.EXTERNAL; + default: + throw new IllegalArgumentException("Unsupported UC Delta table type: " + tableType); } - return protocol; } - private UniformMetadata toSDKUniformMetadata( - io.delta.storage.commit.uniform.UniformMetadata uniform) { - UniformMetadata ucUniform = new UniformMetadata(); - uniform.getIcebergMetadata().ifPresent(iceberg -> { - UniformMetadataIceberg ucIceberg = new UniformMetadataIceberg() - 
.metadataLocation(iceberg.getMetadataLocation()) - .convertedDeltaVersion(iceberg.getConvertedDeltaVersion()) - .convertedDeltaTimestamp(parseTimestampToEpochMs( - iceberg.getConvertedDeltaTimestamp())); - iceberg.getBaseConvertedDeltaVersion().ifPresent( - ucIceberg::baseConvertedDeltaVersion); - ucUniform.iceberg(ucIceberg); - }); - return ucUniform; + private static io.unitycatalog.client.delta.model.TableType toSdkTableType( + TableType tableType) { + if (tableType == null) { + return null; + } + switch (tableType) { + case MANAGED: + return io.unitycatalog.client.delta.model.TableType.MANAGED; + case EXTERNAL: + return io.unitycatalog.client.delta.model.TableType.EXTERNAL; + default: + throw new IllegalArgumentException("Unsupported UC Delta table type: " + tableType); + } } - /** - * Parses a timestamp string to epoch milliseconds. Handles both numeric strings (already epoch - * millis) and ISO-8601 datetime strings (e.g. "2025-01-04T03:13:11.423Z"). - */ - private Long parseTimestampToEpochMs(String timestamp) { - if (timestamp == null) { + private static io.unitycatalog.client.delta.model.DataSourceFormat toSdkDataSourceFormat( + DataSourceFormat format) { + if (format == null) { + return null; + } + switch (format) { + case DELTA: + return io.unitycatalog.client.delta.model.DataSourceFormat.DELTA; + case ICEBERG: + return io.unitycatalog.client.delta.model.DataSourceFormat.ICEBERG; + default: + throw new IllegalArgumentException("Unsupported UC Delta data source format: " + format); + } + } + + private static io.unitycatalog.client.delta.model.StructType toSdkStructType( + String schemaString) { + if (schemaString == null) { return null; } try { - return Long.parseLong(timestamp); - } catch (NumberFormatException e) { - return java.time.Instant.parse(timestamp).toEpochMilli(); + return DELTA_TYPE_OBJECT_MAPPER.treeToValue( + normalizeDeltaSchemaJson(DELTA_TYPE_OBJECT_MAPPER.readTree(schemaString)), + io.unitycatalog.client.delta.model.StructType.class); + 
} catch (JsonProcessingException e) { + throw new IllegalArgumentException("Failed to deserialize Delta schema JSON.", e); } } - /** - * Compares old and new metadata, adding the appropriate UC SDK update items to the request for - * any fields that changed. - */ - private void addMetadataUpdates( - UpdateTableRequest request, - AbstractMetadata oldMetadata, - AbstractMetadata newMetadata) { - if (!Objects.equals(oldMetadata.getSchemaString(), newMetadata.getSchemaString())) { - request.addUpdatesItem(new SetSchemaUpdate() - .action("set-columns") - .columns(parseSchemaString(newMetadata.getSchemaString()))); - } - if (!Objects.equals(oldMetadata.getPartitionColumns(), newMetadata.getPartitionColumns())) { - request.addUpdatesItem(new SetPartitionColumnsUpdate() - .action("set-partition-columns") - .partitionColumns(newMetadata.getPartitionColumns())); - } - if (!Objects.equals(oldMetadata.getDescription(), newMetadata.getDescription())) { - request.addUpdatesItem(new SetTableCommentUpdate() - .action("set-table-comment") - .comment(newMetadata.getDescription())); - } - - Map oldConfig = oldMetadata.getConfiguration() != null - ? oldMetadata.getConfiguration() : Collections.emptyMap(); - Map newConfig = newMetadata.getConfiguration() != null - ? newMetadata.getConfiguration() : Collections.emptyMap(); - - if (!Objects.equals(oldConfig, newConfig)) { - Map toSet = new LinkedHashMap<>(); - for (Map.Entry entry : newConfig.entrySet()) { - if (!Objects.equals(entry.getValue(), oldConfig.get(entry.getKey()))) { - toSet.put(entry.getKey(), entry.getValue()); - } + // Spark stores array/map schema fields in camelCase; the UC Delta API SDK model uses the wire + // names from the OpenAPI spec. Normalize only those field names before binding to SDK models. 
+ private static JsonNode normalizeDeltaSchemaJson(JsonNode node) { + if (node == null) { + return null; + } + if (node.isArray()) { + for (JsonNode child : node) { + normalizeDeltaSchemaJson(child); } - if (!toSet.isEmpty()) { - request.addUpdatesItem(new SetPropertiesUpdate() - .action("set-properties") - .updates(toSet)); + } else if (node.isObject()) { + ObjectNode object = (ObjectNode) node; + renameField(object, "elementType", "element-type"); + renameField(object, "containsNull", "contains-null"); + renameField(object, "keyType", "key-type"); + renameField(object, "valueType", "value-type"); + renameField(object, "valueContainsNull", "value-contains-null"); + for (JsonNode child : object) { + normalizeDeltaSchemaJson(child); } + } + return node; + } - List toRemove = new ArrayList<>(); - for (String key : oldConfig.keySet()) { - if (!newConfig.containsKey(key)) { - toRemove.add(key); - } - } - if (!toRemove.isEmpty()) { - request.addUpdatesItem(new RemovePropertiesUpdate() - .action("remove-properties") - .removals(toRemove)); + private static void renameField(ObjectNode object, String from, String to) { + if (object.has(from)) { + if (!object.has(to)) { + object.set(to, object.get(from)); } + object.remove(from); } } - private StructType toSDKStructType(List columns) { - StructType structType = new StructType(); - for (ColumnDef col : columns) { - structType.addFieldsItem(new StructField() - .name(col.getName()) - .nullable(col.isNullable()) - .type(toSDKDeltaType(col))); + private static io.unitycatalog.client.delta.model.DeltaProtocol toSdkDeltaProtocol( + DeltaProtocol protocol) { + if (protocol == null) { + return null; } - return structType; + return new io.unitycatalog.client.delta.model.DeltaProtocol() + .minReaderVersion(protocol.getMinReaderVersion()) + .minWriterVersion(protocol.getMinWriterVersion()) + .readerFeatures(protocol.getReaderFeatures()) + .writerFeatures(protocol.getWriterFeatures()); } - private PrimitiveType toSDKDeltaType(ColumnDef 
col) { - if (!PRIMITIVE_TYPE_NAMES.contains(col.getTypeName())) { - throw new UnsupportedOperationException( - "Complex column type '" + col.getTypeName() + "' for column '" + col.getName() + - "' is not yet supported. Only primitive types are supported."); + private static io.unitycatalog.client.delta.model.DeltaCommit toSdkDeltaCommit( + UCDeltaModels.DeltaCommit commit) { + if (commit == null) { + return null; } - return new PrimitiveType().type(col.getTypeText()); + return new io.unitycatalog.client.delta.model.DeltaCommit() + .version(commit.getVersion()) + .timestamp(commit.getTimestamp()) + .fileName(commit.getFileName()) + .fileSize(commit.getFileSize()) + .fileModificationTimestamp(commit.getFileModificationTimestamp()); } - private StructType parseSchemaString(String schemaString) { - // TODO: implement full Delta schema string -> StructType conversion - throw new UnsupportedOperationException( - "Delta schema string to StructType conversion is not yet implemented."); + private static io.unitycatalog.client.delta.model.UniformMetadata toSdkUniformMetadata( + UniformMetadata uniform) { + if (uniform == null || !uniform.getIcebergMetadata().isPresent()) { + return null; + } + IcebergMetadata iceberg = uniform.getIcebergMetadata().get(); + io.unitycatalog.client.delta.model.UniformMetadataIceberg sdkIceberg = + new io.unitycatalog.client.delta.model.UniformMetadataIceberg() + .metadataLocation(iceberg.getMetadataLocation()) + .convertedDeltaVersion(iceberg.getConvertedDeltaVersion()) + .convertedDeltaTimestamp(Long.parseLong(iceberg.getConvertedDeltaTimestamp())) + .baseConvertedDeltaVersion(iceberg.getBaseConvertedDeltaVersion().orElse(null)); + return new io.unitycatalog.client.delta.model.UniformMetadata().iceberg(sdkIceberg); } - // =========================== - // Exception Handling - // =========================== - - private void handleUpdateTableException( - ApiException e, String catalog, String schema, String table) - throws IOException, 
CommitFailedException, UCCommitCoordinatorException { + private static void handleUpdateTableException( + String catalog, + String schema, + String table, + ApiException e) + throws CommitFailedException, UCCommitCoordinatorException, IOException { int statusCode = e.getCode(); String responseBody = e.getResponseBody(); - switch (statusCode) { + case HTTP_BAD_REQUEST: + throw new CommitFailedException( + false /* retryable */, + false /* conflict */, + "Invalid UC Delta updateTable request: " + responseBody, + e); + case HTTP_NOT_FOUND: + throw new InvalidTargetTableException( + String.format( + "Invalid UC Delta target table %s.%s.%s: %s", + catalog, + schema, + table, + responseBody)); case HTTP_CONFLICT: throw new CommitFailedException( true /* retryable */, true /* conflict */, - String.format("Update conflict for %s.%s.%s: %s", - catalog, schema, table, responseBody), + "UC Delta updateTable conflict: " + responseBody, e); - case HTTP_NOT_FOUND: - throw new InvalidTargetTableException( - String.format("Table not found %s.%s.%s: %s", - catalog, schema, table, responseBody)); + case HTTP_TOO_MANY_REQUESTS: + throw new CommitLimitReachedException( + "UC Delta updateTable commit limit reached: " + responseBody); default: throw new IOException( - String.format("Failed to update table %s.%s.%s (HTTP %s): %s", - catalog, schema, table, statusCode, responseBody), e); + String.format( + "Failed to update table %s.%s.%s via UC Delta Rest Catalog API (HTTP %s): %s", + catalog, + schema, + table, + statusCode, + responseBody), + e); } } - // =========================== - // Inner Classes - // =========================== - /** - * Adapts a UC SDK {@link TableMetadata} to {@link AbstractMetadata}. + * Adapts UC Delta SDK table metadata to Delta's storage-level metadata interface. + * + *

The UC SDK schema stays hidden behind this client implementation. Callers see the schema + * through Delta's storage-level schema JSON. */ - private static final class DeltaTableMetadata implements AbstractMetadata { + public static final class TableMetadataAdapter implements AbstractMetadata { + private final String tableName; + private final TableMetadata delegate; + private final String schemaString; + + private TableMetadataAdapter(String tableName, TableMetadata delegate) { + this.tableName = Objects.requireNonNull(tableName, "tableName must not be null."); + this.delegate = Objects.requireNonNull(delegate, "delegate must not be null."); + this.schemaString = toSchemaString( + Objects.requireNonNull(delegate.getColumns(), "UC Delta table schema is missing.")); + } + + public String getTableType() { + return delegate.getTableType() == null ? null : delegate.getTableType().getValue(); + } - private final String name; - private final TableMetadata m; + public UUID getTableUuid() { + return delegate.getTableUuid(); + } - DeltaTableMetadata(String name, TableMetadata m) { - this.name = name; - this.m = m; + public String getLocation() { + return delegate.getLocation(); } @Override public String getId() { - return m.getTableUuid() != null ? m.getTableUuid().toString() : null; + return delegate.getTableUuid() == null ? null : delegate.getTableUuid().toString(); } @Override public String getName() { - return name; + return tableName; } @Override @@ -596,7 +718,9 @@ public String getDescription() { @Override public String getProvider() { - return m.getDataSourceFormat() != null ? m.getDataSourceFormat().getValue() : null; + return delegate.getDataSourceFormat() == null + ? null + : delegate.getDataSourceFormat().getValue().toLowerCase(Locale.ROOT); } @Override @@ -606,43 +730,38 @@ public Map getFormatOptions() { @Override public String getSchemaString() { - return m.getColumns() != null ? 
m.getColumns().toString() : null; + return schemaString; } @Override public List getPartitionColumns() { - return m.getPartitionColumns() != null - ? m.getPartitionColumns() : Collections.emptyList(); + return delegate.getPartitionColumns(); } @Override public Map getConfiguration() { - return m.getProperties() != null ? m.getProperties() : Collections.emptyMap(); + return delegate.getProperties(); } @Override public Long getCreatedTime() { - return m.getCreatedTime(); + return delegate.getCreatedTime(); } + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof DeltaTableMetadata)) return false; - DeltaTableMetadata that = (DeltaTableMetadata) o; - return Objects.equals(getId(), that.getId()) - && Objects.equals(getName(), that.getName()) - && Objects.equals(getProvider(), that.getProvider()) - && Objects.equals(getSchemaString(), that.getSchemaString()) - && Objects.equals(getPartitionColumns(), that.getPartitionColumns()) - && Objects.equals(getConfiguration(), that.getConfiguration()) - && Objects.equals(getCreatedTime(), that.getCreatedTime()); + private static String toSchemaString( + io.unitycatalog.client.delta.model.StructType schema) { + try { + return DELTA_SCHEMA_OBJECT_MAPPER.writeValueAsString(schema); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Failed to serialize UC Delta schema.", e); } + } - @Override - public int hashCode() { - return Objects.hash(getId(), getName(), getProvider(), getSchemaString(), - getPartitionColumns(), getConfiguration(), getCreatedTime()); - } + @Override + public void close() throws IOException { + this.closed = true; + this.deltaTablesApi = null; + super.close(); } } diff --git a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedApiClientProvider.java b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedApiClientProvider.java new file mode 100644 index 00000000000..37ac19c8719 --- 
/dev/null +++ b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedApiClientProvider.java @@ -0,0 +1,76 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.storage.commit.uccommitcoordinator; + +import io.unitycatalog.client.ApiClient; +import io.unitycatalog.client.ApiClientBuilder; +import io.unitycatalog.client.auth.TokenProvider; +import io.unitycatalog.client.retry.JitterDelayRetryPolicy; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +abstract class UCTokenBasedApiClientProvider { + private ApiClient apiClient; + + protected UCTokenBasedApiClientProvider( + String baseUri, + TokenProvider tokenProvider, + Map appVersions) { + this.apiClient = buildApiClient(baseUri, tokenProvider, appVersions); + } + + protected UCTokenBasedApiClientProvider( + String baseUri, + TokenProvider tokenProvider, + Map appVersions, + String catalog) { + // The catalog is consumed by subclasses that probe catalog-scoped Delta API support. 
+ this(baseUri, tokenProvider, appVersions); + } + + private static ApiClient buildApiClient( + String baseUri, + TokenProvider tokenProvider, + Map appVersions) { + Objects.requireNonNull(baseUri, "baseUri must not be null"); + Objects.requireNonNull(tokenProvider, "tokenProvider must not be null"); + Objects.requireNonNull(appVersions, "appVersions must not be null"); + + ApiClientBuilder builder = ApiClientBuilder.create() + .uri(baseUri) + .tokenProvider(tokenProvider) + .retryPolicy(JitterDelayRetryPolicy.builder().build()); + + appVersions.forEach((name, version) -> { + if (version != null) { + builder.addAppVersion(name, version); + } + }); + + return builder.build(); + } + + protected ApiClient getApiClient() { + return apiClient; + } + + public void close() throws IOException { + apiClient = null; + } +} diff --git a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedRestClient.java b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedRestClient.java index 67b582a1b00..a60f9d67290 100644 --- a/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedRestClient.java +++ b/storage/src/main/java/io/delta/storage/commit/uccommitcoordinator/UCTokenBasedRestClient.java @@ -26,7 +26,6 @@ import io.delta.storage.commit.uniform.IcebergMetadata; import io.delta.storage.commit.uniform.UniformMetadata; import io.unitycatalog.client.ApiClient; -import io.unitycatalog.client.ApiClientBuilder; import io.unitycatalog.client.ApiException; import io.unitycatalog.client.api.DeltaCommitsApi; import io.unitycatalog.client.api.MetastoresApi; @@ -83,7 +82,7 @@ * @see GetCommitsResponse * @see TokenProvider */ -public class UCTokenBasedRestClient implements UCClient { +public class UCTokenBasedRestClient extends UCTokenBasedApiClientProvider implements UCClient { private DeltaCommitsApi deltaCommitsApi; private MetastoresApi metastoresApi; @@ -109,21 +108,20 @@ public UCTokenBasedRestClient( String 
baseUri, TokenProvider tokenProvider, Map appVersions) { - Objects.requireNonNull(baseUri, "baseUri must not be null"); - Objects.requireNonNull(tokenProvider, "tokenProvider must not be null"); - Objects.requireNonNull(appVersions, "appVersions must not be null"); - - ApiClientBuilder builder = ApiClientBuilder.create() - .uri(baseUri) - .tokenProvider(tokenProvider); - - appVersions.forEach((name, version) -> { - if (version != null) { - builder.addAppVersion(name, version); - } - }); + super(baseUri, tokenProvider, appVersions); + ApiClient apiClient = getApiClient(); + this.deltaCommitsApi = new DeltaCommitsApi(apiClient); + this.metastoresApi = new MetastoresApi(apiClient); + this.tablesApi = new TablesApi(apiClient); + } - ApiClient apiClient = builder.build(); + public UCTokenBasedRestClient( + String baseUri, + TokenProvider tokenProvider, + Map appVersions, + String catalog) { + super(baseUri, tokenProvider, appVersions, catalog); + ApiClient apiClient = getApiClient(); this.deltaCommitsApi = new DeltaCommitsApi(apiClient); this.metastoresApi = new MetastoresApi(apiClient); this.tablesApi = new TablesApi(apiClient); @@ -233,6 +231,7 @@ public GetCommitsResponse getCommits( @Override public void close() throws IOException { + super.close(); // Nulling out the API instances makes them eligible for GC. Once garbage collected, // the underlying connection pool is freed and destroyed. 
this.deltaCommitsApi = null; diff --git a/storage/src/test/scala/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClientSuite.scala b/storage/src/test/scala/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClientSuite.scala index 04d55788f98..2f5e5e85420 100644 --- a/storage/src/test/scala/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClientSuite.scala +++ b/storage/src/test/scala/io/delta/storage/commit/uccommitcoordinator/UCDeltaTokenBasedRestClientSuite.scala @@ -16,18 +16,26 @@ package io.delta.storage.commit.uccommitcoordinator +import java.io.IOException import java.net.{InetSocketAddress, URI} import java.nio.charset.StandardCharsets -import java.util.{Collections, Optional, Set => JSet} +import java.util.{Arrays => JArrays, Collections, Optional, UUID} + +import io.delta.storage.commit.{Commit, GetCommitsResponse, TableIdentifier} +import io.delta.storage.commit.actions.{AbstractMetadata, AbstractProtocol} +import io.delta.storage.commit.uccommitcoordinator.UCDeltaModels.{ + CreateTableRequest, + DeltaCommit, + DeltaProtocol, + TableRequirement, + TableUpdate, + UpdateTableRequest +} +import io.delta.storage.commit.uniform.UniformMetadata import com.fasterxml.jackson.databind.ObjectMapper import com.sun.net.httpserver.{HttpExchange, HttpServer} -import io.delta.storage.commit.{Commit, CommitFailedException, TableIdentifier} -import io.delta.storage.commit.actions.{AbstractMetadata, AbstractProtocol} -import io.delta.storage.commit.uniform.{IcebergMetadata, UniformMetadata} import io.unitycatalog.client.auth.TokenProvider - -import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.http.HttpStatus import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.scalatest.funsuite.AnyFunSuite @@ -37,52 +45,51 @@ class UCDeltaTokenBasedRestClientSuite with BeforeAndAfterAll with BeforeAndAfterEach { - private val testTableId = "550e8400-e29b-41d4-a716-446655440000" private val testMetastoreId = 
"test-metastore-123" - private val testCatalog = "cat" - private val testSchema = "sch" - private val testTable = "tbl" - private val testIdentifier = new TableIdentifier(testCatalog, testSchema, testTable) + private val objectMapper = new ObjectMapper() private var server: HttpServer = _ private var serverUri: String = _ - private var deltaHandler: (HttpExchange, String) => Unit = _ - private val objectMapper = new ObjectMapper() - - private val metastoreJson = s"""{"metastore_id":"$testMetastoreId"}""" + private var deltaConfigHandler: HttpExchange => Unit = _ + private var deltaTablesHandler: HttpExchange => Unit = _ + private var legacyTablesHandler: HttpExchange => Unit = _ override def beforeAll(): Unit = { server = HttpServer.create(new InetSocketAddress("localhost", 0), 0) - - server.createContext("/", exchange => { - val body = readBody(exchange) - val path = exchange.getRequestURI.getPath - if (path.contains("/metastore_summary")) { - sendJson(exchange, HttpStatus.SC_OK, metastoreJson) - } else if (deltaHandler != null) { - deltaHandler(exchange, body) - } else { - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } - exchange.close() - }) - + server.createContext( + "/api/2.1/unity-catalog/delta/v1/config", + exchange => { + if (deltaConfigHandler != null) deltaConfigHandler(exchange) + else sendJson(exchange, HttpStatus.SC_NOT_FOUND, "{}") + exchange.close() + }) + server.createContext( + "/api/2.1/unity-catalog/delta/v1/catalogs", + exchange => { + if (deltaTablesHandler != null) deltaTablesHandler(exchange) + else sendJson(exchange, HttpStatus.SC_NOT_FOUND, "{}") + exchange.close() + }) + server.createContext( + "/api/2.1/unity-catalog/tables", + exchange => { + if (legacyTablesHandler != null) legacyTablesHandler(exchange) + else sendJson(exchange, HttpStatus.SC_NOT_FOUND, "{}") + exchange.close() + }) server.start() serverUri = s"http://localhost:${server.getAddress.getPort}" } override def afterAll(): Unit = if (server != null) server.stop(0) 
- override def beforeEach(): Unit = { deltaHandler = null } - - // --------------- helpers --------------- - private def loadTableJson( - tableUuid: String = testTableId, - format: String = "DELTA"): String = - s"""{"metadata":{"table-uuid":"$tableUuid","data-source-format":"$format",""" + - s""""properties":{"key1":"val1"},"partition-columns":["date"],"created-time":1000}}""" + override def beforeEach(): Unit = { + deltaConfigHandler = null + deltaTablesHandler = null + legacyTablesHandler = null + } - private def readBody(exchange: HttpExchange): String = { + private def readRequestBody(exchange: HttpExchange): String = { val is = exchange.getRequestBody try new String(is.readAllBytes(), StandardCharsets.UTF_8) finally is.close() } @@ -95,492 +102,627 @@ class UCDeltaTokenBasedRestClientSuite exchange.getResponseBody.close() } - private def tokenProvider(): TokenProvider = new TokenProvider { + private def createTokenProvider(): TokenProvider = new TokenProvider { override def accessToken(): String = "mock-token" override def initialize(configs: java.util.Map[String, String]): Unit = {} override def configs(): java.util.Map[String, String] = Collections.emptyMap() } - private def withClient(fn: UCDeltaTokenBasedRestClient => Unit): Unit = { - val client = new UCDeltaTokenBasedRestClient( - serverUri, tokenProvider(), Collections.emptyMap()) - try fn(client) finally client.close() - } + private def createDeltaClient(): UCDeltaTokenBasedRestClient = + new UCDeltaTokenBasedRestClient(serverUri, createTokenProvider(), Collections.emptyMap(), "main") - private def createCommit(version: Long): Commit = { - val fs = new FileStatus(1024L, false, 1, 4096L, 9999L, - new Path(s"/path/_delta_log/_staged_commits/$version.uuid.json")) - new Commit(version, fs, 5000L) + private def assertJsonEquals(actual: String, expected: String): Unit = { + assert(objectMapper.readTree(actual) === objectMapper.readTree(expected)) } - private def metadata( - schema: String = 
"""{"type":"struct"}""", - desc: String = "desc", - partitions: java.util.List[String] = Collections.emptyList(), - config: java.util.Map[String, String] = Collections.emptyMap()): AbstractMetadata = - new AbstractMetadata { - override def getId: String = "id" - override def getName: String = "name" - override def getDescription: String = desc - override def getProvider: String = "delta" - override def getFormatOptions: java.util.Map[String, String] = Collections.emptyMap() - override def getSchemaString: String = schema - override def getPartitionColumns: java.util.List[String] = partitions - override def getConfiguration: java.util.Map[String, String] = config - override def getCreatedTime: java.lang.Long = 0L - override def equals(obj: Any): Boolean = obj match { - case o: AbstractMetadata => - getId == o.getId && getName == o.getName && getDescription == o.getDescription && - getProvider == o.getProvider && getSchemaString == o.getSchemaString && - getPartitionColumns == o.getPartitionColumns && getConfiguration == o.getConfiguration - case _ => false - } - override def hashCode(): Int = - java.util.Objects.hash(getId, getName, getDescription, - getProvider, getSchemaString, getPartitionColumns, - getConfiguration) - } - - private def protocol(minReader: Int, minWriter: Int, - readerFeatures: JSet[String] = Collections.emptySet(), - writerFeatures: JSet[String] = Collections.emptySet()): AbstractProtocol = - new AbstractProtocol { - override def getMinReaderVersion: Int = minReader - override def getMinWriterVersion: Int = minWriter - override def getReaderFeatures: JSet[String] = readerFeatures - override def getWriterFeatures: JSet[String] = writerFeatures - override def equals(obj: Any): Boolean = obj match { - case o: AbstractProtocol => - getMinReaderVersion == o.getMinReaderVersion && - getMinWriterVersion == o.getMinWriterVersion && - getReaderFeatures == o.getReaderFeatures && getWriterFeatures == o.getWriterFeatures - case _ => false - } - override def 
hashCode(): Int = - java.util.Objects.hash(getMinReaderVersion: Integer, getMinWriterVersion: Integer, - getReaderFeatures, getWriterFeatures) - } - - // --------------- constructor tests --------------- + private def withDeltaClient(fn: UCDeltaTokenBasedRestClient => Unit): Unit = { + val client = new UCDeltaTokenBasedRestClient( + serverUri, + createTokenProvider(), + Collections.emptyMap()) + try fn(client) + finally client.close() + } - test("constructor validates required parameters") { - intercept[NullPointerException] { - new UCDeltaTokenBasedRestClient(null, tokenProvider(), Collections.emptyMap()) - } - intercept[NullPointerException] { - new UCDeltaTokenBasedRestClient(serverUri, null, Collections.emptyMap()) - } - intercept[NullPointerException] { - new UCDeltaTokenBasedRestClient(serverUri, tokenProvider(), null) + private def enableDeltaApiConfig(): Unit = { + deltaConfigHandler = exchange => { + assert(exchange.getRequestURI.getQuery.contains("catalog=main")) + assert(exchange.getRequestURI.getQuery.contains("protocol-versions=1.0")) + sendJson( + exchange, + HttpStatus.SC_OK, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) } } - // --------------- getMetastoreId --------------- - - test("getMetastoreId returns ID on success") { - withClient(c => assert(c.getMetastoreId() === testMetastoreId)) + private def loadTableResponseJson( + schemaJson: String = simpleSchemaJson, + includeColumns: Boolean = true): String = { + val columnsJson = if (includeColumns) { + s""", + | "columns": $schemaJson""".stripMargin + } else { + "" + } + s"""{ + | "metadata": { + | "name": "tbl", + | "catalog-name": "main", + | "schema-name": "default", + | "table-type": "MANAGED", + | "data-source-format": "DELTA", + | "table-uuid": "11111111-1111-1111-1111-111111111111", + | "location": "file:/tmp/uc/table", + | "created-time": 10, + | "updated-time": 11$columnsJson, + | 
"partition-columns": [], + | "properties": {} + | }, + | "commits": [] + |}""".stripMargin } - // --------------- loadTable --------------- - - test("loadTable returns AbstractMetadata with correct fields") { - withClient { c => - val m = c.loadTable(testCatalog, testSchema, testTable) - assert(m.getName === testTable) - assert(m.getId === testTableId) - assert(m.getProvider === "DELTA") - assert(m.getConfiguration.get("key1") === "val1") - assert(m.getPartitionColumns.get(0) === "date") - assert(m.getCreatedTime === 1000L) - } + private def simpleSchemaJson: String = + """{ + | "type": "struct", + | "fields": [ + | { + | "name": "id", + | "type": "long", + | "nullable": false, + | "metadata": {} + | } + | ] + |}""".stripMargin + + private def complexDeltaSchemaJson: String = + """{ + | "type": "struct", + | "fields": [ + | { + | "name": "id", + | "type": "long", + | "nullable": false, + | "metadata": { + | "delta.columnMapping.id": 1, + | "delta.columnMapping.physicalName": "col-1", + | "aliases": ["identifier", "primary_key"], + | "flags": [true, false], + | "weights": [1.5, 2.5], + | "nested": {"owner": "uc"}, + | "nestedArray": [{"name": "left"}, {"name": "right"}] + | } + | }, + | { + | "name": "price", + | "type": "decimal(10,2)", + | "nullable": true, + | "metadata": {} + | }, + | { + | "name": "tags", + | "type": { + | "type": "array", + | "elementType": "string", + | "containsNull": true + | }, + | "nullable": true, + | "metadata": {} + | }, + | { + | "name": "scores", + | "type": { + | "type": "map", + | "keyType": "string", + | "valueType": { + | "type": "struct", + | "fields": [ + | { + | "name": "value", + | "type": "double", + | "nullable": false, + | "metadata": {"comment": "score value"} + | }, + | { + | "name": "timestamp", + | "type": "long", + | "nullable": true, + | "metadata": {} + | } + | ] + | }, + | "valueContainsNull": false + | }, + | "nullable": false, + | "metadata": {} + | }, + | { + | "name": "details", + | "type": { + | "type": 
"struct", + | "fields": [ + | { + | "name": "active", + | "type": "boolean", + | "nullable": false, + | "metadata": {} + | } + | ] + | }, + | "nullable": true, + | "metadata": {} + | } + | ] + |}""".stripMargin + + private def complexDeltaApiSchemaJson: String = complexDeltaSchemaJson + .replace("\"elementType\"", "\"element-type\"") + .replace("\"containsNull\"", "\"contains-null\"") + .replace("\"keyType\"", "\"key-type\"") + .replace("\"valueType\"", "\"value-type\"") + .replace("\"valueContainsNull\"", "\"value-contains-null\"") + + private def sparkSchemaJson: String = + """{ + | "type": "struct", + | "fields": [ + | { + | "name": "tags", + | "type": { + | "type": "array", + | "elementType": "string", + | "containsNull": true + | }, + | "nullable": true, + | "metadata": {} + | }, + | { + | "name": "scores", + | "type": { + | "type": "map", + | "keyType": "string", + | "valueType": "long", + | "valueContainsNull": false + | }, + | "nullable": false, + | "metadata": {} + | } + | ] + |}""".stripMargin + + test("UCDeltaClient defaults fail loudly for UC Delta Rest Catalog API") { + val client = new UCDeltaClient { + override def getMetastoreId(): String = testMetastoreId + override def commit( + tableId: String, + tableUri: URI, + tableIdentifier: TableIdentifier, + commit: Optional[Commit], + lastKnownBackfilledVersion: Optional[java.lang.Long], + oldMetadata: Optional[AbstractMetadata], + newMetadata: Optional[AbstractMetadata], + oldProtocol: Optional[AbstractProtocol], + newProtocol: Optional[AbstractProtocol], + uniform: Optional[UniformMetadata]): Unit = {} + override def getCommits( + tableId: String, + tableUri: URI, + startVersion: Optional[java.lang.Long], + endVersion: Optional[java.lang.Long]): GetCommitsResponse = null + override def finalizeCreate( + tableName: String, + catalogName: String, + schemaName: String, + storageLocation: String, + columns: java.util.List[UCClient.ColumnDef], + properties: java.util.Map[String, String]): Unit = {} + 
override def close(): Unit = {} + } + + assert(!client.supportsUCDeltaRestCatalogApi()) + assert(intercept[UnsupportedOperationException] { + client.loadTable("main", "default", "tbl") + }.getMessage === "loadTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.createStagingTable("main", "default", "tbl") + }.getMessage === "createStagingTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.createTable("main", "default", new CreateTableRequest().name("tbl")) + }.getMessage === "createTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.updateTable("main", "default", "tbl", new UpdateTableRequest()) + }.getMessage === "updateTable requires UC Delta Rest Catalog API support.") } - test("loadTable throws IOException on server error") { - deltaHandler = (exchange, _) => sendJson(exchange, 500, """{"error":"fail"}""") - withClient { c => - val e = intercept[java.io.IOException] { c.loadTable(testCatalog, testSchema, testTable) } - assert(e.getMessage.contains("HTTP 500")) + test("default constructor does not enable UC Delta Rest Catalog API") { + withDeltaClient { client => + assert(!client.supportsUCDeltaRestCatalogApi()) + assert(intercept[UnsupportedOperationException] { + client.loadTable("main", "default", "tbl") + }.getMessage === "loadTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.createStagingTable("main", "default", "tbl") + }.getMessage === "createStagingTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.createTable("main", "default", new CreateTableRequest().name("tbl")) + }.getMessage === "createTable requires UC Delta Rest Catalog API support.") + assert(intercept[UnsupportedOperationException] { + client.updateTable("main", "default", "tbl", new 
UpdateTableRequest()) + }.getMessage === "updateTable requires UC Delta Rest Catalog API support.") } } - // --------------- commit via updateTable --------------- - - test("commit sends ASSERT_TABLE_UUID, ADD_COMMIT, and SET_LATEST_BACKFILLED_VERSION") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body - } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } - - withClient { c => - c.commit(testTableId, new URI("s3://bucket/table"), testIdentifier, - Optional.of(createCommit(5L)), Optional.of(java.lang.Long.valueOf(3L)), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()) + test( + "catalog-aware constructor uses UC Delta Rest Catalog API when config lists required endpoints") { + enableDeltaApiConfig() + deltaTablesHandler = exchange => { + assert(exchange.getRequestURI.getPath === + "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/tables/tbl") + sendJson(exchange, HttpStatus.SC_OK, loadTableResponseJson()) + } + legacyTablesHandler = exchange => fail(s"Unexpected legacy request: ${exchange.getRequestURI}") + + val client = createDeltaClient() + try { + assert(client.supportsUCDeltaRestCatalogApi()) + val metadata = client.loadTable("main", "default", "tbl") + assert(metadata.getId === "11111111-1111-1111-1111-111111111111") + assert(metadata.getName === "tbl") + assert(metadata.getProvider === "delta") + val adapter = metadata.asInstanceOf[UCDeltaTokenBasedRestClient.TableMetadataAdapter] + assert(adapter.getLocation === "file:/tmp/uc/table") + assert(adapter.getCreatedTime === Long.box(10L)) + assertJsonEquals(adapter.getSchemaString, simpleSchemaJson) + } finally { + client.close() } - - val json = objectMapper.readTree(captured) - val reqs = json.get("requirements") - assert(reqs.size() === 1) - assert(reqs.get(0).get("type").asText() === "assert-table-uuid") - assert(reqs.get(0).get("uuid").asText() === testTableId) 
- - val updates = json.get("updates") - val actions = (0 until updates.size()).map(i => updates.get(i).get("action").asText()).toSet - assert(actions === Set("add-commit", "set-latest-backfilled-version")) - - val addCommit = (0 until updates.size()).map(updates.get) - .find(_.get("action").asText() == "add-commit").get - assert(addCommit.get("commit").get("version").asLong() === 5L) - assert(addCommit.get("commit").get("timestamp").asLong() === 5000L) - assert(addCommit.get("commit").get("file-size").asLong() === 1024L) } - test("commit sends metadata diff updates") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body - } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) + test("loadTable converts UC SDK schema to Delta schema JSON") { + enableDeltaApiConfig() + deltaTablesHandler = exchange => { + assert(exchange.getRequestURI.getPath === + "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/tables/tbl") + sendJson(exchange, HttpStatus.SC_OK, loadTableResponseJson(complexDeltaApiSchemaJson)) } - val oldMeta = metadata(desc = "old", - config = new java.util.HashMap[String, String]() { - put("a", "1"); put("b", "2") - }) - val newMeta = metadata(desc = "new", - config = new java.util.HashMap[String, String]() { - put("a", "1"); put("c", "3") - }) - - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), - Optional.of(oldMeta), Optional.of(newMeta), - Optional.empty(), Optional.empty(), Optional.empty()) - } - - val actions = { - val updates = objectMapper.readTree(captured).get("updates") - (0 until updates.size()).map(i => updates.get(i).get("action").asText()).toSet + val client = createDeltaClient() + try { + val schemaString = client.loadTable("main", "default", "tbl") + .asInstanceOf[UCDeltaTokenBasedRestClient.TableMetadataAdapter] + .getSchemaString + assertJsonEquals(schemaString, 
complexDeltaSchemaJson) + } finally { + client.close() } - assert(actions.contains("set-table-comment")) - assert(actions.contains("set-properties")) - assert(actions.contains("remove-properties")) } - test("commit sends SET_PROTOCOL when protocol changes") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body + test("loadTable reports missing schema columns as IOException") { + enableDeltaApiConfig() + deltaTablesHandler = exchange => { + assert(exchange.getRequestURI.getPath === + "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/tables/tbl") + sendJson( + exchange, + HttpStatus.SC_OK, + loadTableResponseJson(includeColumns = false)) + } + + val client = createDeltaClient() + try { + val error = intercept[IOException] { + client.loadTable("main", "default", "tbl") } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) + assert(error.getMessage.contains("missing table schema columns")) + } finally { + client.close() } - - val oldProto = protocol(1, 2) - val newProto = protocol(3, 7, - java.util.Set.of("columnMapping"), java.util.Set.of("columnMapping", "v2Checkpoint")) - - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), - Optional.empty(), Optional.empty(), - Optional.of(oldProto), Optional.of(newProto), Optional.empty()) - } - - val updates = objectMapper.readTree(captured).get("updates") - val proto = (0 until updates.size()).map(updates.get) - .find(_.get("action").asText() == "set-protocol").get.get("protocol") - assert(proto.get("min-reader-version").asInt() === 3) - assert(proto.get("min-writer-version").asInt() === 7) } - test("commit skips metadata updates when old and new are equal") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body + test( + "catalog-aware constructor fails UC Delta Rest Catalog 
API loadTable when config is unavailable") { + deltaConfigHandler = exchange => sendJson(exchange, HttpStatus.SC_NOT_FOUND, "{}") + deltaTablesHandler = exchange => + fail(s"Unexpected UC Delta Rest Catalog API request: ${exchange.getRequestURI}") + legacyTablesHandler = exchange => + fail(s"Unexpected legacy request: ${exchange.getRequestURI}") + + val client = createDeltaClient() + try { + assert(!client.supportsUCDeltaRestCatalogApi()) + val e = intercept[UnsupportedOperationException] { + client.loadTable("main", "default", "tbl") } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } - - val m1 = metadata(desc = "same") - val m2 = metadata(desc = "same") - assert(m1 ne m2, "must be different objects") - - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), - Optional.of(m1), Optional.of(m2), - Optional.empty(), Optional.empty(), Optional.empty()) - } - - val updates = objectMapper.readTree(captured).get("updates") - val actions = (0 until updates.size()).map(i => updates.get(i).get("action").asText()).toSet - assert(!actions.contains("set-table-comment"), - "should skip metadata updates for equal metadata") - } - - test("commit skips protocol update when old and new are equal") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body + assert(e.getMessage === "loadTable requires UC Delta Rest Catalog API support.") + val stagingError = intercept[UnsupportedOperationException] { + client.createStagingTable("main", "default", "tbl") } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } - - val p1 = protocol(3, 7, java.util.Set.of("f1"), java.util.Set.of("f2")) - val p2 = protocol(3, 7, java.util.Set.of("f1"), java.util.Set.of("f2")) - assert(p1 ne p2, "must be different objects") - - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), 
Optional.empty(), - Optional.empty(), Optional.empty(), - Optional.of(p1), Optional.of(p2), Optional.empty()) + assert(stagingError.getMessage === + "createStagingTable requires UC Delta Rest Catalog API support.") + val createError = intercept[UnsupportedOperationException] { + client.createTable("main", "default", new CreateTableRequest().name("tbl")) + } + assert(createError.getMessage === "createTable requires UC Delta Rest Catalog API support.") + val updateError = intercept[UnsupportedOperationException] { + client.updateTable("main", "default", "tbl", new UpdateTableRequest()) + } + assert(updateError.getMessage === "updateTable requires UC Delta Rest Catalog API support.") + } finally { + client.close() } - - val updates = objectMapper.readTree(captured).get("updates") - val actions = (0 until updates.size()).map(i => updates.get(i).get("action").asText()).toSet - assert(!actions.contains("set-protocol"), "should skip protocol update for equal protocols") } - test("commit with uniform iceberg metadata (numeric timestamp)") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body + test("createStagingTable and createTable call UC Delta Rest Catalog API endpoints") { + enableDeltaApiConfig() + var sawStagingCreate = false + var sawTableCreate = false + deltaTablesHandler = exchange => { + assert(exchange.getRequestMethod === "POST") + exchange.getRequestURI.getPath match { + case "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/staging-tables" => + sawStagingCreate = true + val body = objectMapper.readTree(readRequestBody(exchange)) + assert(body.get("name").asText === "tbl") + sendJson( + exchange, + HttpStatus.SC_OK, + """{ + | "table-id": "22222222-2222-2222-2222-222222222222", + | "table-type": "MANAGED", + | "location": "s3://bucket/path/to/table", + | "storage-credentials": [], + | "required-protocol": { + | "min-reader-version": 1, + | "min-writer-version": 2 + | 
}, + | "required-properties": { + | "delta.feature.catalogManaged": "supported" + | } + |}""".stripMargin) + case "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/tables" => + sawTableCreate = true + val body = objectMapper.readTree(readRequestBody(exchange)) + assert(body.get("name").asText === "tbl") + val fields = body.get("columns").get("fields") + val arrayType = fields.get(0).get("type") + assert(arrayType.has("element-type")) + assert(arrayType.has("contains-null")) + assert(!arrayType.has("elementType")) + assert(!arrayType.has("containsNull")) + val mapType = fields.get(1).get("type") + assert(mapType.has("key-type")) + assert(mapType.has("value-type")) + assert(mapType.has("value-contains-null")) + assert(!mapType.has("keyType")) + assert(!mapType.has("valueType")) + assert(!mapType.has("valueContainsNull")) + sendJson(exchange, HttpStatus.SC_OK, loadTableResponseJson()) + case path => + fail(s"Unexpected UC Delta Rest Catalog API request path: $path") } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) } - val uniform = new UniformMetadata( - new IcebergMetadata("s3://bucket/v1.json", 42L, "1704337991423")) - - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), - Optional.of(uniform)) - } + val client = createDeltaClient() + try { + val staging = client.createStagingTable("main", "default", "tbl") + assert(staging.getTableId.toString === "22222222-2222-2222-2222-222222222222") + assert(staging.getLocation === "s3://bucket/path/to/table") + + val created = client.createTable( + "main", + "default", + new CreateTableRequest().name("tbl").schemaString(sparkSchemaJson)) + assert( + created.asInstanceOf[UCDeltaTokenBasedRestClient.TableMetadataAdapter].getLocation === + "file:/tmp/uc/table") + } finally { + client.close() + } + assert(sawStagingCreate) + assert(sawTableCreate) + } - val 
addCommit = { - val updates = objectMapper.readTree(captured).get("updates") - (0 until updates.size()).map(updates.get) - .find(_.get("action").asText() == "add-commit").get + test("updateTable calls UC Delta Rest Catalog API endpoint") { + enableDeltaApiConfig() + deltaTablesHandler = exchange => { + assert(exchange.getRequestMethod === "POST") + assert(exchange.getRequestURI.getPath === + "/api/2.1/unity-catalog/delta/v1/catalogs/main/schemas/default/tables/tbl") + val body = objectMapper.readTree(readRequestBody(exchange)) + val requirements = body.get("requirements") + assert(requirements.size() === 1) + assert(requirements.get(0).get("type").asText === "assert-table-uuid") + assert(requirements.get(0).get("uuid").asText === + "11111111-1111-1111-1111-111111111111") + + val updates = body.get("updates") + assert(updates.size() === 6) + val addCommit = updates.get(0) + assert(addCommit.get("action").asText === "add-commit") + assert(addCommit.get("commit").get("version").asLong === 7L) + assert(addCommit.get("commit").get("timestamp").asLong === 100L) + assert(addCommit.get("commit").get("file-name").asText === "0007.uuid.json") + assert(addCommit.get("commit").get("file-size").asLong === 32L) + assert(addCommit.get("commit").get("file-modification-timestamp").asLong === 200L) + + val setColumns = updates.get(1) + assert(setColumns.get("action").asText === "set-columns") + val fields = setColumns.get("columns").get("fields") + val arrayType = fields.get(0).get("type") + assert(arrayType.has("element-type")) + assert(arrayType.has("contains-null")) + assert(!arrayType.has("elementType")) + assert(!arrayType.has("containsNull")) + val mapType = fields.get(1).get("type") + assert(mapType.has("key-type")) + assert(mapType.has("value-type")) + assert(mapType.has("value-contains-null")) + assert(!mapType.has("keyType")) + assert(!mapType.has("valueType")) + assert(!mapType.has("valueContainsNull")) + + val setProtocol = updates.get(2) + 
assert(setProtocol.get("action").asText === "set-protocol") + assert(setProtocol.get("protocol").get("min-reader-version").asInt === 1) + assert(setProtocol.get("protocol").get("min-writer-version").asInt === 7) + assert(setProtocol.get("protocol").get("writer-features").get(0).asText === "domainMetadata") + + val setProperties = updates.get(3) + assert(setProperties.get("action").asText === "set-properties") + assert(setProperties.get("updates").get("delta.appendOnly").asText === "true") + val removeProperties = updates.get(4) + assert(removeProperties.get("action").asText === "remove-properties") + assert(removeProperties.get("removals").get(0).asText === "old.prop") + val latestBackfilled = updates.get(5) + assert(latestBackfilled.get("action").asText === "set-latest-backfilled-version") + assert(latestBackfilled.get("latest-published-version").asLong === 6L) + + sendJson(exchange, HttpStatus.SC_OK, loadTableResponseJson()) + } + + val client = createDeltaClient() + try { + val updated = client.updateTable( + "main", + "default", + "tbl", + new UpdateTableRequest() + .addRequirementsItem(TableRequirement.assertTableUuid( + UUID.fromString("11111111-1111-1111-1111-111111111111"))) + .addUpdatesItem(TableUpdate.addCommit( + new DeltaCommit() + .version(7L) + .timestamp(100L) + .fileName("0007.uuid.json") + .fileSize(32L) + .fileModificationTimestamp(200L), + null)) + .addUpdatesItem(TableUpdate.setColumns(sparkSchemaJson)) + .addUpdatesItem(TableUpdate.setProtocolUpdate( + new DeltaProtocol() + .minReaderVersion(1) + .minWriterVersion(7) + .writerFeatures(JArrays.asList("domainMetadata")))) + .addUpdatesItem(TableUpdate.setProperties( + Collections.singletonMap("delta.appendOnly", "true"))) + .addUpdatesItem(TableUpdate.removeProperties(JArrays.asList("old.prop"))) + .addUpdatesItem(TableUpdate.setLatestBackfilledVersion(6L))) + assert( + updated.asInstanceOf[UCDeltaTokenBasedRestClient.TableMetadataAdapter].getLocation === + "file:/tmp/uc/table") + } finally { + 
client.close() } - val iceberg = addCommit.get("uniform").get("iceberg") - assert(iceberg.get("metadata-location").asText() === "s3://bucket/v1.json") - assert(iceberg.get("converted-delta-version").asLong() === 42L) - assert(iceberg.get("converted-delta-timestamp").asLong() === 1704337991423L) } - test("commit with uniform iceberg metadata (ISO-8601 timestamp)") { - var captured: String = null - deltaHandler = (exchange, body) => { - if (exchange.getRequestMethod == "POST") { - captured = body + test("createStagingTable, createTable, and updateTable wrap HTTP errors") { + enableDeltaApiConfig() + deltaTablesHandler = exchange => + sendJson(exchange, HttpStatus.SC_INTERNAL_SERVER_ERROR, """{"error":"boom"}""") + + val client = createDeltaClient() + try { + val stagingError = intercept[java.io.IOException] { + client.createStagingTable("main", "default", "tbl") } - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } + assert(stagingError.getMessage.contains( + "Failed to create staging table main.default.tbl via UC Delta Rest Catalog API (HTTP 500)")) - val uniform = new UniformMetadata( - new IcebergMetadata("s3://bucket/v1.json", 42L, "2025-01-04T03:13:11.423Z")) + val createError = intercept[java.io.IOException] { + client.createTable("main", "default", new CreateTableRequest().name("tbl")) + } + assert(createError.getMessage.contains( + "Failed to create table main.default.tbl via UC Delta Rest Catalog API (HTTP 500)")) - withClient { c => - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), - Optional.of(uniform)) - } + val unnamedCreateError = intercept[java.io.IOException] { + client.createTable("main", "default", new CreateTableRequest()) + } + assert(unnamedCreateError.getMessage.contains( + "Failed to create table main.default. 
via UC Delta Rest Catalog API (HTTP 500)")) - val addCommit = { - val updates = objectMapper.readTree(captured).get("updates") - (0 until updates.size()).map(updates.get) - .find(_.get("action").asText() == "add-commit").get + val updateError = intercept[java.io.IOException] { + client.updateTable("main", "default", "tbl", new UpdateTableRequest()) + } + assert(updateError.getMessage.contains( + "Failed to update table main.default.tbl via UC Delta Rest Catalog API (HTTP 500)")) + } finally { + client.close() } - val iceberg = addCommit.get("uniform").get("iceberg") - assert(iceberg.get("converted-delta-timestamp").asLong() === 1735960391423L) } - // --------------- error handling --------------- + test("createStagingTable, createTable, and updateTable validate required parameters") { + enableDeltaApiConfig() - test("commit throws CommitFailedException on 409") { - deltaHandler = (exchange, _) => { - if (exchange.getRequestMethod == "POST") { - sendJson(exchange, HttpStatus.SC_CONFLICT, - """{"error":"conflict"}""") - } else { - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) + val client = createDeltaClient() + try { + intercept[NullPointerException] { + client.createStagingTable(null, "default", "tbl") } - } - withClient { c => - val e = intercept[CommitFailedException] { - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()) + intercept[NullPointerException] { + client.createStagingTable("main", null, "tbl") } - assert(e.getRetryable && e.getConflict) - } - } - - test("commit throws InvalidTargetTableException on 404") { - deltaHandler = (exchange, _) => { - if (exchange.getRequestMethod == "POST") { - sendJson(exchange, HttpStatus.SC_NOT_FOUND, - """{"error":"not found"}""") - } else { - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) + intercept[NullPointerException] { + 
client.createStagingTable("main", "default", null) } - } - withClient { c => - intercept[InvalidTargetTableException] { - c.commit(testTableId, new URI("s3://b/t"), testIdentifier, - Optional.of(createCommit(1L)), Optional.empty(), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()) + intercept[NullPointerException] { + client.createTable(null, "default", new CreateTableRequest().name("tbl")) } - } - } - - // --------------- createStagingTable --------------- - - test("createStagingTable sends request and converts response fields") { - var captured: String = null - val stagingJson = - s"""{ - | "table-id":"$testTableId", - | "table-type":"MANAGED", - | "location":"s3://bucket/staging", - | "required-protocol":{ - | "min-reader-version":3,"min-writer-version":7, - | "reader-features":["columnMapping"], - | "writer-features":["columnMapping","v2Checkpoint"] - | }, - | "suggested-protocol":{"reader-features":["deletionVectors"]}, - | "required-properties":{"delta.enableChangeDataFeed":"true"}, - | "suggested-properties":{"delta.autoOptimize.optimizeWrite":"true"} - |}""".stripMargin - - deltaHandler = (exchange, body) => { - captured = body - sendJson(exchange, HttpStatus.SC_OK, stagingJson) - } - - withClient { c => - val info = c.createStagingTable(testCatalog, testSchema, testTable) - - // verify request body - val req = objectMapper.readTree(captured) - assert(req.get("name").asText() === testTable) - - // verify all response fields converted - assert(info.getTableId === testTableId) - assert(info.getTableType === UCDeltaModels.TableType.MANAGED) - assert(info.getLocation === "s3://bucket/staging") - - val rp = info.getRequiredProtocol - assert(rp.getMinReaderVersion === 3) - assert(rp.getMinWriterVersion === 7) - assert(rp.getReaderFeatures.contains("columnMapping")) - assert(rp.getWriterFeatures.size() === 2) - - val sp = info.getSuggestedProtocol - assert(sp.getReaderFeatures.contains("deletionVectors")) - 
assert(sp.getWriterFeatures.isEmpty) - - assert(info.getRequiredProperties.get("delta.enableChangeDataFeed") === "true") - assert(info.getSuggestedProperties.get("delta.autoOptimize.optimizeWrite") === "true") - } - } - - test("createStagingTable throws IOException on server error") { - deltaHandler = (exchange, _) => - sendJson(exchange, HttpStatus.SC_INTERNAL_SERVER_ERROR, """{"error":"fail"}""") - withClient { c => - val e = intercept[java.io.IOException] { - c.createStagingTable(testCatalog, testSchema, testTable) + intercept[NullPointerException] { + client.createTable("main", null, new CreateTableRequest().name("tbl")) } - assert(e.getMessage.contains("HTTP 500")) - } - } - - // --------------- getCommits --------------- - - test("getCommits throws UnsupportedOperationException") { - withClient { c => - intercept[UnsupportedOperationException] { - c.getCommits(testTableId, new URI("s3://b/t"), Optional.empty(), Optional.empty()) + intercept[NullPointerException] { + client.createTable("main", "default", null) } - } - } - - // --------------- finalizeCreate --------------- - - test("finalizeCreate sends createTable request with columns and properties") { - var captured: String = null - deltaHandler = (exchange, body) => { - captured = body - sendJson(exchange, HttpStatus.SC_OK, loadTableJson()) - } - - val columns = java.util.List.of( - new UCClient.ColumnDef("id", "LONG", "long", """{"type":"long"}""", false, 0), - new UCClient.ColumnDef("name", "STRING", "string", """{"type":"string"}""", true, 1)) - val props = new java.util.HashMap[String, String]() - props.put("delta.minReaderVersion", "1") - - withClient { c => - c.finalizeCreate("my_table", testCatalog, testSchema, "s3://bucket/tbl", columns, props) - } - - val json = objectMapper.readTree(captured) - assert(json.get("name").asText() === "my_table") - assert(json.get("location").asText() === "s3://bucket/tbl") - assert(json.get("properties").get("delta.minReaderVersion").asText() === "1") - - val fields 
= json.get("columns").get("fields") - assert(fields.size() === 2) - assert(fields.get(0).get("name").asText() === "id") - assert(fields.get(0).get("nullable").asBoolean() === false) - assert(fields.get(1).get("name").asText() === "name") - assert(fields.get(1).get("nullable").asBoolean() === true) - } - - test("finalizeCreate throws CommitFailedException on server error") { - deltaHandler = (exchange, _) => - sendJson(exchange, HttpStatus.SC_INTERNAL_SERVER_ERROR, """{"error":"fail"}""") - - withClient { c => - val e = intercept[CommitFailedException] { - c.finalizeCreate("t", testCatalog, testSchema, "s3://b/t", - Collections.emptyList(), Collections.emptyMap()) + intercept[NullPointerException] { + client.updateTable(null, "default", "tbl", new UpdateTableRequest()) } - assert(e.getRetryable) - } - } - - test("finalizeCreate validates required parameters") { - withClient { c => intercept[NullPointerException] { - c.finalizeCreate(null, "c", "s", "loc", Collections.emptyList(), Collections.emptyMap()) + client.updateTable("main", null, "tbl", new UpdateTableRequest()) } intercept[NullPointerException] { - c.finalizeCreate("t", null, "s", "loc", Collections.emptyList(), Collections.emptyMap()) + client.updateTable("main", "default", null, new UpdateTableRequest()) } + intercept[NullPointerException] { + client.updateTable("main", "default", "tbl", null) + } + } finally { + client.close() } } - // --------------- close / ensureOpen --------------- + test("catalog-aware constructor disables UC Delta Rest Catalog API when config does not list loadTable") { + deltaConfigHandler = exchange => + sendJson( + exchange, + HttpStatus.SC_OK, + """{ + | "endpoints": [ + | "GET /v1/catalogs/{catalog}/schemas/{schema}/tables/{table}/credentials" + | ], + | "protocol-version": "1.0" + |}""".stripMargin) + + val client = createDeltaClient() + try { + assert(!client.supportsUCDeltaRestCatalogApi()) + } finally { + client.close() + } + } - test("operations after close throw 
IllegalStateException") { - val client = new UCDeltaTokenBasedRestClient( - serverUri, tokenProvider(), Collections.emptyMap()) - client.close() - intercept[IllegalStateException] { client.getMetastoreId() } - intercept[IllegalStateException] { client.loadTable("c", "s", "t") } + test("catalog-aware constructor fails when config probe fails") { + deltaConfigHandler = exchange => + sendJson(exchange, HttpStatus.SC_INTERNAL_SERVER_ERROR, """{"error":"boom"}""") + + val e = intercept[IllegalArgumentException] { + createDeltaClient() + } + assert(e.getMessage.contains("Failed to determine UC Delta Rest Catalog API support")) + assert(e.getMessage.contains("HTTP 500")) } }