diff --git a/build.sbt b/build.sbt
index 1a0221c7bc2..2045abd124a 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1163,6 +1163,37 @@ lazy val storage = (project in file("storage"))
     commonSettings,
     exportJars := true,
     javaOnlyReleaseSettings,
+
+    // Use the shaded kernel-api jar. A direct project dependency puts kernel-api's unshaded
+    // class directory on downstream classpaths, which conflicts with Kernel's shaded Jackson API.
+    Compile / unmanagedJars += (kernelApi / Compile / packageBin).value,
+    Test / unmanagedJars += (kernelApi / Compile / packageBin).value,
+    Compile / compile := (Compile / compile).dependsOn(kernelApi / Compile / packageBin).value,
+    Test / test := (Test / test).dependsOn(kernelApi / Compile / packageBin).value,
+
+    // delta-storage exposes Kernel types in its public API, so the published POM must still
+    // declare delta-kernel-api even though local compilation uses the shaded jar above.
+    pomPostProcess := { node =>
+      val ver = version.value
+      import scala.xml._
+      import scala.xml.transform._
+
+      val kernelApiDependency =
+        <dependency>
+          <groupId>io.delta</groupId>
+          <artifactId>delta-kernel-api</artifactId>
+          <version>{ver}</version>
+        </dependency>
+
+      new RuleTransformer(new RewriteRule {
+        override def transform(n: Node): Seq[Node] = n match {
+          case e: Elem if e.label == "dependencies" =>
+            Seq(e.copy(child = e.child ++ kernelApiDependency))
+          case _ => Seq(n)
+        }
+      }).transform(node).head
+    },
+
     libraryDependencies ++= Seq(
       // User can provide any 2.x or 3.x version. We don't use any new fancy APIs. Watch out for
       // versions with known vulnerabilities.
diff --git a/kernel/unitycatalog/src/main/java/io/delta/kernel/unitycatalog/adapters/MetadataAdapter.java b/kernel/unitycatalog/src/main/java/io/delta/kernel/unitycatalog/adapters/MetadataAdapter.java
index c01d608e640..ef573f4ed05 100644
--- a/kernel/unitycatalog/src/main/java/io/delta/kernel/unitycatalog/adapters/MetadataAdapter.java
+++ b/kernel/unitycatalog/src/main/java/io/delta/kernel/unitycatalog/adapters/MetadataAdapter.java
@@ -18,6 +18,7 @@
 
 import io.delta.kernel.internal.actions.Metadata;
 import io.delta.kernel.internal.util.VectorUtils;
+import io.delta.kernel.types.StructType;
 import io.delta.storage.commit.actions.AbstractMetadata;
 import java.util.*;
 
@@ -58,6 +59,11 @@ public Map<String, String> getFormatOptions() {
     return Collections.unmodifiableMap(kernelMetadata.getFormat().getOptions());
   }
 
+  @Override
+  public StructType getSchema() {
+    return kernelMetadata.getSchema();
+  }
+
   @Override
   public String getSchemaString() {
     return kernelMetadata.getSchemaString();
diff --git a/kernel/unitycatalog/src/test/scala/io/delta/kernel/unitycatalog/adapters/ActionAdaptersSuite.scala b/kernel/unitycatalog/src/test/scala/io/delta/kernel/unitycatalog/adapters/ActionAdaptersSuite.scala
index 3f567b71246..117a753ec31 100644
--- a/kernel/unitycatalog/src/test/scala/io/delta/kernel/unitycatalog/adapters/ActionAdaptersSuite.scala
+++ b/kernel/unitycatalog/src/test/scala/io/delta/kernel/unitycatalog/adapters/ActionAdaptersSuite.scala
@@ -74,6 +74,7 @@ class ActionAdaptersSuite extends AnyFunSuite {
     assert(adapter.getDescription === "description")
     assert(adapter.getProvider === "parquet")
     assert(adapter.getFormatOptions.asScala == Map("foo" -> "bar"))
+    assert(adapter.getSchema === kernelMetadata.getSchema)
     assert(adapter.getSchemaString === "schemaStringJson")
     assert(adapter.getPartitionColumns.asScala == Seq("part1"))
     assert(adapter.getConfiguration.asScala == Map("zip" -> "zap"))
diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala b/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala
index 86e2a1d0694..4700a85c8f5 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala
@@ -1251,6 +1251,12 @@ case class Metadata(
     .map(DataType.fromJson(_).asInstanceOf[StructType])
     .getOrElse(StructType.apply(Nil))
 
+  /** Returns the schema as a Kernel [[io.delta.kernel.types.StructType]] */
+  @JsonIgnore
+  private lazy val kernelSchema: io.delta.kernel.types.StructType = Option(schemaString)
+    .map(io.delta.kernel.internal.types.DataTypeJsonSerDe.deserializeStructType)
+    .orNull
+
   /** Returns the partitionSchema as a [[StructType]] */
   @JsonIgnore
   lazy val partitionSchema: StructType =
@@ -1312,6 +1318,9 @@ case class Metadata(
   @JsonIgnore
   override def getFormatOptions: java.util.Map[String, String] = format.options.asJava
 
+  @JsonIgnore
+  override def getSchema: io.delta.kernel.types.StructType = kernelSchema
+
   override def getSchemaString: String = schemaString
 
   override def getPartitionColumns: java.util.List[String] = partitionColumns.asJava
diff --git a/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala b/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala
index f50244bd421..8dc5bde2697 100644
--- a/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala
@@ -134,6 +134,19 @@ class ActionSerializerSuite extends QueryTest with SharedSparkSession with Delta
       new StructType().json,
       Seq("a")))
 
+  test("Metadata getSchema parses schemaString as Kernel schema") {
+    val schemaString = new StructType()
+      .add("id", "long")
+      .add("nested", new StructType().add("name", "string"))
+      .json
+    val metadata = Metadata(schemaString = schemaString)
+    val expected =
+      io.delta.kernel.internal.types.DataTypeJsonSerDe.deserializeStructType(schemaString)
+
+    assert(metadata.getSchema === expected)
+    assert(Metadata().getSchema === null)
+  }
+
   test("extra fields") {
     // TODO reading from checkpoint
     Action.fromJson("""{"txn": {"test": 1}}""")
diff --git a/storage/src/main/java/io/delta/storage/commit/actions/AbstractMetadata.java b/storage/src/main/java/io/delta/storage/commit/actions/AbstractMetadata.java
index 22670afe30c..cea28cba3d8 100644
--- a/storage/src/main/java/io/delta/storage/commit/actions/AbstractMetadata.java
+++ b/storage/src/main/java/io/delta/storage/commit/actions/AbstractMetadata.java
@@ -16,8 +16,11 @@
 
 package io.delta.storage.commit.actions;
 
-import java.util.Map;
 import java.util.List;
+import java.util.Map;
+
+import io.delta.kernel.internal.types.DataTypeJsonSerDe;
+import io.delta.kernel.types.StructType;
 
 /**
  * Interface for metadata actions in Delta. The metadata defines the metadata
@@ -46,6 +49,17 @@ public interface AbstractMetadata {
   /** The format options */
   Map<String, String> getFormatOptions();
 
+  /**
+   * The table schema as a Delta Kernel type.
+   *
+   * <p>The default implementation parses {@link #getSchemaString()}; if {@code getSchemaString()}
+   * returns {@code null}, this method returns {@code null}.
+   */
+  default StructType getSchema() {
+    String schemaString = getSchemaString();
+    return schemaString == null ? null : DataTypeJsonSerDe.deserializeStructType(schemaString);
+  }
+
   /**
    * The table schema in string representation.
    */