Skip to content
Snippets Groups Projects
Commit 091f81e1 authored by Josh Rosen
Browse files

[SPARK-15762][SQL] Cache Metadata & StructType hashCodes; use singleton Metadata.empty

We should cache `Metadata.hashCode` and use a singleton for `Metadata.empty` because calculating metadata hashCodes appears to be a bottleneck for certain workloads.

We should also cache `StructType.hashCode`.

In an optimizer stress-test benchmark run by ericl, these `hashCode` calls accounted for roughly 40% of the total CPU time, and this bottleneck was completely eliminated by the caching added by this patch.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #13504 from JoshRosen/metadata-fix.
parent 681387b2
No related branches found
No related tags found
No related merge requests found
...@@ -104,7 +104,8 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any]) ...@@ -104,7 +104,8 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
} }
} }
override def hashCode: Int = Metadata.hash(this) private lazy val _hashCode: Int = Metadata.hash(this)
override def hashCode: Int = _hashCode
private def get[T](key: String): T = { private def get[T](key: String): T = {
map(key).asInstanceOf[T] map(key).asInstanceOf[T]
...@@ -115,8 +116,10 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any]) ...@@ -115,8 +116,10 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
object Metadata { object Metadata {
private[this] val _empty = new Metadata(Map.empty)
/** Returns an empty Metadata. */ /** Returns an empty Metadata. */
def empty: Metadata = new Metadata(Map.empty) def empty: Metadata = _empty
/** Creates a Metadata instance from JSON. */ /** Creates a Metadata instance from JSON. */
def fromJson(json: String): Metadata = { def fromJson(json: String): Metadata = {
......
...@@ -112,7 +112,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru ...@@ -112,7 +112,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
} }
} }
override def hashCode(): Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]]) private lazy val _hashCode: Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]])
override def hashCode(): Int = _hashCode
/** /**
* Creates a new [[StructType]] by adding a new field. * Creates a new [[StructType]] by adding a new field.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment