Skip to content
Snippets Groups Projects
Commit 4afa39e2 authored by Eric Liang's avatar Eric Liang Committed by Wenchen Fan
Browse files

[SPARK-18333][SQL] Revert hacks in parquet and orc reader to support case insensitive resolution

## What changes were proposed in this pull request?

These are no longer needed after https://issues.apache.org/jira/browse/SPARK-17183

cc cloud-fan

## How was this patch tested?

Existing parquet and orc tests.

Author: Eric Liang <ekl@databricks.com>

Closes #15799 from ericl/sc-4929.
parent 55964c15
No related branches found
No related tags found
No related merge requests found
...@@ -269,15 +269,11 @@ private[parquet] object ParquetReadSupport { ...@@ -269,15 +269,11 @@ private[parquet] object ParquetReadSupport {
*/ */
// Clips each field of `structType` against the corresponding field of the
// Parquet record, producing the Parquet types to request from the reader.
// Fields are resolved by exact (case-sensitive) name only: the lower-cased
// fallback map was removed because case-insensitive resolution now happens
// at analysis time (SPARK-17183), per this commit's message. A requested
// field absent from the Parquet record is synthesized by converting its
// Catalyst type with `toParquet`, so the returned Seq always has one entry
// per field of `structType`, in order.
private def clipParquetGroupFields(
    parquetRecord: GroupType, structType: StructType): Seq[Type] = {
  // Exact-name lookup from Parquet field name to its Parquet type.
  val parquetFieldMap = parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
  // Converter used only for requested fields missing from the file schema.
  val toParquet = new ParquetSchemaConverter(writeLegacyParquetFormat = false)
  structType.map { f =>
    parquetFieldMap
      .get(f.name)
      .map(clipParquetType(_, f.dataType))
      .getOrElse(toParquet.convertField(f))
  }
}
...@@ -1080,34 +1080,6 @@ class ParquetSchemaSuite extends ParquetSchemaTest { ...@@ -1080,34 +1080,6 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
} }
} }
// Schema-clipping case shown as deleted by this commit: the Parquet schema
// declares upper-case names ("A", "B") while the Catalyst schema requests
// lower-case ones ("a", "b"), and the expected clipped schema keeps the
// Parquet casing — i.e. it exercised the reader-side case-insensitive
// fallback that this commit removes (resolution now happens at analysis
// time per SPARK-17183, so the test is deleted along with the fallback).
testSchemaClipping(
"falls back to case insensitive resolution",
parquetSchema =
"""message root {
| required group A {
| optional int32 B;
| }
| optional int32 c;
|}
""".stripMargin,
catalystSchema = {
val nestedType = new StructType().add("b", IntegerType, nullable = true)
new StructType()
.add("a", nestedType, nullable = true)
.add("c", IntegerType, nullable = true)
},
expectedSchema =
"""message root {
| required group A {
| optional int32 B;
| }
| optional int32 c;
|}
""".stripMargin)
testSchemaClipping( testSchemaClipping(
"simple nested struct", "simple nested struct",
......
...@@ -305,17 +305,7 @@ private[orc] object OrcRelation extends HiveInspectors { ...@@ -305,17 +305,7 @@ private[orc] object OrcRelation extends HiveInspectors {
// Records in `conf` which physical ORC columns must be read to satisfy
// `requestedSchema`. Each requested field is resolved against
// `physicalSchema` by exact name only — the lower-cased fallback map and
// its custom IllegalArgumentException were removed by this commit
// (case-insensitive resolution now happens at analysis time, SPARK-17183).
// NOTE(review): `fieldIndex` presumably throws when a requested name is
// absent from the physical schema — confirm against StructType's contract.
def setRequiredColumns(
    conf: Configuration, physicalSchema: StructType, requestedSchema: StructType): Unit = {
  // Ordinal of each requested column in the physical schema, boxed for Hive.
  val ids = requestedSchema.map(a => physicalSchema.fieldIndex(a.name): Integer)
  // Sort the (id, name) pairs by id so ids and names stay aligned when passed on.
  val (sortedIDs, sortedNames) = ids.zip(requestedSchema.fieldNames).sorted.unzip
  HiveShim.appendReadColumns(conf, sortedIDs, sortedNames)
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment