Skip to content
Snippets Groups Projects
Commit 84e2c8bf authored by Michael Armbrust's avatar Michael Armbrust
Browse files

[SQL] Add test case with workaround for reading partitioned Avro files

In order to read from partitioned Avro files we need to also set the `SERDEPROPERTIES` since `TBLPROPERTIES` are not passed to the initialization.  This PR simply adds a test to make sure we don't break this workaround.

Author: Michael Armbrust <michael@databricks.com>

Closes #2340 from marmbrus/avroPartitioned and squashes the following commits:

6b969d6 [Michael Armbrust] fix style
fea2124 [Michael Armbrust] Add test case with workaround for reading partitioned avro files.
parent 79cdb9b6
No related branches found
No related tags found
No related merge requests found
...@@ -269,7 +269,74 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { ...@@ -269,7 +269,74 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
|) |)
""".stripMargin.cmd, """.stripMargin.cmd,
s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd
) ),
// THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned AS DYNAMIC
// PARTITIONING IS NOT YET SUPPORTED
// Partitioned, Avro-backed test table. Setup runs three commands in order: create the table,
// re-declare the Avro schema as SERDEPROPERTIES, then load one static partition (doctor_pt=1)
// from the unpartitioned `episodes` table.
TestTable("episodes_part",
// Step 1: create the table, partitioned by doctor_pt, with the Avro schema given in TBLPROPERTIES.
s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
|PARTITIONED BY (doctor_pt INT)
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
|STORED AS
|INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
|OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
|TBLPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",
| "name": "episodes",
| "namespace": "testing.hive.avro.serde",
| "fields": [
| {
| "name": "title",
| "type": "string",
| "doc": "episode title"
| },
| {
| "name": "air_date",
| "type": "string",
| "doc": "initial date"
| },
| {
| "name": "doctor",
| "type": "int",
| "doc": "main actor playing the Doctor in episode"
| }
| ]
| }'
|)
""".stripMargin.cmd,
// Step 2 -- WORKAROUND: Required to pass schema to SerDe for partitioned tables.
// TBLPROPERTIES are not passed to the SerDe initialization when reading partitions, so the
// same schema literal is repeated here as SERDEPROPERTIES. Keep both copies in sync.
// TODO: Pass this automatically from the table to partitions.
s"""
|ALTER TABLE episodes_part SET SERDEPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",
| "name": "episodes",
| "namespace": "testing.hive.avro.serde",
| "fields": [
| {
| "name": "title",
| "type": "string",
| "doc": "episode title"
| },
| {
| "name": "air_date",
| "type": "string",
| "doc": "initial date"
| },
| {
| "name": "doctor",
| "type": "int",
| "doc": "main actor playing the Doctor in episode"
| }
| ]
| }'
|)
""".stripMargin.cmd,
// Step 3: copy every row of `episodes` into the single static partition doctor_pt=1.
// This string has no margin bars, so stripMargin is not applied to it.
s"""
INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1)
SELECT title, air_date, doctor FROM episodes
""".cmd
)
) )
hiveQTestUtilTables.foreach(registerTestTable) hiveQTestUtilTables.foreach(registerTestTable)
......
The Eleventh Hour 3 April 2010 11 1
The Doctor's Wife 14 May 2011 11 1
Horror of Fang Rock 3 September 1977 4 1
An Unearthly Child 23 November 1963 1 1
The Mysterious Planet 6 September 1986 6 1
Rose 26 March 2005 9 1
The Power of the Daleks 5 November 1966 2 1
Castrolava 4 January 1982 5 1
...@@ -37,4 +37,6 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll { ...@@ -37,4 +37,6 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales") createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes") createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
// Guards the SERDEPROPERTIES workaround that the episodes_part test table relies on for
// reading partitioned Avro data.
createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment