diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6140d1b129c64bc28e63b1bd866782fde45c74e5..43e3e9b910e7d5434014d3584c6913f91f1af58a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -179,6 +179,7 @@ object FunctionRegistry { expression[Decode]("decode"), expression[FindInSet]("find_in_set"), expression[FormatNumber]("format_number"), + expression[GetJsonObject]("get_json_object"), expression[InitCap]("initcap"), expression[Lower]("lcase"), expression[Lower]("lower"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala new file mode 100644 index 0000000000000000000000000000000000000000..23bfa18c94286e3bb81264a81908ff626f898813 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonFunctions.scala @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.io.{StringWriter, ByteArrayOutputStream} + +import com.fasterxml.jackson.core._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.{StringType, DataType} +import org.apache.spark.unsafe.types.UTF8String + +import scala.util.parsing.combinator.RegexParsers + +private[this] sealed trait PathInstruction +private[this] object PathInstruction { + private[expressions] case object Subscript extends PathInstruction + private[expressions] case object Wildcard extends PathInstruction + private[expressions] case object Key extends PathInstruction + private[expressions] case class Index(index: Long) extends PathInstruction + private[expressions] case class Named(name: String) extends PathInstruction +} + +private[this] sealed trait WriteStyle +private[this] object WriteStyle { + private[expressions] case object RawStyle extends WriteStyle + private[expressions] case object QuotedStyle extends WriteStyle + private[expressions] case object FlattenStyle extends WriteStyle +} + +private[this] object JsonPathParser extends RegexParsers { + import PathInstruction._ + + def root: Parser[Char] = '$' + + def long: Parser[Long] = "\\d+".r ^? { + case x => x.toLong + } + + // parse `[*]` and `[123]` subscripts + def subscript: Parser[List[PathInstruction]] = + for { + operand <- '[' ~> ('*' ^^^ Wildcard | long ^^ Index) <~ ']' + } yield { + Subscript :: operand :: Nil + } + + // parse `.name` or `['name']` child expressions + def named: Parser[List[PathInstruction]] = + for { + name <- '.' ~> "[^\\.\\[]+".r | "[\\'" ~> "[^\\'\\?]+" <~ "\\']" + } yield { + Key :: Named(name) :: Nil + } + + // child wildcards: `..`, `.*` or `['*']` + def wildcard: Parser[List[PathInstruction]] = + (".*" | "['*']") ^^^ List(Wildcard) + + def node: Parser[List[PathInstruction]] = + wildcard | + named | + subscript + + val expression: Parser[List[PathInstruction]] = { + phrase(root ~> rep(node) ^^ (x => x.flatten)) + } + + def parse(str: String): Option[List[PathInstruction]] = { + this.parseAll(expression, str) match { + case Success(result, _) => + Some(result) + + case NoSuccess(msg, next) => + None + } + } +} + +private[this] object GetJsonObject { + private val jsonFactory = new JsonFactory() + + // Enabled for Hive compatibility + jsonFactory.enable(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS) +} + +/** + * Extracts json object from a json string based on json path specified, and returns json string + * of the extracted json object. It will return null if the input json string is invalid. + */ +case class GetJsonObject(json: Expression, path: Expression) + extends BinaryExpression with ExpectsInputTypes with CodegenFallback { + + import GetJsonObject._ + import PathInstruction._ + import WriteStyle._ + import com.fasterxml.jackson.core.JsonToken._ + + override def left: Expression = json + override def right: Expression = path + override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def dataType: DataType = StringType + override def prettyName: String = "get_json_object" + + @transient private lazy val parsedPath = parsePath(path.eval().asInstanceOf[UTF8String]) + + override def eval(input: InternalRow): Any = { + val jsonStr = json.eval(input).asInstanceOf[UTF8String] + if (jsonStr == null) { + return null + } + + val parsed = if (path.foldable) { + parsedPath + } else { + parsePath(path.eval(input).asInstanceOf[UTF8String]) + } + + if (parsed.isDefined) { + try { + val parser = jsonFactory.createParser(jsonStr.getBytes) + val output = new ByteArrayOutputStream() + val generator = jsonFactory.createGenerator(output, JsonEncoding.UTF8) + parser.nextToken() + val matched = evaluatePath(parser, generator, RawStyle, parsed.get) + generator.close() + if (matched) { + UTF8String.fromBytes(output.toByteArray) + } else { + null + } + } catch { + case _: JsonProcessingException => null + } + } else { + null + } + } + + private def parsePath(path: UTF8String): Option[List[PathInstruction]] = { + if (path != null) { + JsonPathParser.parse(path.toString) + } else { + None + } + } + + // advance to the desired array index, assumes to start at the START_ARRAY token + private def arrayIndex(p: JsonParser, f: () => Boolean): Long => Boolean = { + case _ if p.getCurrentToken == END_ARRAY => + // terminate, nothing has been written + false + + case 0 => + // we've reached the desired index + val dirty = f() + + while (p.nextToken() != END_ARRAY) { + // advance the token stream to the end of the array + p.skipChildren() + } + + dirty + + case i if i > 0 => + // skip this token and evaluate the next + p.skipChildren() + p.nextToken() + arrayIndex(p, f)(i - 1) + } + + /** + * Evaluate a list of JsonPath instructions, returning a bool that indicates if any leaf nodes + * have been written to the generator + */ + private def evaluatePath( + p: JsonParser, + g: JsonGenerator, + style: WriteStyle, + path: List[PathInstruction]): Boolean = { + (p.getCurrentToken, path) match { + case (VALUE_STRING, Nil) if style == RawStyle => + // there is no array wildcard or slice parent, emit this string without quotes + if (p.hasTextCharacters) { + g.writeRaw(p.getTextCharacters, p.getTextOffset, p.getTextLength) + } else { + g.writeRaw(p.getText) + } + true + + case (START_ARRAY, Nil) if style == FlattenStyle => + // flatten this array into the parent + var dirty = false + while (p.nextToken() != END_ARRAY) { + dirty |= evaluatePath(p, g, style, Nil) + } + dirty + + case (_, Nil) => + // general case: just copy the child tree verbatim + g.copyCurrentStructure(p) + true + + case (START_OBJECT, Key :: xs) => + var dirty = false + while (p.nextToken() != END_OBJECT) { + if (dirty) { + // once a match has been found we can skip other fields + p.skipChildren() + } else { + dirty = evaluatePath(p, g, style, xs) + } + } + dirty + + case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs) => + // special handling for the non-structure preserving double wildcard behavior in Hive + var dirty = false + g.writeStartArray() + while (p.nextToken() != END_ARRAY) { + dirty |= evaluatePath(p, g, FlattenStyle, xs) + } + g.writeEndArray() + dirty + + case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle => + // retain Flatten, otherwise use Quoted... cannot use Raw within an array + val nextStyle = style match { + case RawStyle => QuotedStyle + case FlattenStyle => FlattenStyle + case QuotedStyle => throw new IllegalStateException() + } + + // temporarily buffer child matches, the emitted json will need to be + // modified slightly if there is only a single element written + val buffer = new StringWriter() + val flattenGenerator = jsonFactory.createGenerator(buffer) + flattenGenerator.writeStartArray() + + var dirty = 0 + while (p.nextToken() != END_ARRAY) { + // track the number of array elements and only emit an outer array if + // we've written more than one element, this matches Hive's behavior + dirty += (if (evaluatePath(p, flattenGenerator, nextStyle, xs)) 1 else 0) + } + flattenGenerator.writeEndArray() + flattenGenerator.close() + + val buf = buffer.getBuffer + if (dirty > 1) { + g.writeRawValue(buf.toString) + } else if (dirty == 1) { + // remove outer array tokens + g.writeRawValue(buf.substring(1, buf.length()-1)) + } // else do not write anything + + dirty > 0 + + case (START_ARRAY, Subscript :: Wildcard :: xs) => + var dirty = false + g.writeStartArray() + while (p.nextToken() != END_ARRAY) { + // wildcards can have multiple matches, continually update the dirty count + dirty |= evaluatePath(p, g, QuotedStyle, xs) + } + g.writeEndArray() + + dirty + + case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) => + p.nextToken() + // we're going to have 1 or more results, switch to QuotedStyle + arrayIndex(p, () => evaluatePath(p, g, QuotedStyle, xs))(idx) + + case (START_ARRAY, Subscript :: Index(idx) :: xs) => + p.nextToken() + arrayIndex(p, () => evaluatePath(p, g, style, xs))(idx) + + case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name => + // exact field match + p.nextToken() + evaluatePath(p, g, style, xs) + + case (FIELD_NAME, Wildcard :: xs) => + // wildcard field match + p.nextToken() + evaluatePath(p, g, style, xs) + + case _ => + p.skipChildren() + false + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala new file mode 100644 index 0000000000000000000000000000000000000000..4addbaf0cbce7501aac67b2c4543553ce944b3e1 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite + +class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + val json = + """ + |{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}], + |"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees", + |"title":"Sayings of the Century","category":"reference","price":8.95}, + |{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99, + |"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings", + |"category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}], + |"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}}, + |"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025", + |"fb:testid":"1234"} + |""".stripMargin + + test("$.store.bicycle") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.bicycle")), + """{"price":19.95,"color":"red"}""") + } + + test("$.store.book") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book")), + """[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference", + |"price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction", + |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title": + |"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"}, + |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] + |""".stripMargin.replace("\n", "")) + } + + test("$.store.book[0]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[0]")), + """{"author":"Nigel Rees","title":"Sayings of the Century", + |"category":"reference","price":8.95}""".stripMargin.replace("\n", "")) + } + + test("$.store.book[*]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[*]")), + """[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference", + |"price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction", + |"price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title": + |"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"}, + |{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] + |""".stripMargin.replace("\n", "")) + } + + test("$") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$")), + json.replace("\n", "")) + } + + test("$.store.book[0].category") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[0].category")), + "reference") + } + + test("$.store.book[*].category") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[*].category")), + """["reference","fiction","fiction"]""") + } + + test("$.store.book[*].isbn") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[*].isbn")), + """["0-553-21311-3","0-395-19395-8"]""") + } + + test("$.store.book[*].reader") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[*].reader")), + """[{"age":25,"name":"bob"},{"age":26,"name":"jack"}]""") + } + + test("$.store.basket[0][1]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[0][1]")), + "2") + } + + test("$.store.basket[*]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[*]")), + """[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]]""") + } + + test("$.store.basket[*][0]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[*][0]")), + "[1,3,5]") + } + + test("$.store.basket[0][*]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[0][*]")), + """[1,2,{"b":"y","a":"x"}]""") + } + + test("$.store.basket[*][*]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[*][*]")), + """[1,2,{"b":"y","a":"x"},3,4,5,6]""") + } + + test("$.store.basket[0][2].b") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[0][2].b")), + "y") + } + + test("$.store.basket[0][*].b") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[0][*].b")), + """["y"]""") + } + + test("$.zip code") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.zip code")), + "94025") + } + + test("$.fb:testid") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.fb:testid")), + "1234") + } + + test("preserve newlines") { + checkEvaluation( + GetJsonObject(Literal("""{"a":"b\nc"}"""), Literal("$.a")), + "b\nc") + } + + test("escape") { + checkEvaluation( + GetJsonObject(Literal("""{"a":"b\"c"}"""), Literal("$.a")), + "b\"c") + } + + test("$.non_exist_key") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.non_exist_key")), + null) + } + + test("$..no_recursive") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$..no_recursive")), + null) + } + + test("$.store.book[10]") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[10]")), + null) + } + + test("$.store.book[0].non_exist_key") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.book[0].non_exist_key")), + null) + } + + test("$.store.basket[*].non_exist_key") { + checkEvaluation( + GetJsonObject(Literal(json), Literal("$.store.basket[*].non_exist_key")), + null) + } + + test("non foldable literal") { + checkEvaluation( + GetJsonObject(NonFoldableLiteral(json), NonFoldableLiteral("$.fb:testid")), + "1234") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala new file mode 100644 index 0000000000000000000000000000000000000000..71c26a6f8d3678694299fb0b75bfa1a941a0602f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +class JsonFunctionsSuite extends QueryTest { + + private lazy val ctx = org.apache.spark.sql.test.TestSQLContext + import ctx.implicits._ + + test("function get_json_object") { + val df: DataFrame = Seq(("""{"name": "alice", "age": 5}""", "")).toDF("a", "b") + checkAnswer( + df.selectExpr("get_json_object(a, '$.name')", "get_json_object(a, '$.age')"), + Row("alice", "5")) + } + +} diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index c46a4a4b0be548efdb911a05ca55f5161d6a3d3c..d4fc6c2b6ebc0ebaa84ab27ce0ce2cf88ef636d6 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -266,6 +266,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Hive returns string from UTC formatted timestamp, spark returns timestamp type "date_udf", + // Can't compare the result that have newline in it + "udf_get_json_object", + // Unlike Hive, we do support log base in (0, 1.0], therefore disable this "udf7", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 8d0bf46e8fad7154429672cc13162b1ac1edb176..167086db5bfe2749033cc93c3e28a73be626c987 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -374,7 +374,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1) SELECT title, air_date, doctor FROM episodes """.cmd - ) + ), + TestTable("src_json", + s"""CREATE TABLE src_json (json STRING) STORED AS TEXTFILE + """.stripMargin.cmd, + s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/json.txt")}' INTO TABLE src_json".cmd) ) hiveQTestUtilTables.foreach(registerTestTable) diff --git a/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f b/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f new file mode 100644 index 0000000000000000000000000000000000000000..1dcda4315a14d74081716229cb7ab81c77485751 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #1-0-f01b340b5662c45bb5f1e3b7c6900e1f @@ -0,0 +1 @@ +{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025","fb:testid":"1234"} diff --git a/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc b/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc new file mode 100644 index 0000000000000000000000000000000000000000..81c545efebe5f57d4cab2ba9ec294c4b0cadf672 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #10-0-f3f47d06d7c51d493d68112b0bd6c1fc @@ -0,0 +1 @@ +1234 diff --git a/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a b/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a new file mode 100644 index 0000000000000000000000000000000000000000..99127db9e311c8d60b6f0394d125046e259a102f --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #2-0-e84c2f8136919830fd665a278e4158a @@ -0,0 +1 @@ +amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}} diff --git a/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb b/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb new file mode 100644 index 0000000000000000000000000000000000000000..0bc03998296af2b7deaeb2d384f58747f35d7b50 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #3-0-bf140c65c31f8d892ec23e41e16e58bb @@ -0,0 +1 @@ +{"price":19.95,"color":"red"} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] diff --git a/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5 b/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5 new file mode 100644 index 0000000000000000000000000000000000000000..4f7e09bd3fa7385120123dcacea61a7be80b1767 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #4-0-f0bd902edc1990c9a6c65a6bb672c4d5 @@ -0,0 +1 @@ +{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}] diff --git a/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab b/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab new file mode 100644 index 0000000000000000000000000000000000000000..b2d212a597d998f523dbc880f9c7e292eae23d93 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #5-0-3c09f4316a1533049aee8af749cdcab @@ -0,0 +1 @@ +reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"age":25,"name":"bob"},{"age":26,"name":"jack"}] diff --git a/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da b/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da new file mode 100644 index 0000000000000000000000000000000000000000..21d88629fcdb6cbc05858efe4f5b80f69f235dcc --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #6-0-8334d1ddbe0f41fc7b80d4e6b45409da @@ -0,0 +1 @@ +25 [25,26] diff --git a/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 b/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 new file mode 100644 index 0000000000000000000000000000000000000000..e60721e1dd240fc18a3f20c14dbd38a8cf477d07 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #7-0-40d7dff94b26a2e3f4ab71baee3d3ce0 @@ -0,0 +1 @@ +2 [[1,2,{"b":"y","a":"x"}],[3,4],[5,6]] 1 [1,2,{"b":"y","a":"x"}] [1,2,{"b":"y","a":"x"},3,4,5,6] y ["y"] diff --git a/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844 b/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844 new file mode 100644 index 0000000000000000000000000000000000000000..356fcdf7139bdae5955de9e5a14e64655b127b24 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #8-0-180b4b6fdb26011fec05a7ca99fd9844 @@ -0,0 +1 @@ +NULL NULL NULL NULL NULL NULL diff --git a/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b b/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b new file mode 100644 index 0000000000000000000000000000000000000000..ef4a39675ed6dc59807b9c1af0939a37efa77a56 --- /dev/null +++ b/sql/hive/src/test/resources/golden/get_json_object #9-0-47c451a969d856f008f4d6b3d378d94b @@ -0,0 +1 @@ +94025 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index a7cfac51cc0978701a9c5a861917540c04f2ef9e..edb27553671d1805e40f50d1ff2ecc93b32f2cda 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -587,6 +587,57 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { |select * where key = 4 """.stripMargin) + // test get_json_object again Hive, because the HiveCompatabilitySuite cannot handle result + // with newline in it. + createQueryTest("get_json_object #1", + "SELECT get_json_object(src_json.json, '$') FROM src_json") + + createQueryTest("get_json_object #2", + "SELECT get_json_object(src_json.json, '$.owner'), get_json_object(src_json.json, '$.store')" + + " FROM src_json") + + createQueryTest("get_json_object #3", + "SELECT get_json_object(src_json.json, '$.store.bicycle'), " + + "get_json_object(src_json.json, '$.store.book') FROM src_json") + + createQueryTest("get_json_object #4", + "SELECT get_json_object(src_json.json, '$.store.book[0]'), " + + "get_json_object(src_json.json, '$.store.book[*]') FROM src_json") + + createQueryTest("get_json_object #5", + "SELECT get_json_object(src_json.json, '$.store.book[0].category'), " + + "get_json_object(src_json.json, '$.store.book[*].category'), " + + "get_json_object(src_json.json, '$.store.book[*].isbn'), " + + "get_json_object(src_json.json, '$.store.book[*].reader') FROM src_json") + + createQueryTest("get_json_object #6", + "SELECT get_json_object(src_json.json, '$.store.book[*].reader[0].age'), " + + "get_json_object(src_json.json, '$.store.book[*].reader[*].age') FROM src_json") + + createQueryTest("get_json_object #7", + "SELECT get_json_object(src_json.json, '$.store.basket[0][1]'), " + + "get_json_object(src_json.json, '$.store.basket[*]'), " + + // Hive returns wrong result with [*][0], so this expression is change to make test pass + "get_json_object(src_json.json, '$.store.basket[0][0]'), " + + "get_json_object(src_json.json, '$.store.basket[0][*]'), " + + "get_json_object(src_json.json, '$.store.basket[*][*]'), " + + "get_json_object(src_json.json, '$.store.basket[0][2].b'), " + + "get_json_object(src_json.json, '$.store.basket[0][*].b') FROM src_json") + + createQueryTest("get_json_object #8", + "SELECT get_json_object(src_json.json, '$.non_exist_key'), " + + "get_json_object(src_json.json, '$..no_recursive'), " + + "get_json_object(src_json.json, '$.store.book[10]'), " + + "get_json_object(src_json.json, '$.store.book[0].non_exist_key'), " + + "get_json_object(src_json.json, '$.store.basket[*].non_exist_key'), " + + "get_json_object(src_json.json, '$.store.basket[0][*].non_exist_key') FROM src_json") + + createQueryTest("get_json_object #9", + "SELECT get_json_object(src_json.json, '$.zip code') FROM src_json") + + createQueryTest("get_json_object #10", + "SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json") + test("predicates contains an empty AttributeSet() references") { sql( """