Skip to content
Snippets Groups Projects
Commit e92c24d3 authored by Spiro Michaylov's avatar Spiro Michaylov Committed by Reynold Xin
Browse files

[SPARK-8810] [SQL] Added several UDF unit tests for Spark SQL

One test for each of the GROUP BY, WHERE and HAVING clauses, and one that combines all three with an additional UDF in the SELECT.

(Since this is my first attempt at contributing to SPARK, meta-level guidance on anything I've screwed up would be greatly appreciated, whether important or minor.)

Author: Spiro Michaylov <spiro@michaylov.com>

Closes #7207 from spirom/udf-test-branch and squashes the following commits:

6bbba9e [Spiro Michaylov] Responded to review comments on UDF unit tests
1a3c5ff [Spiro Michaylov] Added several UDF unit tests for Spark SQL
parent f0fac2aa
No related branches found
No related tags found
No related merge requests found
...@@ -82,6 +82,76 @@ class UDFSuite extends QueryTest { ...@@ -82,6 +82,76 @@ class UDFSuite extends QueryTest {
assert(ctx.sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5) assert(ctx.sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5)
} }
test("UDF in a WHERE") {
  // Exercises a registered one-argument boolean UDF as a WHERE predicate.
  // Build one TestData row per integer in 1..100 and expose it as a temp table.
  val rows = for (i <- 1 to 100) yield TestData(i, i.toString)
  ctx.sparkContext.parallelize(rows).toDF().registerTempTable("integerData")

  // The predicate is true only for inputs strictly greater than 80.
  ctx.udf.register("oneArgFilter", (n: Int) => n > 80)

  // NOTE(review): presumes TestData's first field is the `key` column queried
  // below, so only 81..100 (20 rows) survive the filter.
  val filtered = ctx.sql("SELECT * FROM integerData WHERE oneArgFilter(key)")
  assert(filtered.count() === 20)
}
test("UDF in a HAVING") {
  // Exercises a registered boolean UDF as the HAVING predicate over an aggregate.
  val colorValues = Seq(("red", 1), ("red", 2), ("blue", 10),
    ("green", 100), ("green", 200))
  colorValues.toDF("g", "v").registerTempTable("groupData")

  // Takes a Long because it is applied to the SUM(v) aggregate alias `s`.
  ctx.udf.register("havingFilter", (n: Long) => n > 5)

  val query =
    """
      | SELECT g, SUM(v) as s
      | FROM groupData
      | GROUP BY g
      | HAVING havingFilter(s)
    """.stripMargin

  // Per-group sums: red = 3, blue = 10, green = 300; only blue and green exceed 5.
  val kept = ctx.sql(query)
  assert(kept.count() === 2)
}
test("UDF in a GROUP BY") {
  // Exercises a registered UDF as the grouping expression of a GROUP BY.
  val colorValues = Seq(("red", 1), ("red", 2), ("blue", 10),
    ("green", 100), ("green", 200))
  colorValues.toDF("g", "v").registerTempTable("groupData")

  // Splits the values into two buckets: v > 10 and everything else.
  ctx.udf.register("groupFunction", (n: Int) => n > 10)

  val query =
    """
      | SELECT SUM(v)
      | FROM groupData
      | GROUP BY groupFunction(v)
    """.stripMargin

  // 1, 2 and 10 map to false; 100 and 200 map to true, hence two groups.
  val grouped = ctx.sql(query)
  assert(grouped.count() === 2)
}
test("UDFs everywhere") {
  // Combines UDFs in the SELECT, WHERE, GROUP BY and HAVING clauses of one query.
  val colorValues = Seq(("red", 1), ("red", 2), ("blue", 10),
    ("green", 100), ("green", 200))
  colorValues.toDF("g", "v").registerTempTable("groupData")

  // One UDF per clause; the Long-typed ones operate on aggregate results.
  ctx.udf.register("whereFilter", (n: Int) => n < 150)
  ctx.udf.register("groupFunction", (n: Int) => n > 10)
  ctx.udf.register("timesHundred", (n: Long) => n * 100)
  ctx.udf.register("havingFilter", (n: Long) => n > 2000)

  val query =
    """
      | SELECT timesHundred(SUM(v)) as v100
      | FROM groupData
      | WHERE whereFilter(v)
      | GROUP BY groupFunction(v)
      | HAVING havingFilter(v100)
    """.stripMargin

  // WHERE drops 200, leaving 1, 2, 10 (bucket false, sum 13) and 100 (bucket true).
  // Scaled by 100 these become 1300 and 10000; only 10000 exceeds 2000.
  val survivors = ctx.sql(query)
  assert(survivors.count() === 1)
}
test("struct UDF") { test("struct UDF") {
ctx.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2)) ctx.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment