Skip to content
Snippets Groups Projects
Commit ccdb0522 authored by Tarek Auel's avatar Tarek Auel Committed by Davies Liu
Browse files

[SPARK-8727] [SQL] Missing python api; md5, log2

Jira: https://issues.apache.org/jira/browse/SPARK-8727

Author: Tarek Auel <tarek.auel@gmail.com>
Author: Tarek Auel <tarek.auel@googlemail.com>

Closes #7114 from tarekauel/missing-python and squashes the following commits:

ef4c61b [Tarek Auel] [SPARK-8727] revert dataframe change
4029d4d [Tarek Auel] removed dataframe pi and e unit test
66f0d2b [Tarek Auel] removed pi and e from python api and dataframe api; added _to_java_column(col) for strlen
4d07318 [Tarek Auel] fixed python unit test
45f2bee [Tarek Auel] fixed result of pi and e
c39f47b [Tarek Auel] add python api
bd50a3a [Tarek Auel] add missing python functions
parent 8133125c
No related branches found
No related tags found
No related merge requests found
......@@ -39,12 +39,15 @@ __all__ = [
'coalesce',
'countDistinct',
'explode',
'log2',
'md5',
'monotonicallyIncreasingId',
'rand',
'randn',
'sha1',
'sha2',
'sparkPartitionId',
'strlen',
'struct',
'udf',
'when']
......@@ -320,6 +323,19 @@ def explode(col):
return Column(jc)
@ignore_unicode_prefix
@since(1.5)
def md5(col):
    """Computes the MD5 digest of the given column and returns it as a 32 character hex string.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect()
    [Row(hash=u'902fbdd2b1df0c4f70b4a5d23525e932')]
    """
    sc = SparkContext._active_spark_context
    # Delegate to the JVM-side org.apache.spark.sql.functions.md5 and wrap the result.
    return Column(sc._jvm.functions.md5(_to_java_column(col)))
@since(1.4)
def monotonicallyIncreasingId():
"""A column that generates monotonically increasing 64-bit integers.
......@@ -365,6 +381,19 @@ def randn(seed=None):
return Column(jc)
@ignore_unicode_prefix
@since(1.5)
def sha1(col):
    """Computes the SHA-1 digest of the given column and returns it as a hex string.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect()
    [Row(hash=u'3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')]
    """
    sc = SparkContext._active_spark_context
    # Delegate to the JVM-side org.apache.spark.sql.functions.sha1 and wrap the result.
    return Column(sc._jvm.functions.sha1(_to_java_column(col)))
@ignore_unicode_prefix
@since(1.5)
def sha2(col, numBits):
......@@ -383,19 +412,6 @@ def sha2(col, numBits):
return Column(jc)
@ignore_unicode_prefix
@since(1.5)
def sha1(col):
    """Returns the hex string result of SHA-1 applied to the given column.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect()
    [Row(hash=u'3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')]
    """
    ctx = SparkContext._active_spark_context
    # Build the Java column expression via the JVM gateway, then wrap it.
    java_col = ctx._jvm.functions.sha1(_to_java_column(col))
    return Column(java_col)
@since(1.4)
def sparkPartitionId():
"""A column for partition ID of the Spark task.
......@@ -409,6 +425,18 @@ def sparkPartitionId():
return Column(sc._jvm.functions.sparkPartitionId())
@ignore_unicode_prefix
@since(1.5)
def strlen(col):
    """Returns the length of a string expression as an integer column.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(strlen('a').alias('length')).collect()
    [Row(length=3)]
    """
    sc = SparkContext._active_spark_context
    # Build the Java column expression first, then wrap it in a Python Column.
    jc = sc._jvm.functions.strlen(_to_java_column(col))
    return Column(jc)
@ignore_unicode_prefix
@since(1.4)
def struct(*cols):
......@@ -471,6 +499,17 @@ def log(arg1, arg2=None):
return Column(jc)
@since(1.5)
def log2(col):
    """Computes the base-2 logarithm of the given column.

    >>> sqlContext.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect()
    [Row(log2=2.0)]
    """
    sc = SparkContext._active_spark_context
    # Build the Java column expression first, then wrap it in a Python Column.
    jc = sc._jvm.functions.log2(_to_java_column(col))
    return Column(jc)
@since(1.4)
def lag(col, count=1, default=None):
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment