From 7bf4da8a33c33b03bbfddc698335fe9b86ce1e0e Mon Sep 17 00:00:00 2001 From: Bryan Cutler <cutlerb@gmail.com> Date: Thu, 28 Sep 2017 10:24:51 +0900 Subject: [PATCH] [MINOR] Fixed up pandas_udf related docs and formatting ## What changes were proposed in this pull request? Fixed some minor issues with pandas_udf related docs and formatting. ## How was this patch tested? NA Author: Bryan Cutler <cutlerb@gmail.com> Closes #19375 from BryanCutler/arrow-pandas_udf-cleanup-minor. --- python/pyspark/serializers.py | 6 +++--- python/pyspark/sql/functions.py | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index db77b7e150..ad18bd0c81 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -191,7 +191,7 @@ class FramedSerializer(Serializer): class ArrowSerializer(FramedSerializer): """ - Serializes an Arrow stream. + Serializes bytes as Arrow data with the Arrow file format. """ def dumps(self, batch): @@ -239,7 +239,7 @@ class ArrowStreamPandasSerializer(Serializer): def dump_stream(self, iterator, stream): """ - Make ArrowRecordBatches from Pandas Serieses and serialize. Input is a single series or + Make ArrowRecordBatches from Pandas Series and serialize. Input is a single series or a list of series accompanied by an optional pyarrow type to coerce the data to. """ import pyarrow as pa @@ -257,7 +257,7 @@ class ArrowStreamPandasSerializer(Serializer): def load_stream(self, stream): """ - Deserialize ArrowRecordBatchs to an Arrow table and return as a list of pandas.Series. + Deserialize ArrowRecordBatches to an Arrow table and return as a list of pandas.Series. """ import pyarrow as pa reader = pa.open_stream(stream) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 63e9a830bb..b45a59db93 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2199,16 +2199,14 @@ def pandas_udf(f=None, returnType=StringType()): ... >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age")) >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\ - ... .show() # doctest: +SKIP + ... .show() # doctest: +SKIP +----------+--------------+------------+ |slen(name)|to_upper(name)|add_one(age)| +----------+--------------+------------+ | 8| JOHN DOE| 22| +----------+--------------+------------+ """ - wrapped_udf = _create_udf(f, returnType=returnType, vectorized=True) - - return wrapped_udf + return _create_udf(f, returnType=returnType, vectorized=True) blacklist = ['map', 'since', 'ignore_unicode_prefix'] -- GitLab