From 7bf4da8a33c33b03bbfddc698335fe9b86ce1e0e Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Thu, 28 Sep 2017 10:24:51 +0900
Subject: [PATCH] [MINOR] Fixed up pandas_udf-related docs and formatting

## What changes were proposed in this pull request?

Fixed some minor issues with the pandas_udf-related docs and formatting: corrected typos in the Arrow serializer docstrings, fixed doctest comment spacing in `pandas_udf`, and simplified the wrapped UDF return.
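
For reviewers unfamiliar with the API, here is a minimal sketch of the vectorized `pandas_udf` usage the touched doctest documents. It assumes a live `SparkSession` bound to `spark` and a pyarrow installation; the lambda mirrors the `slen` UDF from the doctest.

```python
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import IntegerType

# A vectorized UDF receives a pandas.Series and must return a Series
# of the same length; here we compute string lengths column-wise.
slen = pandas_udf(lambda s: s.str.len(), IntegerType())

df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
df.select(slen("name").alias("slen(name)")).show()
# +----------+
# |slen(name)|
# +----------+
# |         8|
# +----------+
```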

## How was this patch tested?

N/A — documentation and formatting changes only.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #19375 from BryanCutler/arrow-pandas_udf-cleanup-minor.
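
For context on the serializer docstrings touched below, a hedged sketch of the deserialization path that `ArrowStreamPandasSerializer.load_stream` describes, built from the pyarrow calls visible in the diff (`pa.open_stream` is the API of this pyarrow era; later releases moved it to `pa.ipc.open_stream`). The helper name is illustrative, not part of the patch.

```python
import pyarrow as pa

def arrow_stream_to_series(stream):
    """Read ArrowRecordBatches from a binary stream, collect them into an
    Arrow table, and return the columns as a list of pandas.Series."""
    reader = pa.open_stream(stream)   # stream-format reader over record batches
    table = reader.read_all()         # concatenate all batches into one Table
    return [c.to_pandas() for c in table.itercolumns()]
```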
---
 python/pyspark/serializers.py   | 6 +++---
 python/pyspark/sql/functions.py | 6 ++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index db77b7e150..ad18bd0c81 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -191,7 +191,7 @@ class FramedSerializer(Serializer):
 
 class ArrowSerializer(FramedSerializer):
     """
-    Serializes an Arrow stream.
+    Serializes bytes as Arrow data with the Arrow file format.
     """
 
     def dumps(self, batch):
@@ -239,7 +239,7 @@ class ArrowStreamPandasSerializer(Serializer):
 
     def dump_stream(self, iterator, stream):
         """
-        Make ArrowRecordBatches from Pandas Serieses and serialize. Input is a single series or
+        Make ArrowRecordBatches from Pandas Series and serialize. Input is a single series or
         a list of series accompanied by an optional pyarrow type to coerce the data to.
         """
         import pyarrow as pa
@@ -257,7 +257,7 @@ class ArrowStreamPandasSerializer(Serializer):
 
     def load_stream(self, stream):
         """
-        Deserialize ArrowRecordBatchs to an Arrow table and return as a list of pandas.Series.
+        Deserialize ArrowRecordBatches to an Arrow table and return as a list of pandas.Series.
         """
         import pyarrow as pa
         reader = pa.open_stream(stream)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 63e9a830bb..b45a59db93 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2199,16 +2199,14 @@ def pandas_udf(f=None, returnType=StringType()):
     ...
     >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
     >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\
-    ...     .show() # doctest: +SKIP
+    ...     .show()  # doctest: +SKIP
     +----------+--------------+------------+
     |slen(name)|to_upper(name)|add_one(age)|
     +----------+--------------+------------+
     |         8|      JOHN DOE|          22|
     +----------+--------------+------------+
     """
-    wrapped_udf = _create_udf(f, returnType=returnType, vectorized=True)
-
-    return wrapped_udf
+    return _create_udf(f, returnType=returnType, vectorized=True)
 
 
 blacklist = ['map', 'since', 'ignore_unicode_prefix']
-- 
GitLab