diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 6831f9b7f8b95aac5e82f7d16cb0597289a086a8..657fe6f98975bfa2ea86bc6435a868f0a97d4661 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -256,8 +256,10 @@ def _test():
     globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
     globs['tempdir'] = tempfile.mkdtemp()
     atexit.register(lambda: shutil.rmtree(globs['tempdir']))
-    doctest.testmod(globs=globs)
+    (failure_count, test_count) = doctest.testmod(globs=globs)
     globs['sc'].stop()
+    if failure_count:
+        exit(-1)
 
 
 if __name__ == "__main__":
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 41ea6e6e14c07b9c044f9e54372a80947dd46349..fb144bc45d271ab94512c7e8e2e88da747574445 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -748,8 +748,10 @@ def _test():
     # The small batch size here ensures that we see multiple batches,
     # even in these small test examples:
     globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
-    doctest.testmod(globs=globs)
+    (failure_count, test_count) = doctest.testmod(globs=globs)
     globs['sc'].stop()
+    if failure_count:
+        exit(-1)
 
 
 if __name__ == "__main__":