From b86db517b6a2795f687211205b6a14c8685873eb Mon Sep 17 00:00:00 2001
From: Xiangrui Meng <meng@databricks.com>
Date: Sun, 20 Jul 2014 18:40:36 -0700
Subject: [PATCH] [SPARK-2552][MLLIB] stabilize logistic function in pyspark

Evaluate the logistic function so that `exp` is only called with a
non-positive argument, to avoid overflow in `exp(x)` if `x` is large.

Author: Xiangrui Meng <meng@databricks.com>

Closes #1493 from mengxr/py-logistic and squashes the following commits:

259e863 [Xiangrui Meng] stabilize logistic function in pyspark
---
 python/pyspark/mllib/classification.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 1c0c536c4f..9e28dfbb91 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -63,7 +63,10 @@ class LogisticRegressionModel(LinearModel):
     def predict(self, x):
         _linear_predictor_typecheck(x, self._coeff)
         margin = _dot(x, self._coeff) + self._intercept
-        prob = 1/(1 + exp(-margin))
+        if margin > 0:
+            prob = 1 / (1 + exp(-margin))
+        else:
+            prob = 1 - 1 / (1 + exp(margin))
         return 1 if prob > 0.5 else 0
 
 
-- 
GitLab