path: root/python/pyspark
author    Jason White <jason.white@shopify.com>    2017-03-07 13:14:37 -0800
committer Davies Liu <davies.liu@gmail.com>        2017-03-07 13:14:37 -0800
commit    6f4684622a951806bebe7652a14f7d1ce03e24c7 (patch)
tree      f96519c6144811ecf2279df9c82e244804d108fe /python/pyspark
parent    49570ed05d44f96549c49929f35c1c202556731a (diff)
[SPARK-19561] [PYTHON] cast TimestampType.toInternal output to long
## What changes were proposed in this pull request?

Cast the output of `TimestampType.toInternal` to long to allow for proper Timestamp creation in DataFrames near the epoch.

## How was this patch tested?

Added a new test that fails without the change.

dongjoon-hyun davies Mind taking a look?

The contribution is my original work and I license the work to the project under the project’s open source license.

Author: Jason White <jason.white@shopify.com>

Closes #16896 from JasonMWhite/SPARK-19561.
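As context (not part of the patch): a minimal sketch of the type difference the cast removes, assuming a Python 2 interpreter, where `int` and `long` are distinct built-in types. The variable names below are illustrative only and do not appear in the patch.

```python
# Illustrative sketch only (Python 2, where int and long are distinct types).
# Near the epoch, int(seconds) * 1000000 yields a plain int, while the cast
# introduced by this patch guarantees a long regardless of the timestamp's
# magnitude.
import datetime
import time

epoch = datetime.datetime.fromtimestamp(0)   # local representation of the epoch
seconds = time.mktime(epoch.timetuple())     # a float, approximately 0.0

without_cast = int(seconds) * 1000000 + epoch.microsecond   # plain int near the epoch
with_cast = long(seconds) * 1000000 + epoch.microsecond     # always a long

print(type(without_cast))  # <type 'int'> on Python 2
print(type(with_cast))     # <type 'long'> on Python 2
```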
Diffstat (limited to 'python/pyspark')
-rw-r--r--  python/pyspark/sql/tests.py  6
-rw-r--r--  python/pyspark/sql/types.py  2
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 81f3d1d36a..4d48ef694d 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1555,6 +1555,12 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertEqual(now, now1)
         self.assertEqual(now, utcnow1)
 
+    # regression test for SPARK-19561
+    def test_datetime_at_epoch(self):
+        epoch = datetime.datetime.fromtimestamp(0)
+        df = self.spark.createDataFrame([Row(date=epoch)])
+        self.assertEqual(df.first()['date'], epoch)
+
     def test_decimal(self):
         from decimal import Decimal
         schema = StructType([StructField("decimal", DecimalType(10, 5))])
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 26b54a7fb3..1d31f25efa 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -189,7 +189,7 @@ class TimestampType(AtomicType):
         if dt is not None:
             seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo
                        else time.mktime(dt.timetuple()))
-            return int(seconds) * 1000000 + dt.microsecond
+            return long(seconds) * 1000000 + dt.microsecond
 
     def fromInternal(self, ts):
         if ts is not None:
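A hedged round-trip check through the public `TimestampType` API after this change, mirroring the new `test_datetime_at_epoch` regression test without starting a SparkSession. The exact internal value depends on the local timezone handling of `time.mktime`, so treat this as a sketch rather than a definitive test.

```python
# Round-trip sketch: toInternal converts a datetime to microseconds since the
# epoch (a long on Python 2 after this patch), and fromInternal converts back.
import datetime
from pyspark.sql.types import TimestampType

ts_type = TimestampType()
epoch = datetime.datetime.fromtimestamp(0)

internal = ts_type.toInternal(epoch)       # typically 0 for the local epoch
restored = ts_type.fromInternal(internal)  # back to a naive datetime

assert restored == epoch
```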