diff options
author | Jason White <jason.white@shopify.com> | 2017-03-07 13:14:37 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2017-03-07 13:14:37 -0800 |
commit | 6f4684622a951806bebe7652a14f7d1ce03e24c7 (patch) | |
tree | f96519c6144811ecf2279df9c82e244804d108fe /python/pyspark | |
parent | 49570ed05d44f96549c49929f35c1c202556731a (diff) | |
download | spark-6f4684622a951806bebe7652a14f7d1ce03e24c7.tar.gz spark-6f4684622a951806bebe7652a14f7d1ce03e24c7.tar.bz2 spark-6f4684622a951806bebe7652a14f7d1ce03e24c7.zip |
[SPARK-19561] [PYTHON] cast TimestampType.toInternal output to long
## What changes were proposed in this pull request?
Cast the output of `TimestampType.toInternal` to long to allow for proper Timestamp creation in DataFrames near the epoch.
## How was this patch tested?
Added a new test that fails without the change.
@dongjoon-hyun @davies — mind taking a look?
The contribution is my original work and I license the work to the project under the project’s open source license.
Author: Jason White <jason.white@shopify.com>
Closes #16896 from JasonMWhite/SPARK-19561.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/sql/tests.py | 6 | ||||
-rw-r--r-- | python/pyspark/sql/types.py | 2 |
2 files changed, 7 insertions, 1 deletion
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 81f3d1d36a..4d48ef694d 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1555,6 +1555,12 @@ class SQLTests(ReusedPySparkTestCase): self.assertEqual(now, now1) self.assertEqual(now, utcnow1) + # regression test for SPARK-19561 + def test_datetime_at_epoch(self): + epoch = datetime.datetime.fromtimestamp(0) + df = self.spark.createDataFrame([Row(date=epoch)]) + self.assertEqual(df.first()['date'], epoch) + def test_decimal(self): from decimal import Decimal schema = StructType([StructField("decimal", DecimalType(10, 5))]) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 26b54a7fb3..1d31f25efa 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -189,7 +189,7 @@ class TimestampType(AtomicType): if dt is not None: seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())) - return int(seconds) * 1000000 + dt.microsecond + return long(seconds) * 1000000 + dt.microsecond def fromInternal(self, ts): if ts is not None: |