diff options
author | Davies Liu <davies@databricks.com> | 2015-06-10 16:55:39 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-06-10 16:55:39 -0700 |
commit | 37719e0cd0b00cc5ffee0ebe1652d465a574db0f (patch) | |
tree | 326c8178ed25ef17135ed2978c6dbfaf9e7593e3 /python/pyspark/sql/types.py | |
parent | b928f543845ddd39e914a0e8f0b0205fd86100c5 (diff) | |
download | spark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.tar.gz spark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.tar.bz2 spark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.zip |
[SPARK-8189] [SQL] use Long for TimestampType in SQL
This PR changes TimestampType to use Long as its internal type for efficiency, which means it will lose any precision below 100ns.
Author: Davies Liu <davies@databricks.com>
Closes #6733 from davies/timestamp and squashes the following commits:
d9565fa [Davies Liu] remove print
65cf2f1 [Davies Liu] fix Timestamp in SparkR
86fecfb [Davies Liu] disable two timestamp tests
8f77ee0 [Davies Liu] fix scala style
246ee74 [Davies Liu] address comments
309d2e1 [Davies Liu] use Long for TimestampType in SQL
Diffstat (limited to 'python/pyspark/sql/types.py')
-rw-r--r-- | python/pyspark/sql/types.py | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index b6ec6137c9..8f286b631f 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -19,6 +19,7 @@ import sys import decimal import time import datetime +import calendar import keyword import warnings import json @@ -654,6 +655,8 @@ def _need_python_to_sql_conversion(dataType): _need_python_to_sql_conversion(dataType.valueType) elif isinstance(dataType, UserDefinedType): return True + elif isinstance(dataType, TimestampType): + return True else: return False @@ -707,6 +710,14 @@ def _python_to_sql_converter(dataType): return lambda m: dict([(key_converter(k), value_converter(v)) for k, v in m.items()]) elif isinstance(dataType, UserDefinedType): return lambda obj: dataType.serialize(obj) + elif isinstance(dataType, TimestampType): + + def to_posix_timstamp(dt): + if dt.tzinfo is None: + return int(time.mktime(dt.timetuple()) * 1e7 + dt.microsecond * 10) + else: + return int(calendar.timegm(dt.utctimetuple()) * 1e7 + dt.microsecond * 10) + return to_posix_timstamp else: raise ValueError("Unexpected type %r" % dataType) |