aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-06-10 16:55:39 -0700
committerReynold Xin <rxin@databricks.com>2015-06-10 16:55:39 -0700
commit37719e0cd0b00cc5ffee0ebe1652d465a574db0f (patch)
tree326c8178ed25ef17135ed2978c6dbfaf9e7593e3 /python/pyspark
parentb928f543845ddd39e914a0e8f0b0205fd86100c5 (diff)
downloadspark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.tar.gz
spark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.tar.bz2
spark-37719e0cd0b00cc5ffee0ebe1652d465a574db0f.zip
[SPARK-8189] [SQL] use Long for TimestampType in SQL
This PR changes TimestampType to use Long as its internal type for efficiency, which means it will lose precision below 100ns. Author: Davies Liu <davies@databricks.com> Closes #6733 from davies/timestamp and squashes the following commits: d9565fa [Davies Liu] remove print 65cf2f1 [Davies Liu] fix Timestamp in SparkR 86fecfb [Davies Liu] disable two timestamp tests 8f77ee0 [Davies Liu] fix scala style 246ee74 [Davies Liu] address comments 309d2e1 [Davies Liu] use Long for TimestampType in SQL
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/sql/types.py11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index b6ec6137c9..8f286b631f 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -19,6 +19,7 @@ import sys
import decimal
import time
import datetime
+import calendar
import keyword
import warnings
import json
@@ -654,6 +655,8 @@ def _need_python_to_sql_conversion(dataType):
_need_python_to_sql_conversion(dataType.valueType)
elif isinstance(dataType, UserDefinedType):
return True
+ elif isinstance(dataType, TimestampType):
+ return True
else:
return False
@@ -707,6 +710,14 @@ def _python_to_sql_converter(dataType):
return lambda m: dict([(key_converter(k), value_converter(v)) for k, v in m.items()])
elif isinstance(dataType, UserDefinedType):
return lambda obj: dataType.serialize(obj)
+ elif isinstance(dataType, TimestampType):
+
+ def to_posix_timstamp(dt):
+ if dt.tzinfo is None:
+ return int(time.mktime(dt.timetuple()) * 1e7 + dt.microsecond * 10)
+ else:
+ return int(calendar.timegm(dt.utctimetuple()) * 1e7 + dt.microsecond * 10)
+ return to_posix_timstamp
else:
raise ValueError("Unexpected type %r" % dataType)