[SPARK-2674] [SQL] [PySpark] support datetime type for SchemaRDD

Datetime and time in Python will be converted into java.util.Calendar after serialization, it will be converted into java.sql.Timestamp during inferSchema(). In javaToPython(), Timestamp will be converted into Calendar, then be converted into datetime in Python after pickling. Author: Davies Liu <davies.liu@gmail.com> Closes #1601 from davies/date and squashes the following commits: f0599b0 [Davies Liu] remove tests for sets and tuple in sql, fix list of list c9d607a [Davies Liu] convert datetype for runtime 709d40d [Davies Liu] remove brackets 96db384 [Davies Liu] support datetime type for SchemaRDD
author: Davies Liu <davies.liu@gmail.com> 2014-07-29 12:31:39 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-07-29 12:31:39 -0700
commit: f0d880e288eba97c86dceb1b5edab4f3a935943b (patch)
tree: 90475addac0ecb27d1a26f05ebb52e9628cd97ff /python/pyspark/sql.py
parent: e3643485de8fdaf5c52b266fead1b13214f29d5e (diff)
download: spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.tar.gz
spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.tar.bz2
spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.zip
1 files changed, 12 insertions, 10 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index cb83e89176..a6b3277db3 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -47,12 +47,14 @@ class SQLContext:
             ...
         ValueError:...
 
-        >>> allTypes = sc.parallelize([{"int" : 1, "string" : "string", "double" : 1.0, "long": 1L,
-        ... "boolean" : True}])
+        >>> from datetime import datetime
+        >>> allTypes = sc.parallelize([{"int": 1, "string": "string", "double": 1.0, "long": 1L,
+        ... "boolean": True, "time": datetime(2010, 1, 1, 1, 1, 1), "dict": {"a": 1},
+        ... "list": [1, 2, 3]}])
         >>> srdd = sqlCtx.inferSchema(allTypes).map(lambda x: (x.int, x.string, x.double, x.long,
-        ... x.boolean))
+        ... x.boolean, x.time, x.dict["a"], x.list))
         >>> srdd.collect()[0]
-        (1, u'string', 1.0, 1, True)
+        (1, u'string', 1.0, 1, True, datetime.datetime(2010, 1, 1, 1, 1, 1), 1, [1, 2, 3])
         """
         self._sc = sparkContext
         self._jsc = self._sc._jsc
@@ -88,13 +90,13 @@ class SQLContext:
 
         >>> from array import array
         >>> srdd = sqlCtx.inferSchema(nestedRdd1)
-        >>> srdd.collect() == [{"f1" : array('i', [1, 2]), "f2" : {"row1" : 1.0}},
-        ...                    {"f1" : array('i', [2, 3]), "f2" : {"row2" : 2.0}}]
+        >>> srdd.collect() == [{"f1" : [1, 2], "f2" : {"row1" : 1.0}},
+        ...                    {"f1" : [2, 3], "f2" : {"row2" : 2.0}}]
         True
 
         >>> srdd = sqlCtx.inferSchema(nestedRdd2)
-        >>> srdd.collect() == [{"f1" : [[1, 2], [2, 3]], "f2" : set([1, 2]), "f3" : (1, 2)},
-        ...                    {"f1" : [[2, 3], [3, 4]], "f2" : set([2, 3]), "f3" : (2, 3)}]
+        >>> srdd.collect() == [{"f1" : [[1, 2], [2, 3]], "f2" : [1, 2]},
+        ...                    {"f1" : [[2, 3], [3, 4]], "f2" : [2, 3]}]
         True
         """
         if (rdd.__class__ is SchemaRDD):
@@ -509,8 +511,8 @@ def _test():
         {"f1": array('i', [1, 2]), "f2": {"row1": 1.0}},
         {"f1": array('i', [2, 3]), "f2": {"row2": 2.0}}])
     globs['nestedRdd2'] = sc.parallelize([
-        {"f1": [[1, 2], [2, 3]], "f2": set([1, 2]), "f3": (1, 2)},
-        {"f1": [[2, 3], [3, 4]], "f2": set([2, 3]), "f3": (2, 3)}])
+        {"f1": [[1, 2], [2, 3]], "f2": [1, 2]},
+        {"f1": [[2, 3], [3, 4]], "f2": [2, 3]}])
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
     globs['sc'].stop()
     if failure_count:
author	Davies Liu <davies.liu@gmail.com>	2014-07-29 12:31:39 -0700
committer	Michael Armbrust <michael@databricks.com>	2014-07-29 12:31:39 -0700
commit	f0d880e288eba97c86dceb1b5edab4f3a935943b (patch)
tree	90475addac0ecb27d1a26f05ebb52e9628cd97ff /python/pyspark/sql.py
parent	e3643485de8fdaf5c52b266fead1b13214f29d5e (diff)
download	spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.tar.gz spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.tar.bz2 spark-f0d880e288eba97c86dceb1b5edab4f3a935943b.zip