diff options
author | Davies Liu <davies@databricks.com> | 2015-07-08 18:22:53 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-07-08 18:22:53 -0700 |
commit | 74d8d3d928cc9a7386b68588ac89ae042847d146 (patch) | |
tree | 0248cc711322eb4a7a6966e9cfbf3a90ca886733 /python/pyspark | |
parent | 2a4f88b6c16f2991e63b17c0e103bcd79f04dbbc (diff) | |
download | spark-74d8d3d928cc9a7386b68588ac89ae042847d146.tar.gz spark-74d8d3d928cc9a7386b68588ac89ae042847d146.tar.bz2 spark-74d8d3d928cc9a7386b68588ac89ae042847d146.zip |
[SPARK-8450] [SQL] [PYSPARK] cleanup type converter for Python DataFrame
This PR fixes the converter for Python DataFrame, especially for DecimalType
Closes #7106
Author: Davies Liu <davies@databricks.com>
Closes #7131 from davies/decimal_python and squashes the following commits:
4d3c234 [Davies Liu] Merge branch 'master' of github.com:apache/spark into decimal_python
20531d6 [Davies Liu] Merge branch 'master' of github.com:apache/spark into decimal_python
7d73168 [Davies Liu] fix conflict
6cdd86a [Davies Liu] Merge branch 'master' of github.com:apache/spark into decimal_python
7104e97 [Davies Liu] improve type infer
9cd5a21 [Davies Liu] run python tests with SPARK_PREPEND_CLASSES
829a05b [Davies Liu] fix UDT in python
c99e8c5 [Davies Liu] fix mima
c46814a [Davies Liu] convert decimal for Python DataFrames
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/sql/tests.py | 13 | ||||
-rw-r--r-- | python/pyspark/sql/types.py | 4 |
2 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 333378c7f1..66827d4885 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -700,6 +700,19 @@ class SQLTests(ReusedPySparkTestCase): self.assertTrue(now - now1 < datetime.timedelta(0.001)) self.assertTrue(now - utcnow1 < datetime.timedelta(0.001)) + def test_decimal(self): + from decimal import Decimal + schema = StructType([StructField("decimal", DecimalType(10, 5))]) + df = self.sqlCtx.createDataFrame([(Decimal("3.14159"),)], schema) + row = df.select(df.decimal + 1).first() + self.assertEqual(row[0], Decimal("4.14159")) + tmpPath = tempfile.mkdtemp() + shutil.rmtree(tmpPath) + df.write.parquet(tmpPath) + df2 = self.sqlCtx.read.parquet(tmpPath) + row = df2.first() + self.assertEqual(row[0], Decimal("3.14159")) + def test_dropna(self): schema = StructType([ StructField("name", StringType(), True), diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 160df40d65..7e64cb0b54 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1069,6 +1069,10 @@ def _verify_type(obj, dataType): if obj is None: return + # StringType can work with any types + if isinstance(dataType, StringType): + return + if isinstance(dataType, UserDefinedType): if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType): raise ValueError("%r is not an instance of type %r" % (obj, dataType)) |