author     Liang-Chi Hsieh <simonh@tw.ibm.com>    2016-05-18 11:18:33 -0700
committer  Davies Liu <davies.liu@gmail.com>      2016-05-18 11:18:33 -0700
commit     3d1e67f903ab3512fcad82b94b1825578f8117c9 (patch)
tree       6f392bcbfbf0836ce44bccd95fab53a4d27e4b6b /python
parent     8fb1d1c7f3ed1b62625052a532b7388ebec71bbf (diff)
[SPARK-15342] [SQL] [PYSPARK] PySpark test for non-ascii column name does not actually test with a unicode column name
## What changes were proposed in this pull request?

The PySpark SQL test `test_column_name_with_non_ascii` is meant to test a non-ascii column name, but it does not actually do so. Under Python 2 we need to construct a unicode string explicitly using `unicode`.

## How was this patch tested?

Existing tests.

Author: Liang-Chi Hsieh <simonh@tw.ibm.com>

Closes #13134 from viirya/correct-non-ascii-colname-pytest.
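For context, a minimal sketch (not part of the patch) of the Python 2 vs. Python 3 distinction the fix relies on: on Python 2 a bare "数量" literal is a byte string, so a true unicode object has to be constructed explicitly, which is exactly what the updated test does.

```python
# -*- coding: utf-8 -*-
# Sketch only: mirrors how the updated test obtains a genuine unicode
# column name on both Python 2 and Python 3.
import sys

if sys.version >= '3':
    column_name = "数量"                    # str is already unicode on Python 3
    assert isinstance(column_name, str)
else:
    column_name = unicode("数量", "utf-8")  # decode the utf-8 bytes explicitly
    assert isinstance(column_name, unicode)
```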
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/tests.py  | 11
-rw-r--r--  python/pyspark/sql/types.py  |  3
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index e86f44281d..1790432edd 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1044,8 +1044,15 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertRaises(TypeError, lambda: df[{}])
 
     def test_column_name_with_non_ascii(self):
-        df = self.spark.createDataFrame([(1,)], ["数量"])
-        self.assertEqual(StructType([StructField("数量", LongType(), True)]), df.schema)
+        if sys.version >= '3':
+            columnName = "数量"
+            self.assertTrue(isinstance(columnName, str))
+        else:
+            columnName = unicode("数量", "utf-8")
+            self.assertTrue(isinstance(columnName, unicode))
+        schema = StructType([StructField(columnName, LongType(), True)])
+        df = self.spark.createDataFrame([(1,)], schema)
+        self.assertEqual(schema, df.schema)
         self.assertEqual("DataFrame[数量: bigint]", str(df))
         self.assertEqual([("数量", 'bigint')], df.dtypes)
         self.assertEqual(1, df.select("数量").first()[0])
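As an illustration (an assumed standalone setup, not part of the patch), the behavior the updated test asserts can be reproduced with a local SparkSession; the application name and master below are placeholders.

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType

# Assumed local session for the example only.
spark = SparkSession.builder.master("local[1]").appName("non-ascii-colname").getOrCreate()

# Build the schema from an explicit unicode column name, as the updated test does.
schema = StructType([StructField(u"数量", LongType(), True)])
df = spark.createDataFrame([(1,)], schema)

print(df.schema)                      # StructType containing the 数量 field
print(df.dtypes)                      # [('数量', 'bigint')]
print(df.select(u"数量").first()[0])  # 1

spark.stop()
```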
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 30ab130f29..7d8d0230b4 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -27,7 +27,7 @@ from array import array
 
 if sys.version >= "3":
     long = int
-    unicode = str
+    basestring = unicode = str
 
 from py4j.protocol import register_input_converter
 from py4j.java_gateway import JavaClass
@@ -401,6 +401,7 @@ class StructField(DataType):
         False
         """
         assert isinstance(dataType, DataType), "dataType should be DataType"
+        assert isinstance(name, basestring), "field name should be string"
         if not isinstance(name, str):
             name = name.encode('utf-8')
         self.name = name
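A brief sketch (hypothetical usage, not from the patch) of what the added assertion and the `basestring` alias provide: a non-string field name now fails fast with a clear message, while str names (and unicode names on Python 2) keep working.

```python
from pyspark.sql.types import StructField, LongType

# Sketch only: with the new assertion, a non-string name raises an
# AssertionError up front rather than an obscure encode() failure later.
try:
    StructField(123, LongType(), True)
except AssertionError as error:
    print(error)                      # field name should be string

# String field names continue to work as before.
field = StructField(u"数量", LongType(), True)
print(field.name)
```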