diff options
author | Cheng Lian <lian@databricks.com> | 2016-01-24 19:40:34 -0800 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-01-24 19:40:34 -0800 |
commit | 3327fd28170b549516fee1972dc6f4c32541591b (patch) | |
tree | 9d5a2d7a0fd49bd77907f3199d46b360b9fc42c7 /python | |
parent | e789b1d2c1eab6187f54424ed92697ca200c3101 (diff) | |
download | spark-3327fd28170b549516fee1972dc6f4c32541591b.tar.gz spark-3327fd28170b549516fee1972dc6f4c32541591b.tar.bz2 spark-3327fd28170b549516fee1972dc6f4c32541591b.zip |
[SPARK-12624][PYSPARK] Checks row length when converting Java arrays to Python rows
When the actual row length doesn't match the number of fields in the specified schema, we should give a better error message instead of throwing an unintuitive `ArrayIndexOutOfBoundsException`.
Author: Cheng Lian <lian@databricks.com>
Closes #10886 from liancheng/spark-12624.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/tests.py | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index ae8620274d..7593b991a7 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -364,6 +364,15 @@ class SQLTests(ReusedPySparkTestCase):
         df3 = self.sqlCtx.createDataFrame(rdd, df.schema)
         self.assertEqual(10, df3.count())
 
+    def test_create_dataframe_schema_mismatch(self):
+        input = [Row(a=1)]
+        rdd = self.sc.parallelize(range(3)).map(lambda i: Row(a=i))
+        schema = StructType([StructField("a", IntegerType()), StructField("b", StringType())])
+        df = self.sqlCtx.createDataFrame(rdd, schema)
+        message = ".*Input row doesn't have expected number of values required by the schema.*"
+        with self.assertRaisesRegexp(Exception, message):
+            df.show()
+
     def test_serialize_nested_array_and_map(self):
         d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})]
         rdd = self.sc.parallelize(d)
```