diff options
author | Holden Karau <holden@us.ibm.com> | 2015-12-30 11:14:47 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-12-30 11:14:47 -0800 |
commit | d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9 (patch) | |
tree | 14988ffcfe92aa04d851112c1ddf22ad8d11f153 /python/pyspark/sql/context.py | |
parent | aa48164a43bd9ed9eab53fcacbed92819e84eaf7 (diff) | |
download | spark-d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9.tar.gz spark-d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9.tar.bz2 spark-d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9.zip |
[SPARK-12300] [SQL] [PYSPARK] fix schema inference on local collections
Current schema inference for local Python collections halts as soon as there are no NullTypes left in the schema. This differs from what happens when we specify a sampling ratio of 1.0 on a distributed collection. Halting early could result in incomplete schema information.
Author: Holden Karau <holden@us.ibm.com>
Closes #10275 from holdenk/SPARK-12300-fix-schmea-inferance-on-local-collections.
Diffstat (limited to 'python/pyspark/sql/context.py')
-rw-r--r-- | python/pyspark/sql/context.py | 10 |
1 files changed, 3 insertions, 7 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index b05aa2f5c4..ba6915a123 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -18,6 +18,7 @@ import sys import warnings import json +from functools import reduce if sys.version >= '3': basestring = unicode = str @@ -236,14 +237,9 @@ class SQLContext(object): if type(first) is dict: warnings.warn("inferring schema from dict is deprecated," "please use pyspark.sql.Row instead") - schema = _infer_schema(first) + schema = reduce(_merge_type, map(_infer_schema, data)) if _has_nulltype(schema): - for r in data: - schema = _merge_type(schema, _infer_schema(r)) - if not _has_nulltype(schema): - break - else: - raise ValueError("Some of types cannot be determined after inferring") + raise ValueError("Some of types cannot be determined after inferring") return schema def _inferSchema(self, rdd, samplingRatio=None): |