diff options
author | x1- <viva008@gmail.com> | 2015-06-30 20:35:46 -0700 |
---|---|---|
committer | Davies Liu <davies@databricks.com> | 2015-06-30 20:35:46 -0700 |
commit | b6e76edf3005c078b407f63b0a05d3a28c18c742 (patch) | |
tree | f49f86c94802d54b5b78e2add5f169d677193753 | |
parent | f457569886e9de9256ad269cb4a3d73a8918766d (diff) | |
download | spark-b6e76edf3005c078b407f63b0a05d3a28c18c742.tar.gz spark-b6e76edf3005c078b407f63b0a05d3a28c18c742.tar.bz2 spark-b6e76edf3005c078b407f63b0a05d3a28c18c742.zip |
[SPARK-8535] [PYSPARK] PySpark : Can't create DataFrame from Pandas dataframe with no explicit column name
The implicit column names in `pandas.columns` are `Int`, but the `StructField` JSON expects `String`.
So the names in `pandas.columns` should be converted to `String`.
### issue
* [SPARK-8535 PySpark : Can't create DataFrame from Pandas dataframe with no explicit column name](https://issues.apache.org/jira/browse/SPARK-8535)
Author: x1- <viva008@gmail.com>
Closes #7124 from x1-/SPARK-8535 and squashes the following commits:
d68fd38 [x1-] modify unit-test using pandas.
ea1897d [x1-] For implicit name of pandas.columns are Int, so should be convert to String.
-rw-r--r-- | python/pyspark/sql/context.py | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 4bf232111c..309c11faf9 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -344,13 +344,15 @@ class SQLContext(object): >>> sqlContext.createDataFrame(df.toPandas()).collect() # doctest: +SKIP [Row(name=u'Alice', age=1)] + >>> sqlContext.createDataFrame(pandas.DataFrame([[1, 2]]).collect()) # doctest: +SKIP + [Row(0=1, 1=2)] """ if isinstance(data, DataFrame): raise TypeError("data is already a DataFrame") if has_pandas and isinstance(data, pandas.DataFrame): if schema is None: - schema = list(data.columns) + schema = [str(x) for x in data.columns] data = [r.tolist() for r in data.to_records(index=False)] if not isinstance(data, RDD): |