aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorx1- <viva008@gmail.com>2015-06-30 20:35:46 -0700
committerDavies Liu <davies@databricks.com>2015-06-30 20:38:05 -0700
commit214550b83f31837c6122636c0c84f77d3bb87273 (patch)
tree4fb369e654be93a2390a2bb13b5956a5aa8bc7ee
parente7dafd433eee089afcc0eeab02295deae90be7d3 (diff)
downloadspark-214550b83f31837c6122636c0c84f77d3bb87273.tar.gz
spark-214550b83f31837c6122636c0c84f77d3bb87273.tar.bz2
spark-214550b83f31837c6122636c0c84f77d3bb87273.zip
[SPARK-8535] [PYSPARK] PySpark : Can't create DataFrame from Pandas dataframe with no explicit column name
Because the implicit names of `pandas.columns` are Int, while `StructField` json expects `String`, `pandas.columns` should be converted to `String`. ### issue * [SPARK-8535 PySpark : Can't create DataFrame from Pandas dataframe with no explicit column name](https://issues.apache.org/jira/browse/SPARK-8535) Author: x1- <viva008@gmail.com> Closes #7124 from x1-/SPARK-8535 and squashes the following commits: d68fd38 [x1-] modify unit-test using pandas. ea1897d [x1-] Since the implicit names of pandas.columns are Int, they should be converted to String. (cherry picked from commit b6e76edf3005c078b407f63b0a05d3a28c18c742) Signed-off-by: Davies Liu <davies@databricks.com>
-rw-r--r--python/pyspark/sql/context.py4
1 file changed, 3 insertions, 1 deletion
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 1bebfc4837..7978f50019 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -309,13 +309,15 @@ class SQLContext(object):
>>> sqlContext.createDataFrame(df.toPandas()).collect() # doctest: +SKIP
[Row(name=u'Alice', age=1)]
+ >>> sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  # doctest: +SKIP
+ [Row(0=1, 1=2)]
"""
if isinstance(data, DataFrame):
raise TypeError("data is already a DataFrame")
if has_pandas and isinstance(data, pandas.DataFrame):
if schema is None:
- schema = list(data.columns)
+ schema = [str(x) for x in data.columns]
data = [r.tolist() for r in data.to_records(index=False)]
if not isinstance(data, RDD):