From b6e76edf3005c078b407f63b0a05d3a28c18c742 Mon Sep 17 00:00:00 2001
From: x1- <viva008@gmail.com>
Date: Tue, 30 Jun 2015 20:35:46 -0700
Subject: [SPARK-8535] [PYSPARK] PySpark : Can't create DataFrame from Pandas
 dataframe with no explicit column name

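The implicit column names of a pandas DataFrame (`pandas.DataFrame.columns`) are integers, but the `StructField` JSON representation expects each field name to be a `String`.
The column names should therefore be converted to `String` when the schema is taken from a pandas DataFrame.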

### issue

* [SPARK-8535 PySpark : Can't create DataFrame from Pandas dataframe with no explicit column name](https://issues.apache.org/jira/browse/SPARK-8535)

Author: x1- <viva008@gmail.com>

Closes #7124 from x1-/SPARK-8535 and squashes the following commits:

d68fd38 [x1-] modify unit-test using pandas.
ea1897d [x1-] For implicit name of pandas.columns are Int, so should be convert to String.
---
 python/pyspark/sql/context.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 4bf232111c..309c11faf9 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -344,13 +344,15 @@ class SQLContext(object):
 
         >>> sqlContext.createDataFrame(df.toPandas()).collect()  # doctest: +SKIP
         [Row(name=u'Alice', age=1)]
+        >>> sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  # doctest: +SKIP
+        [Row(0=1, 1=2)]
         """
         if isinstance(data, DataFrame):
             raise TypeError("data is already a DataFrame")
 
         if has_pandas and isinstance(data, pandas.DataFrame):
             if schema is None:
-                schema = list(data.columns)
+                schema = [str(x) for x in data.columns]
             data = [r.tolist() for r in data.to_records(index=False)]
 
         if not isinstance(data, RDD):
-- 
cgit v1.2.3