From 1150a19b188a075166899fdb1e107b2ba1e505d8 Mon Sep 17 00:00:00 2001
From: Wenchen Fan <cloud0fan@outlook.com>
Date: Fri, 14 Aug 2015 14:09:46 -0700
Subject: [SPARK-8670] [SQL] Nested columns can't be referenced in pyspark

This bug is caused by an incorrect column-existence check in `__getitem__` of the PySpark DataFrame. `DataFrame.apply` accepts not only top-level column names but also nested column names such as `a.b`, so we should remove that check from `__getitem__`.

Author: Wenchen Fan <cloud0fan@outlook.com>

Closes #8202 from cloud-fan/nested.
---
 python/pyspark/sql/dataframe.py | 2 --
 1 file changed, 2 deletions(-)

(limited to 'python/pyspark/sql/dataframe.py')

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 09647ff6d0..da742d7ce7 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -722,8 +722,6 @@ class DataFrame(object):
         [Row(age=5, name=u'Bob')]
         """
         if isinstance(item, basestring):
-            if item not in self.columns:
-                raise IndexError("no such column: %s" % item)
             jc = self._jdf.apply(item)
             return Column(jc)
         elif isinstance(item, Column):
-- 
cgit v1.2.3