From 1150a19b188a075166899fdb1e107b2ba1e505d8 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 14 Aug 2015 14:09:46 -0700 Subject: [SPARK-8670] [SQL] Nested columns can't be referenced in pyspark This bug is caused by a wrong column-existence check in `__getitem__` of the PySpark DataFrame. `DataFrame.apply` accepts not only top-level column names, but also nested column names like `a.b`, so we should remove that check from `__getitem__`. Author: Wenchen Fan Closes #8202 from cloud-fan/nested. --- python/pyspark/sql/dataframe.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'python/pyspark/sql/dataframe.py') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 09647ff6d0..da742d7ce7 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -722,8 +722,6 @@ class DataFrame(object): [Row(age=5, name=u'Bob')] """ if isinstance(item, basestring): - if item not in self.columns: - raise IndexError("no such column: %s" % item) jc = self._jdf.apply(item) return Column(jc) elif isinstance(item, Column): -- cgit v1.2.3