diff options
author | Yin Huai <yhuai@databricks.com> | 2015-04-23 18:52:55 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-04-23 18:52:55 -0700 |
commit | 2d010f7afe6ac8e67e07da6bea700e9e8c9e6cc2 (patch) | |
tree | 880b031fd7da1b43dfc5909456184e40d6ee3fa2 /python | |
parent | 336f7f5373e5f6960ecd9967d3703c8507e329ec (diff) | |
download | spark-2d010f7afe6ac8e67e07da6bea700e9e8c9e6cc2.tar.gz spark-2d010f7afe6ac8e67e07da6bea700e9e8c9e6cc2.tar.bz2 spark-2d010f7afe6ac8e67e07da6bea700e9e8c9e6cc2.zip |
[SPARK-7060][SQL] Add alias function to python dataframe
This PR tries to provide a way for Python users to work around https://issues.apache.org/jira/browse/SPARK-6231.
Author: Yin Huai <yhuai@databricks.com>
Closes #5634 from yhuai/pythonDFAlias and squashes the following commits:
8465acd [Yin Huai] Add an alias to a Python DF.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 14 |
1 files changed, 14 insertions, 0 deletions
```diff
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index c8c30ce402..4759f5fe78 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -453,6 +453,20 @@ class DataFrame(object):
         return [f.name for f in self.schema.fields]
 
     @ignore_unicode_prefix
+    def alias(self, alias):
+        """Returns a new :class:`DataFrame` with an alias set.
+
+        >>> from pyspark.sql.functions import *
+        >>> df_as1 = df.alias("df_as1")
+        >>> df_as2 = df.alias("df_as2")
+        >>> joined_df = df_as1.join(df_as2, col("df_as1.name") == col("df_as2.name"), 'inner')
+        >>> joined_df.select(col("df_as1.name"), col("df_as2.name"), col("df_as2.age")).collect()
+        [Row(name=u'Alice', name=u'Alice', age=2), Row(name=u'Bob', name=u'Bob', age=5)]
+        """
+        assert isinstance(alias, basestring), "alias should be a string"
+        return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx)
+
+    @ignore_unicode_prefix
     def join(self, other, joinExprs=None, joinType=None):
         """Joins with another :class:`DataFrame`, using the given join expression.
 
```