[SPARK-5554] [SQL] [PySpark] add more tests for DataFrame Python API

Add more tests and docs for DataFrame Python API, improve test coverage, fix bugs.

Author: Davies Liu <davies@databricks.com>

Closes #4331 from davies/fix_df and squashes the following commits:

dd9919f [Davies Liu] fix tests
467332c [Davies Liu] support string in cast()
83c92fe [Davies Liu] address comments
c052f6f [Davies Liu] Merge branch 'master' of github.com:apache/spark into fix_df
8dd19a9 [Davies Liu] fix tests in python 2.6
35ccb9f [Davies Liu] fix build
78ebcfa [Davies Liu] add sql_test.py in run_tests
9ab78b4 [Davies Liu] Merge branch 'master' of github.com:apache/spark into fix_df
6040ba7 [Davies Liu] fix docs
3ab2661 [Davies Liu] add more tests for DataFrame
author: Davies Liu <davies@databricks.com> 2015-02-03 16:01:56 -0800
committer: Reynold Xin <rxin@databricks.com> 2015-02-03 16:01:56 -0800
commit: 068c0e2ee05ee8b133c2dc26b8fa094ab2712d45 (patch)
tree: ab40097fe86c2aae58b11d0f0160ae5d07ecfd94 /sql
parent: 1e8b5394b44a0d3b36f64f10576c3ae3b977810c (diff)
download: spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.tar.gz
spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.tar.bz2
spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.zip
2 files changed, 5 insertions, 35 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 4aa37219e1..ddce77deb8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -116,15 +116,6 @@ trait Column extends DataFrame {
   def unary_- : Column = exprToColumn(UnaryMinus(expr))
 
   /**
-   * Bitwise NOT.
-   * {{{
-   *   // Scala: select the flags column and negate every bit.
-   *   df.select( ~df("flags") )
-   * }}}
-   */
-  def unary_~ : Column = exprToColumn(BitwiseNot(expr))
-
-  /**
    * Inversion of boolean expression, i.e. NOT.
    * {{
    *   // Scala: select rows that are not active (isActive === false)
@@ -363,27 +354,6 @@ trait Column extends DataFrame {
   def and(other: Column): Column = this && other
 
   /**
-   * Bitwise AND.
-   */
-  def & (other: Any): Column = constructColumn(other) { o =>
-    BitwiseAnd(expr, o.expr)
-  }
-
-  /**
-   * Bitwise OR with an expression.
-   */
-  def | (other: Any): Column = constructColumn(other) { o =>
-    BitwiseOr(expr, o.expr)
-  }
-
-  /**
-   * Bitwise XOR with an expression.
-   */
-  def ^ (other: Any): Column = constructColumn(other) { o =>
-    BitwiseXor(expr, o.expr)
-  }
-
-  /**
    * Sum of this expression and another expression.
    * {{{
    *   // Scala: The following selects the sum of a person's height and weight.
@@ -527,16 +497,16 @@ trait Column extends DataFrame {
    * @param startPos expression for the starting position.
    * @param len expression for the length of the substring.
    */
-  def substr(startPos: Column, len: Column): Column = {
-    new IncomputableColumn(Substring(expr, startPos.expr, len.expr))
-  }
+  def substr(startPos: Column, len: Column): Column =
+    exprToColumn(Substring(expr, startPos.expr, len.expr), computable = false)
 
   /**
    * An expression that returns a substring.
    * @param startPos starting position.
    * @param len length of the substring.
    */
-  def substr(startPos: Int, len: Int): Column = this.substr(lit(startPos), lit(len))
+  def substr(startPos: Int, len: Int): Column =
+    exprToColumn(Substring(expr, lit(startPos).expr, lit(len).expr))
 
   def contains(other: Any): Column = constructColumn(other) { o =>
     Contains(expr, o.expr)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
index 006b16fbe0..e6f622e87f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/test/ExamplePointUDT.scala
@@ -37,7 +37,7 @@ private[sql] class ExamplePointUDT extends UserDefinedType[ExamplePoint] {
 
   override def sqlType: DataType = ArrayType(DoubleType, false)
 
-  override def pyUDT: String = "pyspark.tests.ExamplePointUDT"
+  override def pyUDT: String = "pyspark.sql_tests.ExamplePointUDT"
 
   override def serialize(obj: Any): Seq[Double] = {
     obj match {
author	Davies Liu <davies@databricks.com>	2015-02-03 16:01:56 -0800
committer	Reynold Xin <rxin@databricks.com>	2015-02-03 16:01:56 -0800
commit	068c0e2ee05ee8b133c2dc26b8fa094ab2712d45 (patch)
tree	ab40097fe86c2aae58b11d0f0160ae5d07ecfd94 /sql
parent	1e8b5394b44a0d3b36f64f10576c3ae3b977810c (diff)
download	spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.tar.gz spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.tar.bz2 spark-068c0e2ee05ee8b133c2dc26b8fa094ab2712d45.zip