aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2016-07-16 17:06:44 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2016-07-16 17:06:44 -0700
commit: c33e4b0d96d424568963c7e716c20f02949c72d1 (patch)
tree: 23339971cf8b5aa6488a51e56f253a32b725a54d /R
parent: 416730483643a0a92dbd6ae4ad07e80ceb3c5285 (diff)
download: spark-c33e4b0d96d424568963c7e716c20f02949c72d1.tar.gz
spark-c33e4b0d96d424568963c7e716c20f02949c72d1.tar.bz2
spark-c33e4b0d96d424568963c7e716c20f02949c72d1.zip
[SPARK-16507][SPARKR] Add a CRAN checker, fix Rd aliases
## What changes were proposed in this pull request?

Add a check-cran.sh script that runs `R CMD check` as CRAN. Also fixes a number of issues pointed out by the check. These include:

- Updating `DESCRIPTION` to be appropriate
- Adding a .Rbuildignore to ignore lintr, src-native, html that are non-standard files / dirs
- Adding aliases to all S4 methods in DataFrame, Column, GroupedData etc. This is required as stated in https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Documenting-S4-classes-and-methods
- Other minor fixes

## How was this patch tested?

SparkR unit tests, running the above mentioned script

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>

Closes #14173 from shivaram/sparkr-cran-changes.
Diffstat (limited to 'R')
-rwxr-xr-x R/check-cran.sh 52
-rw-r--r-- R/pkg/.Rbuildignore 5
-rw-r--r-- R/pkg/DESCRIPTION 8
-rw-r--r-- R/pkg/NAMESPACE 9
-rw-r--r-- R/pkg/R/DataFrame.R 105
-rw-r--r-- R/pkg/R/SQLContext.R 3
-rw-r--r-- R/pkg/R/WindowSpec.R 6
-rw-r--r-- R/pkg/R/column.R 27
-rw-r--r-- R/pkg/R/functions.R 436
-rw-r--r-- R/pkg/R/generics.R 8
-rw-r--r-- R/pkg/R/group.R 9
-rw-r--r-- R/pkg/R/mllib.R 4
-rw-r--r-- R/pkg/R/schema.R 16
-rw-r--r-- R/pkg/R/stats.R 6
-rw-r--r-- R/pkg/R/utils.R 20
-rw-r--r-- R/pkg/R/window.R 4
16 files changed, 675 insertions, 43 deletions
diff --git a/R/check-cran.sh b/R/check-cran.sh
new file mode 100755
index 0000000000..b3a6860961
--- /dev/null
+++ b/R/check-cran.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Builds the SparkR source package and runs `R CMD check --as-cran` on it.
+# Set R_HOME to choose a specific R installation; otherwise R is taken from PATH.
+set -eo pipefail
+
+FWDIR="$(cd "$(dirname "$0")"; pwd)"
+pushd "$FWDIR" > /dev/null
+
+if [ -n "$R_HOME" ]; then
+  R_SCRIPT_PATH="$R_HOME/bin"
+else
+  # R_HOME is not set: fall back to the R found on PATH, or give up.
+  if ! command -v R > /dev/null; then
+    echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
+    exit 1
+  fi
+  R_SCRIPT_PATH="$(dirname "$(command -v R)")"
+fi
+# Report the directory actually used; R_HOME is empty when R came from PATH.
+echo "Using R_SCRIPT_PATH = $R_SCRIPT_PATH"
+
+# Build the latest docs
+"$FWDIR/create-docs.sh"
+
+# Build the source package tarball (SparkR_<version>.tar.gz) in $FWDIR
+"$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"
+
+# Run check as-cran.
+# TODO(shivaram): Remove the skip tests once we figure out the install mechanism
+# Anchor on the field name so only the "Version:" line of DESCRIPTION matches.
+VERSION="$(grep '^Version:' "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}')"
+"$R_SCRIPT_PATH/R" CMD check --as-cran --no-tests "SparkR_$VERSION.tar.gz"
+
+popd > /dev/null
diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore
new file mode 100644
index 0000000000..544d203a6d
--- /dev/null
+++ b/R/pkg/.Rbuildignore
@@ -0,0 +1,5 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^\.lintr$
+^src-native$
+^html$
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 963a1bb580..ac73d6c798 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,12 +1,10 @@
Package: SparkR
Type: Package
-Title: R frontend for Spark
+Title: R Frontend for Apache Spark
Version: 2.0.0
-Date: 2013-09-09
+Date: 2016-07-07
Author: The Apache Software Foundation
Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
-Imports:
- methods
Depends:
R (>= 3.0),
methods,
@@ -14,7 +12,7 @@ Suggests:
testthat,
e1071,
survival
-Description: R frontend for Spark
+Description: The SparkR package provides an R frontend for Apache Spark.
License: Apache License (== 2.0)
Collate:
'schema.R'
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index fe52905e0a..1d74c6d955 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -343,3 +343,12 @@ export("partitionBy",
export("windowPartitionBy",
"windowOrderBy")
+
+S3method(print, jobj)
+S3method(print, structField)
+S3method(print, structType)
+S3method(print, summary.GeneralizedLinearRegressionModel)
+S3method(structField, character)
+S3method(structField, jobj)
+S3method(structType, jobj)
+S3method(structType, structField)
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47f9203ace..2e99aa026d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -74,6 +74,7 @@ dataFrame <- function(sdf, isCached = FALSE) {
#' @family SparkDataFrame functions
#' @rdname printSchema
#' @name printSchema
+#' @aliases printSchema,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -99,6 +100,7 @@ setMethod("printSchema",
#' @family SparkDataFrame functions
#' @rdname schema
#' @name schema
+#' @aliases schema,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -121,6 +123,7 @@ setMethod("schema",
#' @param x A SparkDataFrame
#' @param extended Logical. If extended is FALSE, explain() only prints the physical plan.
#' @family SparkDataFrame functions
+#' @aliases explain,SparkDataFrame-method
#' @rdname explain
#' @name explain
#' @export
@@ -154,6 +157,7 @@ setMethod("explain",
#' @family SparkDataFrame functions
#' @rdname isLocal
#' @name isLocal
+#' @aliases isLocal,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -179,6 +183,7 @@ setMethod("isLocal",
#' truncated. However, if set greater than zero, truncates strings longer than `truncate`
#' characters and all cells will be aligned right.
#' @family SparkDataFrame functions
+#' @aliases showDF,SparkDataFrame-method
#' @rdname showDF
#' @name showDF
#' @export
@@ -210,6 +215,7 @@ setMethod("showDF",
#'
#' @family SparkDataFrame functions
#' @rdname show
+#' @aliases show,SparkDataFrame-method
#' @name show
#' @export
#' @examples
@@ -238,6 +244,7 @@ setMethod("show", "SparkDataFrame",
#' @family SparkDataFrame functions
#' @rdname dtypes
#' @name dtypes
+#' @aliases dtypes,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -264,7 +271,7 @@ setMethod("dtypes",
#' @family SparkDataFrame functions
#' @rdname columns
#' @name columns
-
+#' @aliases columns,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -285,6 +292,7 @@ setMethod("columns",
#' @rdname columns
#' @name names
+#' @aliases names,SparkDataFrame-method
#' @note names since 1.5.0
setMethod("names",
signature(x = "SparkDataFrame"),
@@ -293,6 +301,7 @@ setMethod("names",
})
#' @rdname columns
+#' @aliases names<-,SparkDataFrame-method
#' @name names<-
#' @note names<- since 1.5.0
setMethod("names<-",
@@ -305,6 +314,7 @@ setMethod("names<-",
})
#' @rdname columns
+#' @aliases colnames,SparkDataFrame-method
#' @name colnames
#' @note colnames since 1.6.0
setMethod("colnames",
@@ -314,6 +324,7 @@ setMethod("colnames",
})
#' @rdname columns
+#' @aliases colnames<-,SparkDataFrame-method
#' @name colnames<-
#' @note colnames<- since 1.6.0
setMethod("colnames<-",
@@ -350,6 +361,7 @@ setMethod("colnames<-",
#' @param x A SparkDataFrame
#' @return value A character vector with the column types of the given SparkDataFrame
#' @rdname coltypes
+#' @aliases coltypes,SparkDataFrame-method
#' @name coltypes
#' @family SparkDataFrame functions
#' @export
@@ -405,6 +417,7 @@ setMethod("coltypes",
#' to keep that column as-is.
#' @rdname coltypes
#' @name coltypes<-
+#' @aliases coltypes<-,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -453,6 +466,7 @@ setMethod("coltypes<-",
#' @family SparkDataFrame functions
#' @rdname createOrReplaceTempView
#' @name createOrReplaceTempView
+#' @aliases createOrReplaceTempView,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -479,6 +493,7 @@ setMethod("createOrReplaceTempView",
#' @seealso \link{createOrReplaceTempView}
#' @rdname registerTempTable-deprecated
#' @name registerTempTable
+#' @aliases registerTempTable,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -508,6 +523,7 @@ setMethod("registerTempTable",
#' @family SparkDataFrame functions
#' @rdname insertInto
#' @name insertInto
+#' @aliases insertInto,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -534,6 +550,7 @@ setMethod("insertInto",
#' @param x A SparkDataFrame
#'
#' @family SparkDataFrame functions
+#' @aliases cache,SparkDataFrame-method
#' @rdname cache
#' @name cache
#' @export
@@ -564,6 +581,7 @@ setMethod("cache",
#' @family SparkDataFrame functions
#' @rdname persist
#' @name persist
+#' @aliases persist,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -591,6 +609,7 @@ setMethod("persist",
#'
#' @family SparkDataFrame functions
#' @rdname unpersist-methods
+#' @aliases unpersist,SparkDataFrame-method
#' @name unpersist
#' @export
#' @examples
@@ -627,6 +646,7 @@ setMethod("unpersist",
#' @family SparkDataFrame functions
#' @rdname repartition
#' @name repartition
+#' @aliases repartition,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -670,6 +690,7 @@ setMethod("repartition",
#'
#' @param x A SparkDataFrame
#' @return A StringRRDD of JSON objects
+#' @aliases toJSON,SparkDataFrame-method
#' @noRd
#' @examples
#'\dontrun{
@@ -697,6 +718,7 @@ setMethod("toJSON",
#' @family SparkDataFrame functions
#' @rdname write.json
#' @name write.json
+#' @aliases write.json,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -722,6 +744,7 @@ setMethod("write.json",
#' @param path The directory where the file is saved
#'
#' @family SparkDataFrame functions
+#' @aliases write.orc,SparkDataFrame,character-method
#' @rdname write.orc
#' @name write.orc
#' @export
@@ -751,6 +774,7 @@ setMethod("write.orc",
#' @family SparkDataFrame functions
#' @rdname write.parquet
#' @name write.parquet
+#' @aliases write.parquet,SparkDataFrame,character-method
#' @export
#' @examples
#'\dontrun{
@@ -770,6 +794,7 @@ setMethod("write.parquet",
#' @rdname write.parquet
#' @name saveAsParquetFile
+#' @aliases saveAsParquetFile,SparkDataFrame,character-method
#' @export
#' @note saveAsParquetFile since 1.4.0
setMethod("saveAsParquetFile",
@@ -789,6 +814,7 @@ setMethod("saveAsParquetFile",
#' @param path The directory where the file is saved
#'
#' @family SparkDataFrame functions
+#' @aliases write.text,SparkDataFrame,character-method
#' @rdname write.text
#' @name write.text
#' @export
@@ -814,6 +840,7 @@ setMethod("write.text",
#' @param x A SparkDataFrame
#'
#' @family SparkDataFrame functions
+#' @aliases distinct,SparkDataFrame-method
#' @rdname distinct
#' @name distinct
#' @export
@@ -834,6 +861,7 @@ setMethod("distinct",
#' @rdname distinct
#' @name unique
+#' @aliases unique,SparkDataFrame-method
#' @note unique since 1.5.0
setMethod("unique",
signature(x = "SparkDataFrame"),
@@ -851,6 +879,7 @@ setMethod("unique",
#' @param seed Randomness seed value
#'
#' @family SparkDataFrame functions
+#' @aliases sample,SparkDataFrame,logical,numeric-method
#' @rdname sample
#' @name sample
#' @export
@@ -879,6 +908,7 @@ setMethod("sample",
})
#' @rdname sample
+#' @aliases sample_frac,SparkDataFrame,logical,numeric-method
#' @name sample_frac
#' @note sample_frac since 1.4.0
setMethod("sample_frac",
@@ -895,6 +925,7 @@ setMethod("sample_frac",
#' @family SparkDataFrame functions
#' @rdname nrow
#' @name count
+#' @aliases count,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -912,6 +943,7 @@ setMethod("count",
#' @name nrow
#' @rdname nrow
+#' @aliases nrow,SparkDataFrame-method
#' @note nrow since 1.5.0
setMethod("nrow",
signature(x = "SparkDataFrame"),
@@ -926,6 +958,7 @@ setMethod("nrow",
#' @family SparkDataFrame functions
#' @rdname ncol
#' @name ncol
+#' @aliases ncol,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -948,6 +981,7 @@ setMethod("ncol",
#'
#' @family SparkDataFrame functions
#' @rdname dim
+#' @aliases dim,SparkDataFrame-method
#' @name dim
#' @export
#' @examples
@@ -972,6 +1006,7 @@ setMethod("dim",
#'
#' @family SparkDataFrame functions
#' @rdname collect
+#' @aliases collect,SparkDataFrame-method
#' @name collect
#' @export
#' @examples
@@ -1045,6 +1080,7 @@ setMethod("collect",
#' @family SparkDataFrame functions
#' @rdname limit
#' @name limit
+#' @aliases limit,SparkDataFrame,numeric-method
#' @export
#' @examples
#' \dontrun{
@@ -1066,6 +1102,7 @@ setMethod("limit",
#' @family SparkDataFrame functions
#' @rdname take
#' @name take
+#' @aliases take,SparkDataFrame,numeric-method
#' @export
#' @examples
#'\dontrun{
@@ -1093,6 +1130,7 @@ setMethod("take",
#' @return A data.frame
#'
#' @family SparkDataFrame functions
+#' @aliases head,SparkDataFrame-method
#' @rdname head
#' @name head
#' @export
@@ -1116,6 +1154,7 @@ setMethod("head",
#' @param x A SparkDataFrame
#'
#' @family SparkDataFrame functions
+#' @aliases first,SparkDataFrame-method
#' @rdname first
#' @name first
#' @export
@@ -1166,6 +1205,7 @@ setMethod("toRDD",
#' @param x a SparkDataFrame
#' @return a GroupedData
#' @family SparkDataFrame functions
+#' @aliases groupBy,SparkDataFrame-method
#' @rdname groupBy
#' @name groupBy
#' @export
@@ -1193,6 +1233,7 @@ setMethod("groupBy",
#' @rdname groupBy
#' @name group_by
+#' @aliases group_by,SparkDataFrame-method
#' @note group_by since 1.4.0
setMethod("group_by",
signature(x = "SparkDataFrame"),
@@ -1206,7 +1247,8 @@ setMethod("group_by",
#'
#' @param x a SparkDataFrame
#' @family SparkDataFrame functions
-#' @rdname agg
+#' @aliases agg,SparkDataFrame-method
+#' @rdname summarize
#' @name agg
#' @export
#' @note agg since 1.4.0
@@ -1216,8 +1258,9 @@ setMethod("agg",
agg(groupBy(x), ...)
})
-#' @rdname agg
+#' @rdname summarize
#' @name summarize
+#' @aliases summarize,SparkDataFrame-method
#' @note summarize since 1.4.0
setMethod("summarize",
signature(x = "SparkDataFrame"),
@@ -1256,6 +1299,7 @@ dapplyInternal <- function(x, func, schema) {
#' It must match the output of func.
#' @family SparkDataFrame functions
#' @rdname dapply
+#' @aliases dapply,SparkDataFrame,function,structType-method
#' @name dapply
#' @seealso \link{dapplyCollect}
#' @export
@@ -1294,7 +1338,7 @@ setMethod("dapply",
#' dapplyCollect
#'
#' Apply a function to each partition of a SparkDataFrame and collect the result back
-#’ to R as a data.frame.
+#' to R as a data.frame.
#'
#' @param x A SparkDataFrame
#' @param func A function to be applied to each partition of the SparkDataFrame.
@@ -1303,6 +1347,7 @@ setMethod("dapply",
#' The output of func should be a R data.frame.
#' @family SparkDataFrame functions
#' @rdname dapplyCollect
+#' @aliases dapplyCollect,SparkDataFrame,function-method
#' @name dapplyCollect
#' @seealso \link{dapply}
#' @export
@@ -1358,6 +1403,7 @@ setMethod("dapplyCollect",
#' output column with preferred output column name and corresponding data type.
#' @return a SparkDataFrame
#' @family SparkDataFrame functions
+#' @aliases gapply,SparkDataFrame-method
#' @rdname gapply
#' @name gapply
#' @seealso \link{gapplyCollect}
@@ -1446,6 +1492,7 @@ setMethod("gapply",
#' The output of `func` is a local R data.frame.
#' @return a data.frame
#' @family SparkDataFrame functions
+#' @aliases gapplyCollect,SparkDataFrame-method
#' @rdname gapplyCollect
#' @name gapplyCollect
#' @seealso \link{gapply}
@@ -1592,6 +1639,7 @@ getColumn <- function(x, c) {
#' @rdname select
#' @name $
+#' @aliases $,SparkDataFrame-method
#' @note $ since 1.4.0
setMethod("$", signature(x = "SparkDataFrame"),
function(x, name) {
@@ -1600,6 +1648,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
#' @rdname select
#' @name $<-
+#' @aliases $<-,SparkDataFrame-method
#' @note $<- since 1.4.0
setMethod("$<-", signature(x = "SparkDataFrame"),
function(x, name, value) {
@@ -1618,6 +1667,7 @@ setClassUnion("numericOrcharacter", c("numeric", "character"))
#' @rdname subset
#' @name [[
+#' @aliases [[,SparkDataFrame,numericOrcharacter-method
#' @note [[ since 1.4.0
setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
function(x, i) {
@@ -1630,6 +1680,7 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
#' @rdname subset
#' @name [
+#' @aliases [,SparkDataFrame-method
#' @note [ since 1.4.0
setMethod("[", signature(x = "SparkDataFrame"),
function(x, i, j, ..., drop = F) {
@@ -1677,6 +1728,7 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns
#' @export
#' @family SparkDataFrame functions
+#' @aliases subset,SparkDataFrame-method
#' @rdname subset
#' @name subset
#' @family subsetting functions
@@ -1714,6 +1766,7 @@ setMethod("subset", signature(x = "SparkDataFrame"),
#' @export
#' @family SparkDataFrame functions
#' @rdname select
+#' @aliases select,SparkDataFrame,character-method
#' @name select
#' @family subsetting functions
#' @examples
@@ -1743,6 +1796,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
#' @rdname select
#' @export
+#' @aliases select,SparkDataFrame,Column-method
#' @note select(SparkDataFrame, Column) since 1.4.0
setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
function(x, col, ...) {
@@ -1755,6 +1809,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
#' @rdname select
#' @export
+#' @aliases select,SparkDataFrame,list-method
#' @note select(SparkDataFrame, list) since 1.4.0
setMethod("select",
signature(x = "SparkDataFrame", col = "list"),
@@ -1779,6 +1834,7 @@ setMethod("select",
#' @param ... Additional expressions
#' @return A SparkDataFrame
#' @family SparkDataFrame functions
+#' @aliases selectExpr,SparkDataFrame,character-method
#' @rdname selectExpr
#' @name selectExpr
#' @export
@@ -1808,6 +1864,7 @@ setMethod("selectExpr",
#' @param col A Column expression.
#' @return A SparkDataFrame with the new column added or the existing column replaced.
#' @family SparkDataFrame functions
+#' @aliases withColumn,SparkDataFrame,character,Column-method
#' @rdname withColumn
#' @name withColumn
#' @seealso \link{rename} \link{mutate}
@@ -1837,6 +1894,7 @@ setMethod("withColumn",
#' @param col a named argument of the form name = col
#' @return A new SparkDataFrame with the new columns added or replaced.
#' @family SparkDataFrame functions
+#' @aliases mutate,SparkDataFrame-method
#' @rdname mutate
#' @name mutate
#' @seealso \link{rename} \link{withColumn}
@@ -1912,6 +1970,7 @@ setMethod("mutate",
#' @export
#' @rdname mutate
+#' @aliases transform,SparkDataFrame-method
#' @name transform
#' @note transform since 1.5.0
setMethod("transform",
@@ -1931,6 +1990,7 @@ setMethod("transform",
#' @family SparkDataFrame functions
#' @rdname rename
#' @name withColumnRenamed
+#' @aliases withColumnRenamed,SparkDataFrame,character,character-method
#' @seealso \link{mutate}
#' @export
#' @examples
@@ -1957,6 +2017,7 @@ setMethod("withColumnRenamed",
#' @param ... A named pair of the form new_column_name = existing_column
#' @rdname rename
#' @name rename
+#' @aliases rename,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -1999,6 +2060,7 @@ setClassUnion("characterOrColumn", c("character", "Column"))
#' a character vector is specified for col
#' @return A SparkDataFrame where all elements are sorted.
#' @family SparkDataFrame functions
+#' @aliases arrange,SparkDataFrame,Column-method
#' @rdname arrange
#' @name arrange
#' @export
@@ -2026,6 +2088,7 @@ setMethod("arrange",
#' @rdname arrange
#' @name arrange
+#' @aliases arrange,SparkDataFrame,character-method
#' @export
#' @note arrange(SparkDataFrame, character) since 1.4.0
setMethod("arrange",
@@ -2059,6 +2122,7 @@ setMethod("arrange",
#' @rdname arrange
#' @name orderBy
+#' @aliases orderBy,SparkDataFrame,characterOrColumn-method
#' @export
#' @note orderBy(SparkDataFrame, characterOrColumn) since 1.4.0
setMethod("orderBy",
@@ -2076,6 +2140,7 @@ setMethod("orderBy",
#' or a string containing a SQL statement
#' @return A SparkDataFrame containing only the rows that meet the condition.
#' @family SparkDataFrame functions
+#' @aliases filter,SparkDataFrame,characterOrColumn-method
#' @rdname filter
#' @name filter
#' @family subsetting functions
@@ -2101,6 +2166,7 @@ setMethod("filter",
#' @rdname filter
#' @name where
+#' @aliases where,SparkDataFrame,characterOrColumn-method
#' @note where since 1.4.0
setMethod("where",
signature(x = "SparkDataFrame", condition = "characterOrColumn"),
@@ -2118,6 +2184,7 @@ setMethod("where",
#' If the first argument contains a character vector, the followings are ignored.
#' @return A SparkDataFrame with duplicate rows removed.
#' @family SparkDataFrame functions
+#' @aliases dropDuplicates,SparkDataFrame-method
#' @rdname dropDuplicates
#' @name dropDuplicates
#' @export
@@ -2164,6 +2231,7 @@ setMethod("dropDuplicates",
#' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
#' @return A SparkDataFrame containing the result of the join operation.
#' @family SparkDataFrame functions
+#' @aliases join,SparkDataFrame,SparkDataFrame-method
#' @rdname join
#' @name join
#' @seealso \link{merge}
@@ -2223,6 +2291,7 @@ setMethod("join",
#' outer join will be returned. If all.x and all.y are set to TRUE, a full
#' outer join will be returned.
#' @family SparkDataFrame functions
+#' @aliases merge,SparkDataFrame,SparkDataFrame-method
#' @rdname merge
#' @seealso \link{join}
#' @export
@@ -2360,6 +2429,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
#' @family SparkDataFrame functions
#' @rdname union
#' @name union
+#' @aliases union,SparkDataFrame,SparkDataFrame-method
#' @seealso \link{rbind}
#' @export
#' @examples
@@ -2381,6 +2451,7 @@ setMethod("union",
#' unionAll is deprecated - use union instead
#' @rdname union
#' @name unionAll
+#' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
#' @export
#' @note unionAll since 1.4.0
setMethod("unionAll",
@@ -2399,6 +2470,7 @@ setMethod("unionAll",
#' @param ... Additional SparkDataFrame
#' @return A SparkDataFrame containing the result of the union.
#' @family SparkDataFrame functions
+#' @aliases rbind,SparkDataFrame-method
#' @rdname rbind
#' @name rbind
#' @seealso \link{union}
@@ -2428,6 +2500,7 @@ setMethod("rbind",
#' @param y A SparkDataFrame
#' @return A SparkDataFrame containing the result of the intersect.
#' @family SparkDataFrame functions
+#' @aliases intersect,SparkDataFrame,SparkDataFrame-method
#' @rdname intersect
#' @name intersect
#' @export
@@ -2455,6 +2528,7 @@ setMethod("intersect",
#' @param y A SparkDataFrame
#' @return A SparkDataFrame containing the result of the except operation.
#' @family SparkDataFrame functions
+#' @aliases except,SparkDataFrame,SparkDataFrame-method
#' @rdname except
#' @name except
#' @export
@@ -2498,6 +2572,7 @@ setMethod("except",
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
#'
#' @family SparkDataFrame functions
+#' @aliases write.df,SparkDataFrame,character-method
#' @rdname write.df
#' @name write.df
#' @export
@@ -2529,6 +2604,7 @@ setMethod("write.df",
#' @rdname write.df
#' @name saveDF
+#' @aliases saveDF,SparkDataFrame,character-method
#' @export
#' @note saveDF since 1.4.0
setMethod("saveDF",
@@ -2558,6 +2634,7 @@ setMethod("saveDF",
#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
#'
#' @family SparkDataFrame functions
+#' @aliases saveAsTable,SparkDataFrame,character-method
#' @rdname saveAsTable
#' @name saveAsTable
#' @export
@@ -2595,6 +2672,7 @@ setMethod("saveAsTable",
#' @param ... Additional expressions
#' @return A SparkDataFrame
#' @family SparkDataFrame functions
+#' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method
#' @rdname summary
#' @name describe
#' @export
@@ -2618,6 +2696,7 @@ setMethod("describe",
#' @rdname summary
#' @name describe
+#' @aliases describe,SparkDataFrame-method
#' @note describe(SparkDataFrame) since 1.4.0
setMethod("describe",
signature(x = "SparkDataFrame"),
@@ -2628,6 +2707,7 @@ setMethod("describe",
#' @rdname summary
#' @name summary
+#' @aliases summary,SparkDataFrame-method
#' @note summary(SparkDataFrame) since 1.5.0
setMethod("summary",
signature(object = "SparkDataFrame"),
@@ -2653,6 +2733,7 @@ setMethod("summary",
#'
#' @family SparkDataFrame functions
#' @rdname nafunctions
+#' @aliases dropna,SparkDataFrame-method
#' @name dropna
#' @export
#' @examples
@@ -2682,6 +2763,7 @@ setMethod("dropna",
#' @rdname nafunctions
#' @name na.omit
+#' @aliases na.omit,SparkDataFrame-method
#' @export
#' @note na.omit since 1.5.0
setMethod("na.omit",
@@ -2707,6 +2789,7 @@ setMethod("na.omit",
#'
#' @rdname nafunctions
#' @name fillna
+#' @aliases fillna,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -2770,6 +2853,7 @@ setMethod("fillna",
#' @param x a SparkDataFrame
#' @return a data.frame
#' @family SparkDataFrame functions
+#' @aliases as.data.frame,SparkDataFrame-method
#' @rdname as.data.frame
#' @examples \dontrun{
#'
@@ -2791,6 +2875,7 @@ setMethod("as.data.frame",
#'
#' @family SparkDataFrame functions
#' @rdname attach
+#' @aliases attach,SparkDataFrame-method
#' @param what (SparkDataFrame) The SparkDataFrame to attach
#' @param pos (integer) Specify position in search() where to attach.
#' @param name (character) Name to use for the attached SparkDataFrame. Names
@@ -2821,6 +2906,7 @@ setMethod("attach",
#'
#' @rdname with
#' @family SparkDataFrame functions
+#' @aliases with,SparkDataFrame-method
#' @param data (SparkDataFrame) SparkDataFrame to use for constructing an environment.
#' @param expr (expression) Expression to evaluate.
#' @param ... arguments to be passed to future methods.
@@ -2844,6 +2930,7 @@ setMethod("with",
#'
#' @name str
#' @rdname str
+#' @aliases str,SparkDataFrame-method
#' @family SparkDataFrame functions
#' @param object a SparkDataFrame
#' @examples \dontrun{
@@ -2925,6 +3012,7 @@ setMethod("str",
#' @family SparkDataFrame functions
#' @rdname drop
#' @name drop
+#' @aliases drop,SparkDataFrame-method
#' @export
#' @examples
#'\dontrun{
@@ -2950,6 +3038,10 @@ setMethod("drop",
})
# Expose base::drop
+#' @name drop
+#' @rdname drop
+#' @aliases drop,ANY-method
+#' @export
setMethod("drop",
signature(x = "ANY"),
function(x) {
@@ -2966,6 +3058,7 @@ setMethod("drop",
#' @param colname the name of the column to build the histogram from.
#' @return a data.frame with the histogram statistics, i.e., counts and centroids.
#' @rdname histogram
+#' @aliases histogram,SparkDataFrame,characterOrColumn-method
#' @family SparkDataFrame functions
#' @export
#' @examples
@@ -3025,7 +3118,7 @@ setMethod("histogram",
# columns AND all of them have names 100 characters long (which is very unlikely),
# AND they run 1 billion histograms, the probability of collision will roughly be
# 1 in 4.4 x 10 ^ 96
- colname <- paste(base:::sample(c(letters, LETTERS),
+ colname <- paste(base::sample(c(letters, LETTERS),
size = min(max(nchar(colnames(df))) + 1, 100),
replace = TRUE),
collapse = "")
@@ -3100,6 +3193,7 @@ setMethod("histogram",
#' @family SparkDataFrame functions
#' @rdname write.jdbc
#' @name write.jdbc
+#' @aliases write.jdbc,SparkDataFrame,character,character-method
#' @export
#' @examples
#'\dontrun{
@@ -3127,6 +3221,7 @@ setMethod("write.jdbc",
#' @param seed A seed to use for random split
#'
#' @family SparkDataFrame functions
+#' @aliases randomSplit,SparkDataFrame,numeric-method
#' @rdname randomSplit
#' @name randomSplit
#' @export
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index d2ea1553c6..a14bcd91b3 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -269,6 +269,9 @@ as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) {
createDataFrame(data, schema, samplingRatio)
}
+#' @rdname createDataFrame
+#' @aliases as.DataFrame
+#' @export
as.DataFrame <- function(x, ...) {
dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...)
}
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index e20d05dece..4746380096 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -58,6 +58,7 @@ setMethod("show", "WindowSpec",
#' @return a WindowSpec
#' @rdname partitionBy
#' @name partitionBy
+#' @aliases partitionBy,WindowSpec-method
#' @family windowspec_method
#' @export
#' @examples
@@ -89,6 +90,7 @@ setMethod("partitionBy",
#' @return a WindowSpec
#' @rdname arrange
#' @name orderBy
+#' @aliases orderBy,WindowSpec,character-method
#' @family windowspec_method
#' @export
#' @examples
@@ -105,6 +107,7 @@ setMethod("orderBy",
#' @rdname arrange
#' @name orderBy
+#' @aliases orderBy,WindowSpec,Column-method
#' @export
#' @note orderBy(WindowSpec, Column) since 2.0.0
setMethod("orderBy",
@@ -131,6 +134,7 @@ setMethod("orderBy",
#' The frame is unbounded if this is the maximum long value.
#' @return a WindowSpec
#' @rdname rowsBetween
+#' @aliases rowsBetween,WindowSpec,numeric,numeric-method
#' @name rowsBetween
#' @family windowspec_method
#' @export
@@ -162,6 +166,7 @@ setMethod("rowsBetween",
#' The frame is unbounded if this is the maximum long value.
#' @return a WindowSpec
#' @rdname rangeBetween
+#' @aliases rangeBetween,WindowSpec,numeric,numeric-method
#' @name rangeBetween
#' @family windowspec_method
#' @export
@@ -187,6 +192,7 @@ setMethod("rangeBetween",
#'
#' @rdname over
#' @name over
+#' @aliases over,Column,WindowSpec-method
#' @family colum_func
#' @export
#' @note over since 2.0.0
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 1a65912d3a..0edb9d2ae5 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -44,6 +44,9 @@ setMethod("initialize", "Column", function(.Object, jc) {
.Object
})
+#' @rdname column
+#' @name column
+#' @aliases column,jobj-method
setMethod("column",
signature(x = "jobj"),
function(x) {
@@ -52,6 +55,7 @@ setMethod("column",
#' @rdname show
#' @name show
+#' @aliases show,Column-method
#' @export
#' @note show(Column) since 1.4.0
setMethod("show", "Column",
@@ -131,8 +135,12 @@ createMethods()
#'
#' Set a new name for a column
#'
+#' @param object Column to rename
+#' @param data new name to use
+#'
#' @rdname alias
#' @name alias
+#' @aliases alias,Column-method
#' @family colum_func
#' @export
#' @note alias since 1.4.0
@@ -153,6 +161,7 @@ setMethod("alias",
#' @rdname substr
#' @name substr
#' @family colum_func
+#' @aliases substr,Column-method
#'
#' @param start starting position
#' @param stop ending position
@@ -171,8 +180,9 @@ setMethod("substr", signature(x = "Column"),
#' @rdname startsWith
#' @name startsWith
#' @family colum_func
+#' @aliases startsWith,Column-method
#'
-#' @param x vector of character string whose “starts” are considered
+#' @param x vector of character string whose "starts" are considered
#' @param prefix character vector (often of length one)
#' @note startsWith since 1.4.0
setMethod("startsWith", signature(x = "Column"),
@@ -189,8 +199,9 @@ setMethod("startsWith", signature(x = "Column"),
#' @rdname endsWith
#' @name endsWith
#' @family colum_func
+#' @aliases endsWith,Column-method
#'
-#' @param x vector of character string whose “ends” are considered
+#' @param x vector of character string whose "ends" are considered
#' @param suffix character vector (often of length one)
#' @note endsWith since 1.4.0
setMethod("endsWith", signature(x = "Column"),
@@ -206,6 +217,7 @@ setMethod("endsWith", signature(x = "Column"),
#' @rdname between
#' @name between
#' @family colum_func
+#' @aliases between,Column-method
#'
#' @param bounds lower and upper bounds
#' @note between since 1.5.0
@@ -224,10 +236,10 @@ setMethod("between", signature(x = "Column"),
#' @rdname cast
#' @name cast
#' @family colum_func
+#' @aliases cast,Column-method
#'
#' @examples \dontrun{
#' cast(df$age, "string")
-#' cast(df$name, list(type="array", elementType="byte", containsNull = TRUE))
#' }
#' @note cast since 1.4.0
setMethod("cast",
@@ -235,12 +247,8 @@ setMethod("cast",
function(x, dataType) {
if (is.character(dataType)) {
column(callJMethod(x@jc, "cast", dataType))
- } else if (is.list(dataType)) {
- json <- tojson(dataType)
- jdataType <- callJStatic("org.apache.spark.sql.types.DataType", "fromJson", json)
- column(callJMethod(x@jc, "cast", jdataType))
} else {
- stop("dataType should be character or list")
+ stop("dataType should be character")
}
})
@@ -248,7 +256,7 @@ setMethod("cast",
#'
#' @rdname match
#' @name %in%
-#' @aliases %in%
+#' @aliases %in%,Column-method
#' @return matched values as a result of comparing with given values.
#' @export
#' @examples
@@ -272,6 +280,7 @@ setMethod("%in%",
#' @rdname otherwise
#' @name otherwise
#' @family colum_func
+#' @aliases otherwise,Column-method
#' @export
#' @note otherwise since 1.5.0
setMethod("otherwise",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 52d46f9d76..573c915a5c 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -27,6 +27,7 @@ NULL
#' @rdname lit
#' @name lit
#' @export
+#' @aliases lit,ANY-method
#' @examples
#' \dontrun{
#' lit(df$name)
@@ -46,11 +47,14 @@ setMethod("lit", signature("ANY"),
#'
#' Computes the absolute value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname abs
#' @name abs
#' @family normal_funcs
#' @export
#' @examples \dontrun{abs(df$c)}
+#' @aliases abs,Column-method
#' @note abs since 1.5.0
setMethod("abs",
signature(x = "Column"),
@@ -64,11 +68,14 @@ setMethod("abs",
#' Computes the cosine inverse of the given value; the returned angle is in the range
#' 0.0 through pi.
#'
+#' @param x Column to compute on.
+#'
#' @rdname acos
#' @name acos
#' @family math_funcs
#' @export
#' @examples \dontrun{acos(df$c)}
+#' @aliases acos,Column-method
#' @note acos since 1.5.0
setMethod("acos",
signature(x = "Column"),
@@ -82,10 +89,13 @@ setMethod("acos",
#' Returns the approximate number of distinct items in a group. This is a column
#' aggregate function.
#'
+#' @param x Column to compute on.
+#'
#' @rdname approxCountDistinct
#' @name approxCountDistinct
#' @return the approximate number of distinct items in a group.
#' @export
+#' @aliases approxCountDistinct,Column-method
#' @examples \dontrun{approxCountDistinct(df$c)}
#' @note approxCountDistinct(Column) since 1.4.0
setMethod("approxCountDistinct",
@@ -100,10 +110,13 @@ setMethod("approxCountDistinct",
#' Computes the numeric value of the first character of the string column, and returns the
#' result as a int column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname ascii
#' @name ascii
#' @family string_funcs
#' @export
+#' @aliases ascii,Column-method
#' @examples \dontrun{ascii(df$c)}
#' @note ascii since 1.5.0
setMethod("ascii",
@@ -118,10 +131,13 @@ setMethod("ascii",
#' Computes the sine inverse of the given value; the returned angle is in the range
#' -pi/2 through pi/2.
#'
+#' @param x Column to compute on.
+#'
#' @rdname asin
#' @name asin
#' @family math_funcs
#' @export
+#' @aliases asin,Column-method
#' @examples \dontrun{asin(df$c)}
#' @note asin since 1.5.0
setMethod("asin",
@@ -135,10 +151,13 @@ setMethod("asin",
#'
#' Computes the tangent inverse of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname atan
#' @name atan
#' @family math_funcs
#' @export
+#' @aliases atan,Column-method
#' @examples \dontrun{atan(df$c)}
#' @note atan since 1.5.0
setMethod("atan",
@@ -152,10 +171,13 @@ setMethod("atan",
#'
#' Aggregate function: returns the average of the values in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname avg
#' @name avg
#' @family agg_funcs
#' @export
+#' @aliases avg,Column-method
#' @examples \dontrun{avg(df$c)}
#' @note avg since 1.4.0
setMethod("avg",
@@ -170,10 +192,13 @@ setMethod("avg",
#' Computes the BASE64 encoding of a binary column and returns it as a string column.
#' This is the reverse of unbase64.
#'
+#' @param x Column to compute on.
+#'
#' @rdname base64
#' @name base64
#' @family string_funcs
#' @export
+#' @aliases base64,Column-method
#' @examples \dontrun{base64(df$c)}
#' @note base64 since 1.5.0
setMethod("base64",
@@ -188,10 +213,13 @@ setMethod("base64",
#' An expression that returns the string representation of the binary value of the given long
#' column. For example, bin("12") returns "1100".
#'
+#' @param x Column to compute on.
+#'
#' @rdname bin
#' @name bin
#' @family math_funcs
#' @export
+#' @aliases bin,Column-method
#' @examples \dontrun{bin(df$c)}
#' @note bin since 1.5.0
setMethod("bin",
@@ -205,10 +233,13 @@ setMethod("bin",
#'
#' Computes bitwise NOT.
#'
+#' @param x Column to compute on.
+#'
#' @rdname bitwiseNOT
#' @name bitwiseNOT
#' @family normal_funcs
#' @export
+#' @aliases bitwiseNOT,Column-method
#' @examples \dontrun{bitwiseNOT(df$c)}
#' @note bitwiseNOT since 1.5.0
setMethod("bitwiseNOT",
@@ -222,10 +253,13 @@ setMethod("bitwiseNOT",
#'
#' Computes the cube-root of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname cbrt
#' @name cbrt
#' @family math_funcs
#' @export
+#' @aliases cbrt,Column-method
#' @examples \dontrun{cbrt(df$c)}
#' @note cbrt since 1.4.0
setMethod("cbrt",
@@ -239,10 +273,13 @@ setMethod("cbrt",
#'
#' Computes the ceiling of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname ceil
#' @name ceil
#' @family math_funcs
#' @export
+#' @aliases ceil,Column-method
#' @examples \dontrun{ceil(df$c)}
#' @note ceil since 1.5.0
setMethod("ceil",
@@ -263,11 +300,14 @@ col <- function(x) {
#' Returns a Column based on the given column name
#'
#' Returns a Column based on the given column name.
+#'
+#' @param x Character column name.
#'
#' @rdname column
#' @name column
#' @family normal_funcs
#' @export
+#' @aliases column,character-method
#' @examples \dontrun{column(df)}
#' @note column since 1.6.0
setMethod("column",
@@ -279,10 +319,13 @@ setMethod("column",
#'
#' Computes the Pearson Correlation Coefficient for two Columns.
#'
+#' @param x Column to compute on.
+#'
#' @rdname corr
#' @name corr
#' @family math_funcs
#' @export
+#' @aliases corr,Column-method
#' @examples \dontrun{corr(df$c, df$d)}
#' @note corr since 1.6.0
setMethod("corr", signature(x = "Column"),
@@ -296,10 +339,13 @@ setMethod("corr", signature(x = "Column"),
#'
#' Compute the sample covariance between two expressions.
#'
+#' @param x Column to compute on.
+#'
#' @rdname cov
#' @name cov
#' @family math_funcs
#' @export
+#' @aliases cov,characterOrColumn-method
#' @examples
#' \dontrun{
#' cov(df$c, df$d)
@@ -315,7 +361,11 @@ setMethod("cov", signature(x = "characterOrColumn"),
})
#' @rdname cov
+#'
+#' @param col1 First column to compute covar_samp.
+#' @param col2 Second column to compute covar_samp.
#' @name covar_samp
+#' @aliases covar_samp,characterOrColumn,characterOrColumn-method
#' @note covar_samp since 2.0.0
setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"),
function(col1, col2) {
@@ -332,10 +382,14 @@ setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterO
#'
#' Compute the population covariance between two expressions.
#'
+#' @param col1 First column to compute covar_pop.
+#' @param col2 Second column to compute covar_pop.
+#'
#' @rdname covar_pop
#' @name covar_pop
#' @family math_funcs
#' @export
+#' @aliases covar_pop,characterOrColumn,characterOrColumn-method
#' @examples
#' \dontrun{
#' covar_pop(df$c, df$d)
@@ -357,9 +411,12 @@ setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOr
#'
#' Computes the cosine of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname cos
#' @name cos
#' @family math_funcs
+#' @aliases cos,Column-method
#' @export
#' @examples \dontrun{cos(df$c)}
#' @note cos since 1.5.0
@@ -374,9 +431,12 @@ setMethod("cos",
#'
#' Computes the hyperbolic cosine of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname cosh
#' @name cosh
#' @family math_funcs
+#' @aliases cosh,Column-method
#' @export
#' @examples \dontrun{cosh(df$c)}
#' @note cosh since 1.5.0
@@ -391,9 +451,12 @@ setMethod("cosh",
#'
#' Returns the number of items in a group. This is a column aggregate function.
#'
-#' @rdname count
+#' @param x Column to compute on.
+#'
+#' @rdname nrow
#' @name count
#' @family agg_funcs
+#' @aliases count,Column-method
#' @export
#' @examples \dontrun{count(df$c)}
#' @note count since 1.4.0
@@ -409,9 +472,12 @@ setMethod("count",
#' Calculates the cyclic redundancy check value (CRC32) of a binary column and
#' returns the value as a bigint.
#'
+#' @param x Column to compute on.
+#'
#' @rdname crc32
#' @name crc32
#' @family misc_funcs
+#' @aliases crc32,Column-method
#' @export
#' @examples \dontrun{crc32(df$c)}
#' @note crc32 since 1.5.0
@@ -426,9 +492,12 @@ setMethod("crc32",
#'
#' Calculates the hash code of given columns, and returns the result as a int column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname hash
#' @name hash
#' @family misc_funcs
+#' @aliases hash,Column-method
#' @export
#' @examples \dontrun{hash(df$c)}
#' @note hash since 2.0.0
@@ -447,9 +516,12 @@ setMethod("hash",
#'
#' Extracts the day of the month as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname dayofmonth
#' @name dayofmonth
#' @family datetime_funcs
+#' @aliases dayofmonth,Column-method
#' @export
#' @examples \dontrun{dayofmonth(df$c)}
#' @note dayofmonth since 1.5.0
@@ -464,9 +536,12 @@ setMethod("dayofmonth",
#'
#' Extracts the day of the year as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname dayofyear
#' @name dayofyear
#' @family datetime_funcs
+#' @aliases dayofyear,Column-method
#' @export
#' @examples \dontrun{dayofyear(df$c)}
#' @note dayofyear since 1.5.0
@@ -482,9 +557,13 @@ setMethod("dayofyear",
#' Computes the first argument into a string from a binary using the provided character set
#' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
#'
+#' @param x Column to compute on.
+#' @param charset Character set to use
+#'
#' @rdname decode
#' @name decode
#' @family string_funcs
+#' @aliases decode,Column,character-method
#' @export
#' @examples \dontrun{decode(df$c, "UTF-8")}
#' @note decode since 1.6.0
@@ -500,9 +579,13 @@ setMethod("decode",
#' Computes the first argument into a binary from a string using the provided character set
#' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
#'
+#' @param x Column to compute on.
+#' @param charset Character set to use
+#'
#' @rdname encode
#' @name encode
#' @family string_funcs
+#' @aliases encode,Column,character-method
#' @export
#' @examples \dontrun{encode(df$c, "UTF-8")}
#' @note encode since 1.6.0
@@ -517,9 +600,12 @@ setMethod("encode",
#'
#' Computes the exponential of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname exp
#' @name exp
#' @family math_funcs
+#' @aliases exp,Column-method
#' @export
#' @examples \dontrun{exp(df$c)}
#' @note exp since 1.5.0
@@ -534,8 +620,11 @@ setMethod("exp",
#'
#' Computes the exponential of the given value minus one.
#'
+#' @param x Column to compute on.
+#'
#' @rdname expm1
#' @name expm1
+#' @aliases expm1,Column-method
#' @family math_funcs
#' @export
#' @examples \dontrun{expm1(df$c)}
@@ -551,8 +640,11 @@ setMethod("expm1",
#'
#' Computes the factorial of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname factorial
#' @name factorial
+#' @aliases factorial,Column-method
#' @family math_funcs
#' @export
#' @examples \dontrun{factorial(df$c)}
@@ -571,8 +663,11 @@ setMethod("factorial",
#' The function by default returns the first values it sees. It will return the first non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
+#' @param x Column to compute on.
+#'
#' @rdname first
#' @name first
+#' @aliases first,characterOrColumn-method
#' @family agg_funcs
#' @export
#' @examples
@@ -597,8 +692,11 @@ setMethod("first",
#'
#' Computes the floor of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname floor
#' @name floor
+#' @aliases floor,Column-method
#' @family math_funcs
#' @export
#' @examples \dontrun{floor(df$c)}
@@ -614,9 +712,12 @@ setMethod("floor",
#'
#' Computes hex value of the given column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname hex
#' @name hex
#' @family math_funcs
+#' @aliases hex,Column-method
#' @export
#' @examples \dontrun{hex(df$c)}
#' @note hex since 1.5.0
@@ -631,8 +732,11 @@ setMethod("hex",
#'
#' Extracts the hours as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname hour
#' @name hour
+#' @aliases hour,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{hour(df$c)}
@@ -651,9 +755,12 @@ setMethod("hour",
#'
#' For example, "hello world" will become "Hello World".
#'
+#' @param x Column to compute on.
+#'
#' @rdname initcap
#' @name initcap
#' @family string_funcs
+#' @aliases initcap,Column-method
#' @export
#' @examples \dontrun{initcap(df$c)}
#' @note initcap since 1.5.0
@@ -668,9 +775,12 @@ setMethod("initcap",
#'
#' Return true if the column is NaN, alias for \link{isnan}
#'
+#' @param x Column to compute on.
+#'
#' @rdname is.nan
#' @name is.nan
#' @family normal_funcs
+#' @aliases is.nan,Column-method
#' @export
#' @examples
#' \dontrun{
@@ -686,6 +796,7 @@ setMethod("is.nan",
#' @rdname is.nan
#' @name isnan
+#' @aliases isnan,Column-method
#' @note isnan since 2.0.0
setMethod("isnan",
signature(x = "Column"),
@@ -698,8 +809,11 @@ setMethod("isnan",
#'
#' Aggregate function: returns the kurtosis of the values in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname kurtosis
#' @name kurtosis
+#' @aliases kurtosis,Column-method
#' @family agg_funcs
#' @export
#' @examples \dontrun{kurtosis(df$c)}
@@ -718,8 +832,11 @@ setMethod("kurtosis",
#' The function by default returns the last values it sees. It will return the last non-missing
#' value it sees when na.rm is set to true. If all values are missing, then NA is returned.
#'
+#' @param x Column to compute on.
+#'
#' @rdname last
#' @name last
+#' @aliases last,characterOrColumn-method
#' @family agg_funcs
#' @export
#' @examples
@@ -746,8 +863,11 @@ setMethod("last",
#' For example, input "2015-07-27" returns "2015-07-31" since July 31 is the last day of the
#' month in July 2015.
#'
+#' @param x Column to compute on.
+#'
#' @rdname last_day
#' @name last_day
+#' @aliases last_day,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{last_day(df$c)}
@@ -763,8 +883,11 @@ setMethod("last_day",
#'
#' Computes the length of a given string or binary column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname length
#' @name length
+#' @aliases length,Column-method
#' @family string_funcs
#' @export
#' @examples \dontrun{length(df$c)}
@@ -780,8 +903,11 @@ setMethod("length",
#'
#' Computes the natural logarithm of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname log
#' @name log
+#' @aliases log,Column-method
#' @family math_funcs
#' @export
#' @examples \dontrun{log(df$c)}
@@ -797,9 +923,12 @@ setMethod("log",
#'
#' Computes the logarithm of the given value in base 10.
#'
+#' @param x Column to compute on.
+#'
#' @rdname log10
#' @name log10
#' @family math_funcs
+#' @aliases log10,Column-method
#' @export
#' @examples \dontrun{log10(df$c)}
#' @note log10 since 1.5.0
@@ -814,9 +943,12 @@ setMethod("log10",
#'
#' Computes the natural logarithm of the given value plus one.
#'
+#' @param x Column to compute on.
+#'
#' @rdname log1p
#' @name log1p
#' @family math_funcs
+#' @aliases log1p,Column-method
#' @export
#' @examples \dontrun{log1p(df$c)}
#' @note log1p since 1.5.0
@@ -831,9 +963,12 @@ setMethod("log1p",
#'
#' Computes the logarithm of the given column in base 2.
#'
+#' @param x Column to compute on.
+#'
#' @rdname log2
#' @name log2
#' @family math_funcs
+#' @aliases log2,Column-method
#' @export
#' @examples \dontrun{log2(df$c)}
#' @note log2 since 1.5.0
@@ -848,9 +983,12 @@ setMethod("log2",
#'
#' Converts a string column to lower case.
#'
+#' @param x Column to compute on.
+#'
#' @rdname lower
#' @name lower
#' @family string_funcs
+#' @aliases lower,Column-method
#' @export
#' @examples \dontrun{lower(df$c)}
#' @note lower since 1.4.0
@@ -865,9 +1003,12 @@ setMethod("lower",
#'
#' Trim the spaces from left end for the specified string value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname ltrim
#' @name ltrim
#' @family string_funcs
+#' @aliases ltrim,Column-method
#' @export
#' @examples \dontrun{ltrim(df$c)}
#' @note ltrim since 1.5.0
@@ -882,9 +1023,12 @@ setMethod("ltrim",
#'
#' Aggregate function: returns the maximum value of the expression in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname max
#' @name max
#' @family agg_funcs
+#' @aliases max,Column-method
#' @export
#' @examples \dontrun{max(df$c)}
#' @note max since 1.5.0
@@ -900,9 +1044,12 @@ setMethod("max",
#' Calculates the MD5 digest of a binary column and returns the value
#' as a 32 character hex string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname md5
#' @name md5
#' @family misc_funcs
+#' @aliases md5,Column-method
#' @export
#' @examples \dontrun{md5(df$c)}
#' @note md5 since 1.5.0
@@ -918,9 +1065,12 @@ setMethod("md5",
#' Aggregate function: returns the average of the values in a group.
#' Alias for avg.
#'
+#' @param x Column to compute on.
+#'
#' @rdname mean
#' @name mean
#' @family agg_funcs
+#' @aliases mean,Column-method
#' @export
#' @examples \dontrun{mean(df$c)}
#' @note mean since 1.5.0
@@ -935,8 +1085,11 @@ setMethod("mean",
#'
#' Aggregate function: returns the minimum value of the expression in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname min
#' @name min
+#' @aliases min,Column-method
#' @family agg_funcs
#' @export
#' @examples \dontrun{min(df$c)}
@@ -952,8 +1105,11 @@ setMethod("min",
#'
#' Extracts the minutes as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname minute
#' @name minute
+#' @aliases minute,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{minute(df$c)}
@@ -981,6 +1137,7 @@ setMethod("minute",
#' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL.
#'
#' @rdname monotonically_increasing_id
+#' @aliases monotonically_increasing_id,missing-method
#' @name monotonically_increasing_id
#' @family misc_funcs
#' @export
@@ -996,8 +1153,11 @@ setMethod("monotonically_increasing_id",
#'
#' Extracts the month as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname month
#' @name month
+#' @aliases month,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{month(df$c)}
@@ -1013,9 +1173,12 @@ setMethod("month",
#'
#' Unary minus, i.e. negate the expression.
#'
+#' @param x Column to compute on.
+#'
#' @rdname negate
#' @name negate
#' @family normal_funcs
+#' @aliases negate,Column-method
#' @export
#' @examples \dontrun{negate(df$c)}
#' @note negate since 1.5.0
@@ -1030,9 +1193,12 @@ setMethod("negate",
#'
#' Extracts the quarter as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname quarter
#' @name quarter
#' @family datetime_funcs
+#' @aliases quarter,Column-method
#' @export
#' @examples \dontrun{quarter(df$c)}
#' @note quarter since 1.5.0
@@ -1047,9 +1213,12 @@ setMethod("quarter",
#'
#' Reverses the string column and returns it as a new string column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname reverse
#' @name reverse
#' @family string_funcs
+#' @aliases reverse,Column-method
#' @export
#' @examples \dontrun{reverse(df$c)}
#' @note reverse since 1.5.0
@@ -1065,9 +1234,12 @@ setMethod("reverse",
#' Returns the double value that is closest in value to the argument and
#' is equal to a mathematical integer.
#'
+#' @param x Column to compute on.
+#'
#' @rdname rint
#' @name rint
#' @family math_funcs
+#' @aliases rint,Column-method
#' @export
#' @examples \dontrun{rint(df$c)}
#' @note rint since 1.5.0
@@ -1082,9 +1254,12 @@ setMethod("rint",
#'
#' Returns the value of the column `e` rounded to 0 decimal places using HALF_UP rounding mode.
#'
+#' @param x Column to compute on.
+#'
#' @rdname round
#' @name round
#' @family math_funcs
+#' @aliases round,Column-method
#' @export
#' @examples \dontrun{round(df$c)}
#' @note round since 1.5.0
@@ -1102,9 +1277,12 @@ setMethod("round",
#' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number.
#' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
#'
+#' @param x Column to compute on.
+#'
#' @rdname bround
#' @name bround
#' @family math_funcs
+#' @aliases bround,Column-method
#' @export
#' @examples \dontrun{bround(df$c, 0)}
#' @note bround since 2.0.0
@@ -1120,9 +1298,12 @@ setMethod("bround",
#'
#' Trim the spaces from right end for the specified string value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname rtrim
#' @name rtrim
#' @family string_funcs
+#' @aliases rtrim,Column-method
#' @export
#' @examples \dontrun{rtrim(df$c)}
#' @note rtrim since 1.5.0
@@ -1137,9 +1318,12 @@ setMethod("rtrim",
#'
#' Aggregate function: alias for \link{stddev_samp}
#'
+#' @param x Column to compute on.
+#'
#' @rdname sd
#' @name sd
#' @family agg_funcs
+#' @aliases sd,Column-method
#' @seealso \link{stddev_pop}, \link{stddev_samp}
#' @export
#' @examples
@@ -1160,9 +1344,12 @@ setMethod("sd",
#'
#' Extracts the seconds as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname second
#' @name second
#' @family datetime_funcs
+#' @aliases second,Column-method
#' @export
#' @examples \dontrun{second(df$c)}
#' @note second since 1.5.0
@@ -1178,9 +1365,12 @@ setMethod("second",
#' Calculates the SHA-1 digest of a binary column and returns the value
#' as a 40 character hex string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sha1
#' @name sha1
#' @family misc_funcs
+#' @aliases sha1,Column-method
#' @export
#' @examples \dontrun{sha1(df$c)}
#' @note sha1 since 1.5.0
@@ -1195,8 +1385,11 @@ setMethod("sha1",
#'
#' Computes the signum of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sign
#' @name signum
+#' @aliases signum,Column-method
#' @family math_funcs
#' @export
#' @examples \dontrun{signum(df$c)}
@@ -1212,9 +1405,12 @@ setMethod("signum",
#'
#' Computes the sine of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sin
#' @name sin
#' @family math_funcs
+#' @aliases sin,Column-method
#' @export
#' @examples \dontrun{sin(df$c)}
#' @note sin since 1.5.0
@@ -1229,9 +1425,12 @@ setMethod("sin",
#'
#' Computes the hyperbolic sine of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sinh
#' @name sinh
#' @family math_funcs
+#' @aliases sinh,Column-method
#' @export
#' @examples \dontrun{sinh(df$c)}
#' @note sinh since 1.5.0
@@ -1246,9 +1445,12 @@ setMethod("sinh",
#'
#' Aggregate function: returns the skewness of the values in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname skewness
#' @name skewness
#' @family agg_funcs
+#' @aliases skewness,Column-method
#' @export
#' @examples \dontrun{skewness(df$c)}
#' @note skewness since 1.6.0
@@ -1263,9 +1465,12 @@ setMethod("skewness",
#'
#' Return the soundex code for the specified expression.
#'
+#' @param x Column to compute on.
+#'
#' @rdname soundex
#' @name soundex
#' @family string_funcs
+#' @aliases soundex,Column-method
#' @export
#' @examples \dontrun{soundex(df$c)}
#' @note soundex since 1.5.0
@@ -1286,6 +1491,7 @@ setMethod("soundex",
#'
#' @rdname spark_partition_id
#' @name spark_partition_id
+#' @aliases spark_partition_id,missing-method
#' @export
#' @examples
#' \dontrun{select(df, spark_partition_id())}
@@ -1298,6 +1504,7 @@ setMethod("spark_partition_id",
})
#' @rdname sd
+#' @aliases stddev,Column-method
#' @name stddev
#' @note stddev since 1.6.0
setMethod("stddev",
@@ -1311,9 +1518,12 @@ setMethod("stddev",
#'
#' Aggregate function: returns the population standard deviation of the expression in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname stddev_pop
#' @name stddev_pop
#' @family agg_funcs
+#' @aliases stddev_pop,Column-method
#' @seealso \link{sd}, \link{stddev_samp}
#' @export
#' @examples \dontrun{stddev_pop(df$c)}
@@ -1329,9 +1539,12 @@ setMethod("stddev_pop",
#'
#' Aggregate function: returns the unbiased sample standard deviation of the expression in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname stddev_samp
#' @name stddev_samp
#' @family agg_funcs
+#' @aliases stddev_samp,Column-method
#' @seealso \link{stddev_pop}, \link{sd}
#' @export
#' @examples \dontrun{stddev_samp(df$c)}
@@ -1347,9 +1560,12 @@ setMethod("stddev_samp",
#'
#' Creates a new struct column that composes multiple input columns.
#'
+#' @param x Column to compute on.
+#'
#' @rdname struct
#' @name struct
#' @family normal_funcs
+#' @aliases struct,characterOrColumn-method
#' @export
#' @examples
#' \dontrun{
@@ -1373,9 +1589,12 @@ setMethod("struct",
#'
#' Computes the square root of the specified float value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sqrt
#' @name sqrt
#' @family math_funcs
+#' @aliases sqrt,Column-method
#' @export
#' @examples \dontrun{sqrt(df$c)}
#' @note sqrt since 1.5.0
@@ -1390,9 +1609,12 @@ setMethod("sqrt",
#'
#' Aggregate function: returns the sum of all values in the expression.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sum
#' @name sum
#' @family agg_funcs
+#' @aliases sum,Column-method
#' @export
#' @examples \dontrun{sum(df$c)}
#' @note sum since 1.5.0
@@ -1407,9 +1629,12 @@ setMethod("sum",
#'
#' Aggregate function: returns the sum of distinct values in the expression.
#'
+#' @param x Column to compute on.
+#'
#' @rdname sumDistinct
#' @name sumDistinct
#' @family agg_funcs
+#' @aliases sumDistinct,Column-method
#' @export
#' @examples \dontrun{sumDistinct(df$c)}
#' @note sumDistinct since 1.4.0
@@ -1424,9 +1649,12 @@ setMethod("sumDistinct",
#'
#' Computes the tangent of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname tan
#' @name tan
#' @family math_funcs
+#' @aliases tan,Column-method
#' @export
#' @examples \dontrun{tan(df$c)}
#' @note tan since 1.5.0
@@ -1441,9 +1669,12 @@ setMethod("tan",
#'
#' Computes the hyperbolic tangent of the given value.
#'
+#' @param x Column to compute on.
+#'
#' @rdname tanh
#' @name tanh
#' @family math_funcs
+#' @aliases tanh,Column-method
#' @export
#' @examples \dontrun{tanh(df$c)}
#' @note tanh since 1.5.0
@@ -1458,9 +1689,12 @@ setMethod("tanh",
#'
#' Converts an angle measured in radians to an approximately equivalent angle measured in degrees.
#'
+#' @param x Column to compute on.
+#'
#' @rdname toDegrees
#' @name toDegrees
#' @family math_funcs
+#' @aliases toDegrees,Column-method
#' @export
#' @examples \dontrun{toDegrees(df$c)}
#' @note toDegrees since 1.4.0
@@ -1475,9 +1709,12 @@ setMethod("toDegrees",
#'
#' Converts an angle measured in degrees to an approximately equivalent angle measured in radians.
#'
+#' @param x Column to compute on.
+#'
#' @rdname toRadians
#' @name toRadians
#' @family math_funcs
+#' @aliases toRadians,Column-method
#' @export
#' @examples \dontrun{toRadians(df$c)}
#' @note toRadians since 1.4.0
@@ -1492,9 +1729,12 @@ setMethod("toRadians",
#'
#' Converts the column into DateType.
#'
+#' @param x Column to compute on.
+#'
#' @rdname to_date
#' @name to_date
#' @family datetime_funcs
+#' @aliases to_date,Column-method
#' @export
#' @examples \dontrun{to_date(df$c)}
#' @note to_date since 1.5.0
@@ -1509,9 +1749,12 @@ setMethod("to_date",
#'
#' Trim the spaces from both ends for the specified string column.
#'
+#' @param x Column to compute on.
+#'
#' @rdname trim
#' @name trim
#' @family string_funcs
+#' @aliases trim,Column-method
#' @export
#' @examples \dontrun{trim(df$c)}
#' @note trim since 1.5.0
@@ -1527,9 +1770,12 @@ setMethod("trim",
#' Decodes a BASE64 encoded string column and returns it as a binary column.
#' This is the reverse of base64.
#'
+#' @param x Column to compute on.
+#'
#' @rdname unbase64
#' @name unbase64
#' @family string_funcs
+#' @aliases unbase64,Column-method
#' @export
#' @examples \dontrun{unbase64(df$c)}
#' @note unbase64 since 1.5.0
@@ -1545,9 +1791,12 @@ setMethod("unbase64",
#' Inverse of hex. Interprets each pair of characters as a hexadecimal number
#' and converts to the byte representation of number.
#'
+#' @param x Column to compute on.
+#'
#' @rdname unhex
#' @name unhex
#' @family math_funcs
+#' @aliases unhex,Column-method
#' @export
#' @examples \dontrun{unhex(df$c)}
#' @note unhex since 1.5.0
@@ -1562,9 +1811,12 @@ setMethod("unhex",
#'
#' Converts a string column to upper case.
#'
+#' @param x Column to compute on.
+#'
#' @rdname upper
#' @name upper
#' @family string_funcs
+#' @aliases upper,Column-method
#' @export
#' @examples \dontrun{upper(df$c)}
#' @note upper since 1.4.0
@@ -1579,9 +1831,12 @@ setMethod("upper",
#'
#' Aggregate function: alias for \link{var_samp}.
#'
+#' @param x Column to compute on.
+#'
#' @rdname var
#' @name var
#' @family agg_funcs
+#' @aliases var,Column-method
#' @seealso \link{var_pop}, \link{var_samp}
#' @export
#' @examples
@@ -1599,6 +1854,7 @@ setMethod("var",
})
#' @rdname var
+#' @aliases variance,Column-method
#' @name variance
#' @note variance since 1.6.0
setMethod("variance",
@@ -1612,9 +1868,12 @@ setMethod("variance",
#'
#' Aggregate function: returns the population variance of the values in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname var_pop
#' @name var_pop
#' @family agg_funcs
+#' @aliases var_pop,Column-method
#' @seealso \link{var}, \link{var_samp}
#' @export
#' @examples \dontrun{var_pop(df$c)}
@@ -1630,8 +1889,11 @@ setMethod("var_pop",
#'
#' Aggregate function: returns the unbiased variance of the values in a group.
#'
+#' @param x Column to compute on.
+#'
#' @rdname var_samp
#' @name var_samp
+#' @aliases var_samp,Column-method
#' @family agg_funcs
#' @seealso \link{var_pop}, \link{var}
#' @export
@@ -1648,8 +1910,11 @@ setMethod("var_samp",
#'
#' Extracts the week number as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname weekofyear
#' @name weekofyear
+#' @aliases weekofyear,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{weekofyear(df$c)}
@@ -1665,9 +1930,12 @@ setMethod("weekofyear",
#'
#' Extracts the year as an integer from a given date/timestamp/string.
#'
+#' @param x Column to compute on.
+#'
#' @rdname year
#' @name year
#' @family datetime_funcs
+#' @aliases year,Column-method
#' @export
#' @examples \dontrun{year(df$c)}
#' @note year since 1.5.0
@@ -1682,10 +1950,14 @@ setMethod("year",
#'
#' Returns the angle theta from the conversion of rectangular coordinates (x, y) to
#' polar coordinates (r, theta).
+#'
+#' @param x Column to compute on.
+#' @param y Column to compute on.
#'
#' @rdname atan2
#' @name atan2
#' @family math_funcs
+#' @aliases atan2,Column-method
#' @export
#' @examples \dontrun{atan2(df$c, x)}
#' @note atan2 since 1.5.0
@@ -1702,8 +1974,12 @@ setMethod("atan2", signature(y = "Column"),
#'
#' Returns the number of days from `start` to `end`.
#'
+#' @param x start Column to use.
+#' @param y end Column to use.
+#'
#' @rdname datediff
#' @name datediff
+#' @aliases datediff,Column-method
#' @family datetime_funcs
#' @export
#' @examples \dontrun{datediff(df$c, x)}
@@ -1720,10 +1996,14 @@ setMethod("datediff", signature(y = "Column"),
#' hypot
#'
#' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow.
+#'
+#' @param x Column to compute on.
+#' @param y Column to compute on.
#'
#' @rdname hypot
#' @name hypot
#' @family math_funcs
+#' @aliases hypot,Column-method
#' @export
#' @examples \dontrun{hypot(df$c, x)}
#' @note hypot since 1.4.0
@@ -1740,9 +2020,13 @@ setMethod("hypot", signature(y = "Column"),
#'
#' Computes the Levenshtein distance of the two given string columns.
#'
+#' @param x Column to compute on.
+#' @param y Column to compute on.
+#'
#' @rdname levenshtein
#' @name levenshtein
#' @family string_funcs
+#' @aliases levenshtein,Column-method
#' @export
#' @examples \dontrun{levenshtein(df$c, x)}
#' @note levenshtein since 1.5.0
@@ -1759,9 +2043,13 @@ setMethod("levenshtein", signature(y = "Column"),
#'
#' Returns number of months between dates `date1` and `date2`.
#'
+#' @param x start Column to use.
+#' @param y end Column to use.
+#'
#' @rdname months_between
#' @name months_between
#' @family datetime_funcs
+#' @aliases months_between,Column-method
#' @export
#' @examples \dontrun{months_between(df$c, x)}
#' @note months_between since 1.5.0
@@ -1779,9 +2067,13 @@ setMethod("months_between", signature(y = "Column"),
#' Returns col1 if it is not NaN, or col2 if col1 is NaN.
#' Both inputs should be floating point columns (DoubleType or FloatType).
#'
+#' @param x first Column.
+#' @param y second Column.
+#'
#' @rdname nanvl
#' @name nanvl
#' @family normal_funcs
+#' @aliases nanvl,Column-method
#' @export
#' @examples \dontrun{nanvl(df$c, x)}
#' @note nanvl since 1.5.0
@@ -1798,10 +2090,14 @@ setMethod("nanvl", signature(y = "Column"),
#'
#' Returns the positive value of dividend mod divisor.
#'
+#' @param x divisor Column.
+#' @param y dividend Column.
+#'
#' @rdname pmod
#' @name pmod
#' @docType methods
#' @family math_funcs
+#' @aliases pmod,Column-method
#' @export
#' @examples \dontrun{pmod(df$c, x)}
#' @note pmod since 1.5.0
@@ -1817,6 +2113,10 @@ setMethod("pmod", signature(y = "Column"),
#' @rdname approxCountDistinct
#' @name approxCountDistinct
+#'
+#' @param rsd maximum estimation error allowed (default = 0.05)
+#'
+#' @aliases approxCountDistinct,Column-method
#' @export
#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
#' @note approxCountDistinct(Column, numeric) since 1.4.0
@@ -1829,9 +2129,13 @@ setMethod("approxCountDistinct",
#' Count Distinct
#'
+#' @param x Column to compute on
+#' @param ... other columns
+#'
#' @family agg_funcs
#' @rdname countDistinct
#' @name countDistinct
+#' @aliases countDistinct,Column-method
#' @return the number of distinct items in a group.
#' @export
#' @examples \dontrun{countDistinct(df$c)}
@@ -1852,10 +2156,14 @@ setMethod("countDistinct",
#' concat
#'
#' Concatenates multiple input string columns together into a single string column.
+#'
+#' @param x Column to compute on
+#' @param ... other columns
#'
#' @family string_funcs
#' @rdname concat
#' @name concat
+#' @aliases concat,Column-method
#' @export
#' @examples \dontrun{concat(df$strings, df$strings2)}
#' @note concat since 1.5.0
@@ -1875,9 +2183,13 @@ setMethod("concat",
#' Returns the greatest value of the list of column names, skipping null values.
#' This function takes at least 2 parameters. It will return null if all parameters are null.
#'
+#' @param x Column to compute on
+#' @param ... other columns
+#'
#' @family normal_funcs
#' @rdname greatest
#' @name greatest
+#' @aliases greatest,Column-method
#' @export
#' @examples \dontrun{greatest(df$c, df$d)}
#' @note greatest since 1.5.0
@@ -1898,8 +2210,12 @@ setMethod("greatest",
#' Returns the least value of the list of column names, skipping null values.
#' This function takes at least 2 parameters. It will return null if all parameters are null.
#'
+#' @param x Column to compute on
+#' @param ... other columns
+#'
#' @family normal_funcs
#' @rdname least
+#' @aliases least,Column-method
#' @name least
#' @export
#' @examples \dontrun{least(df$c, df$d)}
@@ -1917,7 +2233,9 @@ setMethod("least",
})
#' @rdname ceil
+#'
#' @name ceiling
+#' @aliases ceiling,Column-method
#' @export
#' @examples \dontrun{ceiling(df$c)}
#' @note ceiling since 1.5.0
@@ -1928,7 +2246,10 @@ setMethod("ceiling",
})
#' @rdname sign
+#' @param x Column to compute on
+#'
#' @name sign
+#' @aliases sign,Column-method
#' @export
#' @examples \dontrun{sign(df$c)}
#' @note sign since 1.5.0
@@ -1941,8 +2262,12 @@ setMethod("sign", signature(x = "Column"),
#'
#' Aggregate function: returns the number of distinct items in a group.
#'
+#' @param x Column to compute on
+#' @param ... other columns
+#'
#' @rdname countDistinct
#' @name n_distinct
+#' @aliases n_distinct,Column-method
#' @export
#' @examples \dontrun{n_distinct(df$c)}
#' @note n_distinct since 1.4.0
@@ -1951,8 +2276,11 @@ setMethod("n_distinct", signature(x = "Column"),
countDistinct(x, ...)
})
-#' @rdname count
+#' @rdname nrow
+#' @param x Column to compute on
+#'
#' @name n
+#' @aliases n,Column-method
#' @export
#' @examples \dontrun{n(df$c)}
#' @note n since 1.4.0
@@ -1972,9 +2300,13 @@ setMethod("n", signature(x = "Column"),
#' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a
#' specialized implementation.
#'
+#' @param y Column to compute on
+#' @param x date format specification
+#'
#' @family datetime_funcs
#' @rdname date_format
#' @name date_format
+#' @aliases date_format,Column,character-method
#' @export
#' @examples \dontrun{date_format(df$t, 'MM/dd/yyy')}
#' @note date_format since 1.5.0
@@ -1988,9 +2320,13 @@ setMethod("date_format", signature(y = "Column", x = "character"),
#'
#' Assumes given timestamp is UTC and converts to given timezone.
#'
+#' @param y Column to compute on
+#' @param x time zone to use
+#'
#' @family datetime_funcs
#' @rdname from_utc_timestamp
#' @name from_utc_timestamp
+#' @aliases from_utc_timestamp,Column,character-method
#' @export
#' @examples \dontrun{from_utc_timestamp(df$t, 'PST')}
#' @note from_utc_timestamp since 1.5.0
@@ -2011,6 +2347,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
#' @param y column to check
#' @param x substring to check
#' @family string_funcs
+#' @aliases instr,Column,character-method
#' @rdname instr
#' @name instr
#' @export
@@ -2033,9 +2370,13 @@ setMethod("instr", signature(y = "Column", x = "character"),
#' Day of the week parameter is case insensitive, and accepts first three or two characters:
#' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
#'
+#' @param y Column to compute on
+#' @param x Day of the week string
+#'
#' @family datetime_funcs
#' @rdname next_day
#' @name next_day
+#' @aliases next_day,Column,character-method
#' @export
#' @examples
#'\dontrun{
@@ -2053,9 +2394,13 @@ setMethod("next_day", signature(y = "Column", x = "character"),
#'
#' Assumes given timestamp is in given timezone and converts to UTC.
#'
+#' @param y Column to compute on
+#' @param x timezone to use
+#'
#' @family datetime_funcs
#' @rdname to_utc_timestamp
#' @name to_utc_timestamp
+#' @aliases to_utc_timestamp,Column,character-method
#' @export
#' @examples \dontrun{to_utc_timestamp(df$t, 'PST')}
#' @note to_utc_timestamp since 1.5.0
@@ -2069,9 +2414,13 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
#'
#' Returns the date that is numMonths after startDate.
#'
+#' @param y Column to compute on
+#' @param x Number of months to add
+#'
#' @name add_months
#' @family datetime_funcs
#' @rdname add_months
+#' @aliases add_months,Column,numeric-method
#' @export
#' @examples \dontrun{add_months(df$d, 1)}
#' @note add_months since 1.5.0
@@ -2085,9 +2434,13 @@ setMethod("add_months", signature(y = "Column", x = "numeric"),
#'
#' Returns the date that is `days` days after `start`
#'
+#' @param y Column to compute on
+#' @param x Number of days to add
+#'
#' @family datetime_funcs
#' @rdname date_add
#' @name date_add
+#' @aliases date_add,Column,numeric-method
#' @export
#' @examples \dontrun{date_add(df$d, 1)}
#' @note date_add since 1.5.0
@@ -2101,9 +2454,13 @@ setMethod("date_add", signature(y = "Column", x = "numeric"),
#'
#' Returns the date that is `days` days before `start`
#'
+#' @param y Column to compute on
+#' @param x Number of days to subtract
+#'
#' @family datetime_funcs
#' @rdname date_sub
#' @name date_sub
+#' @aliases date_sub,Column,numeric-method
#' @export
#' @examples \dontrun{date_sub(df$d, 1)}
#' @note date_sub since 1.5.0
@@ -2126,6 +2483,7 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
#' @family string_funcs
#' @rdname format_number
#' @name format_number
+#' @aliases format_number,Column,numeric-method
#' @export
#' @examples \dontrun{format_number(df$n, 4)}
#' @note format_number since 1.5.0
@@ -2147,6 +2505,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"),
#' @family misc_funcs
#' @rdname sha2
#' @name sha2
+#' @aliases sha2,Column,numeric-method
#' @export
#' @examples \dontrun{sha2(df$c, 256)}
#' @note sha2 since 1.5.0
@@ -2161,9 +2520,13 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
#' Shift the given value numBits left. If the given value is a long value, this function
#' will return a long value else it will return an integer value.
#'
+#' @param y column to compute on.
+#' @param x number of bits to shift.
+#'
#' @family math_funcs
#' @rdname shiftLeft
#' @name shiftLeft
+#' @aliases shiftLeft,Column,numeric-method
#' @export
#' @examples \dontrun{shiftLeft(df$c, 1)}
#' @note shiftLeft since 1.5.0
@@ -2180,9 +2543,13 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
#' Shift the given value numBits right. If the given value is a long value, it will return
#' a long value else it will return an integer value.
#'
+#' @param y column to compute on.
+#' @param x number of bits to shift.
+#'
#' @family math_funcs
#' @rdname shiftRight
#' @name shiftRight
+#' @aliases shiftRight,Column,numeric-method
#' @export
#' @examples \dontrun{shiftRight(df$c, 1)}
#' @note shiftRight since 1.5.0
@@ -2199,9 +2566,13 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
#' Unsigned shift the given value numBits right. If the given value is a long value,
#' it will return a long value else it will return an integer value.
#'
+#' @param y column to compute on.
+#' @param x number of bits to shift.
+#'
#' @family math_funcs
#' @rdname shiftRightUnsigned
#' @name shiftRightUnsigned
+#' @aliases shiftRightUnsigned,Column,numeric-method
#' @export
#' @examples \dontrun{shiftRightUnsigned(df$c, 1)}
#' @note shiftRightUnsigned since 1.5.0
@@ -2218,9 +2589,14 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
#' Concatenates multiple input string columns together into a single string column,
#' using the given separator.
#'
+#' @param x column to concatenate.
+#' @param sep separator to use.
+#' @param ... other columns to concatenate.
+#'
#' @family string_funcs
#' @rdname concat_ws
#' @name concat_ws
+#' @aliases concat_ws,character,Column-method
#' @export
#' @examples \dontrun{concat_ws('-', df$s, df$d)}
#' @note concat_ws since 1.5.0
@@ -2235,8 +2611,13 @@ setMethod("concat_ws", signature(sep = "character", x = "Column"),
#'
#' Convert a number in a string column from one base to another.
#'
+#' @param x column to convert.
+#' @param fromBase base to convert from.
+#' @param toBase base to convert to.
+#'
#' @family math_funcs
#' @rdname conv
+#' @aliases conv,Column,numeric,numeric-method
#' @name conv
#' @export
#' @examples \dontrun{conv(df$n, 2, 16)}
@@ -2258,6 +2639,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri
#'
#' @family normal_funcs
#' @rdname expr
+#' @aliases expr,character-method
#' @name expr
#' @export
#' @examples \dontrun{expr('length(name)')}
@@ -2275,6 +2657,7 @@ setMethod("expr", signature(x = "character"),
#' @family string_funcs
#' @rdname format_string
#' @name format_string
+#' @aliases format_string,character,Column-method
#' @export
#' @examples \dontrun{format_string('%d %s', df$a, df$b)}
#' @note format_string since 1.5.0
@@ -2296,6 +2679,7 @@ setMethod("format_string", signature(format = "character", x = "Column"),
#' @family datetime_funcs
#' @rdname from_unixtime
#' @name from_unixtime
+#' @aliases from_unixtime,Column-method
#' @export
#' @examples
#'\dontrun{
@@ -2334,6 +2718,7 @@ setMethod("from_unixtime", signature(x = "Column"),
#' @family datetime_funcs
#' @rdname window
#' @name window
+#' @aliases window,Column-method
#' @export
#' @examples
#'\dontrun{
@@ -2383,6 +2768,7 @@ setMethod("window", signature(x = "Column"),
#'
#' @family string_funcs
#' @rdname locate
+#' @aliases locate,character,Column-method
#' @name locate
#' @export
#' @examples \dontrun{locate('b', df$c, 1)}
@@ -2401,6 +2787,7 @@ setMethod("locate", signature(substr = "character", str = "Column"),
#'
#' @family string_funcs
#' @rdname lpad
+#' @aliases lpad,Column,numeric,character-method
#' @name lpad
#' @export
#' @examples \dontrun{lpad(df$c, 6, '#')}
@@ -2420,6 +2807,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
#' @family normal_funcs
#' @rdname rand
#' @name rand
+#' @aliases rand,missing-method
#' @export
#' @examples \dontrun{rand()}
#' @note rand since 1.5.0
@@ -2431,6 +2819,7 @@ setMethod("rand", signature(seed = "missing"),
#' @rdname rand
#' @name rand
+#' @aliases rand,numeric-method
#' @export
#' @note rand(numeric) since 1.5.0
setMethod("rand", signature(seed = "numeric"),
@@ -2446,6 +2835,7 @@ setMethod("rand", signature(seed = "numeric"),
#' @family normal_funcs
#' @rdname randn
#' @name randn
+#' @aliases randn,missing-method
#' @export
#' @examples \dontrun{randn()}
#' @note randn since 1.5.0
@@ -2457,6 +2847,7 @@ setMethod("randn", signature(seed = "missing"),
#' @rdname randn
#' @name randn
+#' @aliases randn,numeric-method
#' @export
#' @note randn(numeric) since 1.5.0
setMethod("randn", signature(seed = "numeric"),
@@ -2472,6 +2863,7 @@ setMethod("randn", signature(seed = "numeric"),
#' @family string_funcs
#' @rdname regexp_extract
#' @name regexp_extract
+#' @aliases regexp_extract,Column,character,numeric-method
#' @export
#' @examples \dontrun{regexp_extract(df$c, '(\d+)-(\d+)', 1)}
#' @note regexp_extract since 1.5.0
@@ -2491,6 +2883,7 @@ setMethod("regexp_extract",
#' @family string_funcs
#' @rdname regexp_replace
#' @name regexp_replace
+#' @aliases regexp_replace,Column,character,character-method
#' @export
#' @examples \dontrun{regexp_replace(df$c, '(\\d+)', '--')}
#' @note regexp_replace since 1.5.0
@@ -2510,6 +2903,7 @@ setMethod("regexp_replace",
#' @family string_funcs
#' @rdname rpad
#' @name rpad
+#' @aliases rpad,Column,numeric,character-method
#' @export
#' @examples \dontrun{rpad(df$c, 6, '#')}
#' @note rpad since 1.5.0
@@ -2530,6 +2924,7 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"),
#'
#' @family string_funcs
#' @rdname substring_index
+#' @aliases substring_index,Column,character,numeric-method
#' @name substring_index
#' @export
#' @examples
@@ -2557,6 +2952,7 @@ setMethod("substring_index",
#' @family string_funcs
#' @rdname translate
#' @name translate
+#' @aliases translate,Column,character,character-method
#' @export
#' @examples \dontrun{translate(df$c, 'rnlt', '123')}
#' @note translate since 1.5.0
@@ -2575,6 +2971,7 @@ setMethod("translate",
#' @family datetime_funcs
#' @rdname unix_timestamp
#' @name unix_timestamp
+#' @aliases unix_timestamp,missing,missing-method
#' @export
#' @examples
#'\dontrun{
@@ -2591,6 +2988,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"),
#' @rdname unix_timestamp
#' @name unix_timestamp
+#' @aliases unix_timestamp,Column,missing-method
#' @export
#' @note unix_timestamp(Column) since 1.5.0
setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
@@ -2601,6 +2999,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
#' @rdname unix_timestamp
#' @name unix_timestamp
+#' @aliases unix_timestamp,Column,character-method
#' @export
#' @note unix_timestamp(Column, character) since 1.5.0
setMethod("unix_timestamp", signature(x = "Column", format = "character"),
@@ -2616,6 +3015,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
#' @family normal_funcs
#' @rdname when
#' @name when
+#' @aliases when,Column-method
#' @seealso \link{ifelse}
#' @export
#' @examples \dontrun{when(df$age == 2, df$age + 1)}
@@ -2636,6 +3036,7 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
#' @family normal_funcs
#' @rdname ifelse
#' @name ifelse
+#' @aliases ifelse,Column-method
#' @seealso \link{when}
#' @export
#' @examples \dontrun{
@@ -2671,6 +3072,7 @@ setMethod("ifelse",
#' @rdname cume_dist
#' @name cume_dist
#' @family window_funcs
+#' @aliases cume_dist,missing-method
#' @export
#' @examples \dontrun{cume_dist()}
#' @note cume_dist since 1.6.0
@@ -2694,6 +3096,7 @@ setMethod("cume_dist",
#' @rdname dense_rank
#' @name dense_rank
#' @family window_funcs
+#' @aliases dense_rank,missing-method
#' @export
#' @examples \dontrun{dense_rank()}
#' @note dense_rank since 1.6.0
@@ -2714,6 +3117,7 @@ setMethod("dense_rank",
#'
#' @rdname lag
#' @name lag
+#' @aliases lag,characterOrColumn-method
#' @family window_funcs
#' @export
#' @examples \dontrun{lag(df$c)}
@@ -2739,10 +3143,15 @@ setMethod("lag",
#' an `offset` of one will return the next row at any given point in the window partition.
#'
#' This is equivalent to the LEAD function in SQL.
+#'
+#' @param x Column to compute on
+#' @param offset Number of rows to offset
+#' @param defaultValue (Optional) default value to use
#'
#' @rdname lead
#' @name lead
#' @family window_funcs
+#' @aliases lead,characterOrColumn,numeric-method
#' @export
#' @examples \dontrun{lead(df$c)}
#' @note lead since 1.6.0
@@ -2763,13 +3172,16 @@ setMethod("lead",
#' ntile
#'
#' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window
-#' partition. Fow example, if `n` is 4, the first quarter of the rows will get value 1, the second
+#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second
#' quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
#'
#' This is equivalent to the NTILE function in SQL.
#'
+#' @param x Number of ntile groups
+#'
#' @rdname ntile
#' @name ntile
+#' @aliases ntile,numeric-method
#' @family window_funcs
#' @export
#' @examples \dontrun{ntile(1)}
@@ -2794,6 +3206,7 @@ setMethod("ntile",
#' @rdname percent_rank
#' @name percent_rank
#' @family window_funcs
+#' @aliases percent_rank,missing-method
#' @export
#' @examples \dontrun{percent_rank()}
#' @note percent_rank since 1.6.0
@@ -2818,6 +3231,7 @@ setMethod("percent_rank",
#' @rdname rank
#' @name rank
#' @family window_funcs
+#' @aliases rank,missing-method
#' @export
#' @examples \dontrun{rank()}
#' @note rank since 1.6.0
@@ -2829,6 +3243,10 @@ setMethod("rank",
})
# Expose rank() in the R base package
+#' @name rank
+#' @rdname rank
+#' @aliases rank,ANY-method
+#' @export
setMethod("rank",
signature(x = "ANY"),
function(x, ...) {
@@ -2843,6 +3261,7 @@ setMethod("rank",
#'
#' @rdname row_number
#' @name row_number
+#' @aliases row_number,missing-method
#' @family window_funcs
#' @export
#' @examples \dontrun{row_number()}
@@ -2863,6 +3282,7 @@ setMethod("row_number",
#' @param x A Column
#' @param value A value to be checked if contained in the column
#' @rdname array_contains
+#' @aliases array_contains,Column-method
#' @name array_contains
#' @family collection_funcs
#' @export
@@ -2879,9 +3299,12 @@ setMethod("array_contains",
#'
#' Creates a new row for each element in the given array or map column.
#'
+#' @param x Column to compute on
+#'
#' @rdname explode
#' @name explode
#' @family collection_funcs
+#' @aliases explode,Column-method
#' @export
#' @examples \dontrun{explode(df$c)}
#' @note explode since 1.5.0
@@ -2895,9 +3318,12 @@ setMethod("explode",
#' size
#'
#' Returns length of array or map.
+#'
+#' @param x Column to compute on
#'
#' @rdname size
#' @name size
+#' @aliases size,Column-method
#' @family collection_funcs
#' @export
#' @examples \dontrun{size(df$c)}
@@ -2920,6 +3346,7 @@ setMethod("size",
#' FALSE, sorting is in descending order.
#' @rdname sort_array
#' @name sort_array
+#' @aliases sort_array,Column-method
#' @family collection_funcs
#' @export
#' @examples
@@ -2939,9 +3366,12 @@ setMethod("sort_array",
#'
#' Creates a new row for each element with position in the given array or map column.
#'
+#' @param x Column to compute on
+#'
#' @rdname posexplode
#' @name posexplode
#' @family collection_funcs
+#' @aliases posexplode,Column-method
#' @export
#' @examples \dontrun{posexplode(df$c)}
#' @note posexplode since 2.1.0
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8416e5cdb8..e7444ac246 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -51,7 +51,7 @@ setGeneric("collectPartition",
standardGeneric("collectPartition")
})
-# @rdname count
+# @rdname nrow
# @export
setGeneric("count", function(x) { standardGeneric("count") })
@@ -395,7 +395,7 @@ setGeneric("value", function(bcast) { standardGeneric("value") })
#################### SparkDataFrame Methods ########################
-#' @rdname agg
+#' @rdname summarize
#' @export
setGeneric("agg", function (x, ...) { standardGeneric("agg") })
@@ -654,7 +654,7 @@ setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
# @export
setGeneric("subset", function(x, ...) { standardGeneric("subset") })
-#' @rdname agg
+#' @rdname summarize
#' @export
setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
@@ -1022,7 +1022,7 @@ setGeneric("month", function(x) { standardGeneric("month") })
#' @export
setGeneric("months_between", function(y, x) { standardGeneric("months_between") })
-#' @rdname count
+#' @rdname nrow
#' @export
setGeneric("n", function(x) { standardGeneric("n") })
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 5ed7e8abb4..85348ae76b 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -47,6 +47,7 @@ groupedData <- function(sgd) {
#' @rdname show
+#' @aliases show,GroupedData-method
#' @note show(GroupedData) since 1.4.0
setMethod("show", "GroupedData",
function(object) {
@@ -61,6 +62,7 @@ setMethod("show", "GroupedData",
#' @param x a GroupedData
#' @return a SparkDataFrame
#' @rdname count
+#' @aliases count,GroupedData-method
#' @export
#' @examples
#' \dontrun{
@@ -84,6 +86,7 @@ setMethod("count",
#' @param x a GroupedData
#' @return a SparkDataFrame
#' @rdname summarize
+#' @aliases agg,GroupedData-method
#' @name agg
#' @family agg_funcs
#' @export
@@ -121,6 +124,7 @@ setMethod("agg",
#' @rdname summarize
#' @name summarize
+#' @aliases summarize,GroupedData-method
#' @note summarize since 1.4.0
setMethod("summarize",
signature(x = "GroupedData"),
@@ -146,6 +150,7 @@ methods <- c("avg", "max", "mean", "min", "sum")
#' @param values A value or a list/vector of distinct values for the output columns.
#' @return GroupedData object
#' @rdname pivot
+#' @aliases pivot,GroupedData,character-method
#' @name pivot
#' @export
#' @examples
@@ -198,6 +203,7 @@ createMethods()
#'
#' @param x A GroupedData
#' @rdname gapply
+#' @aliases gapply,GroupedData-method
#' @name gapply
#' @export
#' @note gapply(GroupedData) since 2.0.0
@@ -212,6 +218,7 @@ setMethod("gapply",
#'
#' @param x A GroupedData
#' @rdname gapplyCollect
+#' @aliases gapplyCollect,GroupedData-method
#' @name gapplyCollect
#' @export
#' @note gapplyCollect(GroupedData) since 2.0.0
@@ -243,4 +250,4 @@ gapplyInternal <- function(x, func, schema) {
broadcastArr,
if (class(schema) == "structType") { schema$jobj } else { NULL })
dataFrame(sdf)
-} \ No newline at end of file
+}
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 94e1f654f5..50c601fcd9 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -91,6 +91,7 @@ NULL
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
#' @param tol Positive convergence tolerance of iterations.
#' @param maxIter Integer giving the maximal number of IRLS iterations.
+#' @aliases spark.glm,SparkDataFrame,formula-method
#' @return \code{spark.glm} returns a fitted generalized linear model
#' @rdname spark.glm
#' @name spark.glm
@@ -306,6 +307,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
#' @param initMode The initialization algorithm choosen to fit the model
#' @return \code{spark.kmeans} returns a fitted k-means model
#' @rdname spark.kmeans
+#' @aliases spark.kmeans,SparkDataFrame,formula-method
#' @name spark.kmeans
#' @export
#' @examples
@@ -418,6 +420,7 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @param smoothing Smoothing parameter
#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model
#' @rdname spark.naiveBayes
+#' @aliases spark.naiveBayes,SparkDataFrame,formula-method
#' @name spark.naiveBayes
#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/}
#' @export
@@ -512,7 +515,6 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
#' which means throw exception if the output path exists.
#'
#' @rdname spark.kmeans
-#' @name write.ml
#' @export
#' @note write.ml(KMeansModel, character) since 2.0.0
setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index a91e9980df..b429f5de13 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -26,6 +26,7 @@
#' @param x a structField object (created with the field() function)
#' @param ... additional structField objects
#' @return a structType object
+#' @rdname structType
#' @export
#' @examples
#'\dontrun{
@@ -40,13 +41,19 @@ structType <- function(x, ...) {
UseMethod("structType", x)
}
-structType.jobj <- function(x) {
+#' @rdname structType
+#' @method structType jobj
+#' @export
+structType.jobj <- function(x, ...) {
obj <- structure(list(), class = "structType")
obj$jobj <- x
obj$fields <- function() { lapply(callJMethod(obj$jobj, "fields"), structField) }
obj
}
+#' @rdname structType
+#' @method structType structField
+#' @export
structType.structField <- function(x, ...) {
fields <- list(x, ...)
if (!all(sapply(fields, inherits, "structField"))) {
@@ -104,7 +111,10 @@ structField <- function(x, ...) {
UseMethod("structField", x)
}
-structField.jobj <- function(x) {
+#' @rdname structField
+#' @method structField jobj
+#' @export
+structField.jobj <- function(x, ...) {
obj <- structure(list(), class = "structField")
obj$jobj <- x
obj$name <- function() { callJMethod(x, "name") }
@@ -179,7 +189,7 @@ checkType <- function(type) {
#' @param nullable A logical vector indicating whether or not the field is nullable
#' @rdname structField
#' @export
-structField.character <- function(x, type, nullable = TRUE) {
+structField.character <- function(x, type, nullable = TRUE, ...) {
if (class(x) != "character") {
stop("Field name must be a string.")
}
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index c92352e1b0..2b4ce195cb 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -34,6 +34,7 @@ setOldClass("jobj")
#'
#' @rdname crosstab
#' @name crosstab
+#' @aliases crosstab,SparkDataFrame,character,character-method
#' @family stat functions
#' @export
#' @examples
@@ -59,6 +60,7 @@ setMethod("crosstab",
#'
#' @rdname cov
#' @name cov
+#' @aliases cov,SparkDataFrame-method
#' @family stat functions
#' @export
#' @examples
@@ -88,6 +90,7 @@ setMethod("cov",
#'
#' @rdname corr
#' @name corr
+#' @aliases corr,SparkDataFrame-method
#' @family stat functions
#' @export
#' @examples
@@ -120,6 +123,7 @@ setMethod("corr",
#'
#' @rdname freqItems
#' @name freqItems
+#' @aliases freqItems,SparkDataFrame,character-method
#' @family stat functions
#' @export
#' @examples
@@ -158,6 +162,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
#'
#' @rdname approxQuantile
#' @name approxQuantile
+#' @aliases approxQuantile,SparkDataFrame,character,numeric,numeric-method
#' @family stat functions
#' @export
#' @examples
@@ -188,6 +193,7 @@ setMethod("approxQuantile",
#' @return A new SparkDataFrame that represents the stratified sample
#'
#' @rdname sampleBy
+#' @aliases sampleBy,SparkDataFrame,character,list,numeric-method
#' @name sampleBy
#' @family stat functions
#' @export
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index e75bfbf037..240b9f669b 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -126,20 +126,16 @@ hashCode <- function(key) {
as.integer(bitwXor(intBits[2], intBits[1]))
} else if (class(key) == "character") {
# TODO: SPARK-7839 means we might not have the native library available
- if (is.loaded("stringHashCode")) {
- .Call("stringHashCode", key)
+ n <- nchar(key)
+ if (n == 0) {
+ 0L
} else {
- n <- nchar(key)
- if (n == 0) {
- 0L
- } else {
- asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
- hashC <- 0
- for (k in 1:length(asciiVals)) {
- hashC <- mult31AndAdd(hashC, asciiVals[k])
- }
- as.integer(hashC)
+ asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
+ hashC <- 0
+ for (k in 1:length(asciiVals)) {
+ hashC <- mult31AndAdd(hashC, asciiVals[k])
}
+ as.integer(hashC)
}
} else {
warning(paste("Could not hash object, returning 0", sep = ""))
diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R
index d9d069cde9..215d0e7b5c 100644
--- a/R/pkg/R/window.R
+++ b/R/pkg/R/window.R
@@ -28,6 +28,7 @@
#'
#' @rdname windowPartitionBy
#' @name windowPartitionBy
+#' @aliases windowPartitionBy,character-method
#' @export
#' @examples
#' \dontrun{
@@ -50,6 +51,7 @@ setMethod("windowPartitionBy",
#' @rdname windowPartitionBy
#' @name windowPartitionBy
+#' @aliases windowPartitionBy,Column-method
#' @export
#' @note windowPartitionBy(Column) since 2.0.0
setMethod("windowPartitionBy",
@@ -75,6 +77,7 @@ setMethod("windowPartitionBy",
#'
#' @rdname windowOrderBy
#' @name windowOrderBy
+#' @aliases windowOrderBy,character-method
#' @export
#' @examples
#' \dontrun{
@@ -97,6 +100,7 @@ setMethod("windowOrderBy",
#' @rdname windowOrderBy
#' @name windowOrderBy
+#' @aliases windowOrderBy,Column-method
#' @export
#' @note windowOrderBy(Column) since 2.0.0
setMethod("windowOrderBy",