<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (version 1.7.0_51) on Wed Sep 16 15:55:12 PDT 2015 -->
<title>PairRDDFunctions</title>
<meta name="date" content="2015-09-16">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, unless the URL
// contains the 'is-external=true' marker.
(function () {
  var pageUrl = location.href;
  if (pageUrl.indexOf('is-external=true') < 0) {
    parent.document.title = "PairRDDFunctions";
  }
})();
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar_top">
<!-- -->
</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/spark/rdd/OrderedRDDFunctions.html" title="class in org.apache.spark.rdd"><span class="strong">Prev Class</span></a></li>
<li><a href="../../../../org/apache/spark/rdd/PartitionCoalescer.html" title="class in org.apache.spark.rdd"><span class="strong">Next Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/spark/rdd/PairRDDFunctions.html" target="_top">Frames</a></li>
<li><a href="PairRDDFunctions.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" link only when this page is the top-level window
// (window == top); otherwise hide it.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary: </li>
<li>Nested | </li>
<li>Field | </li>
<li><a href="#constructor_summary">Constr</a> | </li>
<li><a href="#method_summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail: </li>
<li>Field | </li>
<li><a href="#constructor_detail">Constr</a> | </li>
<li><a href="#method_detail">Method</a></li>
</ul>
</div>
<a name="skip-navbar_top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.spark.rdd</div>
<h2 title="Class PairRDDFunctions" class="title">Class PairRDDFunctions&lt;K,V&gt;</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.spark.rdd.PairRDDFunctions&lt;K,V&gt;</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd>java.io.Serializable, <a href="../../../../org/apache/spark/Logging.html" title="interface in org.apache.spark">Logging</a></dd>
</dl>
<hr>
<br>
<pre>public class <span class="strong">PairRDDFunctions&lt;K,V&gt;</span>
extends java.lang.Object
implements <a href="../../../../org/apache/spark/Logging.html" title="interface in org.apache.spark">Logging</a>, scala.Serializable</pre>
<div class="block">Extra functions available on RDDs of (key, value) pairs through an implicit conversion.</div>
<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../../serialized-form.html#org.apache.spark.rdd.PairRDDFunctions">Serialized Form</a></dd></dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor_summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd"> </span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#PairRDDFunctions(org.apache.spark.rdd.RDD, scala.reflect.ClassTag, scala.reflect.ClassTag, scala.math.Ordering)">PairRDDFunctions</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; self,
scala.reflect.ClassTag&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt; kt,
scala.reflect.ClassTag&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; vt,
scala.math.Ordering&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt; ord)</code> </td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method_summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span>Methods</span><span class="tabEnd"> </span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#aggregateByKey(U, scala.Function2, scala.Function2, scala.reflect.ClassTag)">aggregateByKey</a></strong>(U zeroValue,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$3)</code>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#aggregateByKey(U, int, scala.Function2, scala.Function2, scala.reflect.ClassTag)">aggregateByKey</a></strong>(U zeroValue,
int numPartitions,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$2)</code>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#aggregateByKey(U, org.apache.spark.Partitioner, scala.Function2, scala.Function2, scala.reflect.ClassTag)">aggregateByKey</a></strong>(U zeroValue,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$1)</code>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">For each key k in <code>this</code> or <code>other</code>, return a resulting RDD that contains a tuple with the
list of values for that key in <code>this</code> as well as <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, int)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</code>
<div class="block">For each key k in <code>this</code> or <code>other</code>, return a resulting RDD that contains a tuple with the
list of values for that key in <code>this</code> as well as <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code> </td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W1,W2&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2)</code>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code>, return a resulting RDD that contains a
tuple with the list of values for that key in <code>this</code>, <code>other1</code> and <code>other2</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W1,W2&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, int)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
int numPartitions)</code>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code>, return a resulting RDD that contains a
tuple with the list of values for that key in <code>this</code>, <code>other1</code> and <code>other2</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W1,W2&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code> </td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W1,W2,W3&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;,scala.collection.Iterable&lt;W3&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3&gt;&gt; other3)</code> </td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W1,W2,W3&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;,scala.collection.Iterable&lt;W3&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, int)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3&gt;&gt; other3,
int numPartitions)</code>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code> or <code>other3</code>,
return a resulting RDD that contains a tuple with the list of values
for that key in <code>this</code>, <code>other1</code>, <code>other2</code> and <code>other3</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W1,W2,W3&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;,scala.collection.Iterable&lt;W3&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">cogroup</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3&gt;&gt; other3,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code> or <code>other3</code>,
return a resulting RDD that contains a tuple with the list of values
for that key in <code>this</code>, <code>other1</code>, <code>other2</code> and <code>other3</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#collectAsMap()">collectAsMap</a></strong>()</code>
<div class="block">Return the key-value pairs in this RDD to the master as a Map.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#combineByKey(scala.Function1, scala.Function2, scala.Function2)">combineByKey</a></strong>(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners)</code> </td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#combineByKey(scala.Function1, scala.Function2, scala.Function2, int)">combineByKey</a></strong>(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners,
int numPartitions)</code>
<div class="block">Simplified version of combineByKey that hash-partitions the output RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#combineByKey(scala.Function1, scala.Function2, scala.Function2, org.apache.spark.Partitioner, boolean, org.apache.spark.serializer.Serializer)">combineByKey</a></strong>(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
boolean mapSideCombine,
<a href="../../../../org/apache/spark/serializer/Serializer.html" title="class in org.apache.spark.serializer">Serializer</a> serializer)</code>
<div class="block">Generic function to combine the elements for each key using a custom set of aggregation
functions.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countApproxDistinctByKey(double)">countApproxDistinctByKey</a></strong>(double relativeSD)</code>
<div class="block">Return approximate number of distinct values for each key in this RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countApproxDistinctByKey(double, int)">countApproxDistinctByKey</a></strong>(double relativeSD,
int numPartitions)</code>
<div class="block">Return approximate number of distinct values for each key in this RDD.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countApproxDistinctByKey(double, org.apache.spark.Partitioner)">countApproxDistinctByKey</a></strong>(double relativeSD,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Return approximate number of distinct values for each key in this RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countApproxDistinctByKey(int, int, org.apache.spark.Partitioner)">countApproxDistinctByKey</a></strong>(int p,
int sp,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">:: Experimental ::</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countByKey()">countByKey</a></strong>()</code>
<div class="block">Count the number of elements for each key, collecting the results to a local Map.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a>&lt;scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#countByKeyApprox(long, double)">countByKeyApprox</a></strong>(long timeout,
double confidence)</code>
<div class="block">:: Experimental ::
Approximate version of countByKey that can return a partial result if it does
not finish within a timeout.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static scala.util.DynamicVariable&lt;java.lang.Object&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#disableOutputSpecValidation()">disableOutputSpecValidation</a></strong>()</code> </td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#flatMapValues(scala.Function1)">flatMapValues</a></strong>(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.collection.TraversableOnce&lt;U&gt;&gt; f)</code>
<div class="block">Pass each value in the key-value pair RDD through a flatMap function without changing the
keys; this also retains the original RDD's partitioning.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#foldByKey(V, scala.Function2)">foldByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication).</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#foldByKey(V, int, scala.Function2)">foldByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
int numPartitions,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.).</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#foldByKey(V, org.apache.spark.Partitioner, scala.Function2)">foldByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication).</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#fullOuterJoin(org.apache.spark.rdd.RDD)">fullOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#fullOuterJoin(org.apache.spark.rdd.RDD, int)">fullOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</code>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#fullOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">fullOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupByKey()">groupByKey</a></strong>()</code>
<div class="block">Group the values for each key in the RDD into a single sequence.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupByKey(int)">groupByKey</a></strong>(int numPartitions)</code>
<div class="block">Group the values for each key in the RDD into a single sequence.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupByKey(org.apache.spark.Partitioner)">groupByKey</a></strong>(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Group the values for each key in the RDD into a single sequence.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupWith(org.apache.spark.rdd.RDD)">groupWith</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">Alias for cogroup.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W1,W2&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupWith(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">groupWith</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2)</code>
<div class="block">Alias for cogroup.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W1,W2,W3&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4&lt;scala.collection.Iterable&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.collection.Iterable&lt;W1&gt;,scala.collection.Iterable&lt;W2&gt;,scala.collection.Iterable&lt;W3&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#groupWith(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">groupWith</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1&gt;&gt; other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2&gt;&gt; other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3&gt;&gt; other3)</code>
<div class="block">Alias for cogroup.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#join(org.apache.spark.rdd.RDD)">join</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#join(org.apache.spark.rdd.RDD, int)">join</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</code>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#join(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">join</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#keys()">keys</a></strong>()</code>
<div class="block">Return an RDD with the keys of each tuple.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#leftOuterJoin(org.apache.spark.rdd.RDD)">leftOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#leftOuterJoin(org.apache.spark.rdd.RDD, int)">leftOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</code>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option&lt;W&gt;&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#leftOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">leftOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>scala.collection.Seq&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#lookup(K)">lookup</a></strong>(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a> key)</code>
<div class="block">Return the list of values in the RDD for key <code>key</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#mapValues(scala.Function1)">mapValues</a></strong>(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; f)</code>
<div class="block">Pass each value in the key-value pair RDD through a map function without changing the keys;
this also retains the original RDD's partitioning.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#partitionBy(org.apache.spark.Partitioner)">partitionBy</a></strong>(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Return a copy of the RDD partitioned using the specified partitioner.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static int</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES()">RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES</a></strong>()</code> </td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#reduceByKey(scala.Function2)">reduceByKey</a></strong>(scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative reduce function.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#reduceByKey(scala.Function2, int)">reduceByKey</a></strong>(scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func,
int numPartitions)</code>
<div class="block">Merge the values for each key using an associative reduce function.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#reduceByKey(org.apache.spark.Partitioner, scala.Function2)">reduceByKey</a></strong>(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative reduce function.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#reduceByKeyLocally(scala.Function2)">reduceByKeyLocally</a></strong>(scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Merge the values for each key using an associative reduce function, but return the results
immediately to the master as a Map.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#reduceByKeyToDriver(scala.Function2)">reduceByKeyToDriver</a></strong>(scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</code>
<div class="block">Alias for reduceByKeyLocally.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#rightOuterJoin(org.apache.spark.rdd.RDD)">rightOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</code>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#rightOuterJoin(org.apache.spark.rdd.RDD, int)">rightOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</code>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,W&gt;&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#rightOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">rightOuterJoin</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</code>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#sampleByKey(boolean, scala.collection.Map, long)">sampleByKey</a></strong>(boolean withReplacement,
scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt; fractions,
long seed)</code>
<div class="block">Return a subset of this RDD sampled by key (via stratified sampling).</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#sampleByKeyExact(boolean, scala.collection.Map, long)">sampleByKeyExact</a></strong>(boolean withReplacement,
scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt; fractions,
long seed)</code>
<div class="block">::Experimental::
Return a subset of this RDD sampled by key (via stratified sampling) containing exactly
math.ceil(numItems * samplingRate) for each stratum (group of pairs with the same key).</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsHadoopDataset(org.apache.hadoop.mapred.JobConf)">saveAsHadoopDataset</a></strong>(org.apache.hadoop.mapred.JobConf conf)</code>
<div class="block">Output the RDD to any Hadoop-supported storage system, using a Hadoop JobConf object for
that storage system.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, java.lang.Class)">saveAsHadoopFile</a></strong>(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapred.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt; codec)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, org.apache.hadoop.mapred.JobConf, scala.Option)">saveAsHadoopFile</a></strong>(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapred.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
org.apache.hadoop.mapred.JobConf conf,
scala.Option&lt;java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt;&gt; codec)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;F extends org.apache.hadoop.mapred.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; <br>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsHadoopFile(java.lang.String, java.lang.Class, scala.reflect.ClassTag)">saveAsHadoopFile</a></strong>(java.lang.String path,
java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt; codec,
scala.reflect.ClassTag&lt;F&gt; fm)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;F extends org.apache.hadoop.mapred.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; <br>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsHadoopFile(java.lang.String, scala.reflect.ClassTag)">saveAsHadoopFile</a></strong>(java.lang.String path,
scala.reflect.ClassTag&lt;F&gt; fm)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsNewAPIHadoopDataset(org.apache.hadoop.conf.Configuration)">saveAsNewAPIHadoopDataset</a></strong>(org.apache.hadoop.conf.Configuration conf)</code>
<div class="block">Output the RDD to any Hadoop-supported storage system with new Hadoop API, using a Hadoop
Configuration object for that storage system.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsNewAPIHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, org.apache.hadoop.conf.Configuration)">saveAsNewAPIHadoopFile</a></strong>(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapreduce.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
org.apache.hadoop.conf.Configuration conf)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a new Hadoop API <code>OutputFormat</code>
(mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;F extends org.apache.hadoop.mapreduce.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; <br>void</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#saveAsNewAPIHadoopFile(java.lang.String, scala.reflect.ClassTag)">saveAsNewAPIHadoopFile</a></strong>(java.lang.String path,
scala.reflect.ClassTag&lt;F&gt; fm)</code>
<div class="block">Output the RDD to any Hadoop-supported file system, using a new Hadoop API <code>OutputFormat</code>
(mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#subtractByKey(org.apache.spark.rdd.RDD, scala.reflect.ClassTag)">subtractByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
scala.reflect.ClassTag&lt;W&gt; evidence$4)</code>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#subtractByKey(org.apache.spark.rdd.RDD, int, scala.reflect.ClassTag)">subtractByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions,
scala.reflect.ClassTag&lt;W&gt; evidence$5)</code>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>&lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#subtractByKey(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner, scala.reflect.ClassTag)">subtractByKey</a></strong>(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p,
scala.reflect.ClassTag&lt;W&gt; evidence$6)</code>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code><a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;</code></td>
<td class="colLast"><code><strong><a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html#values()">values</a></strong>()</code>
<div class="block">Return an RDD with the values of each tuple.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods_inherited_from_class_org.apache.spark.Logging">
<!-- -->
</a>
<h3>Methods inherited from interface org.apache.spark.<a href="../../../../org/apache/spark/Logging.html" title="interface in org.apache.spark">Logging</a></h3>
<code><a href="../../../../org/apache/spark/Logging.html#initializeIfNecessary()">initializeIfNecessary</a>, <a href="../../../../org/apache/spark/Logging.html#initializeLogging()">initializeLogging</a>, <a href="../../../../org/apache/spark/Logging.html#isTraceEnabled()">isTraceEnabled</a>, <a href="../../../../org/apache/spark/Logging.html#log_()">log_</a>, <a href="../../../../org/apache/spark/Logging.html#log()">log</a>, <a href="../../../../org/apache/spark/Logging.html#logDebug(scala.Function0)">logDebug</a>, <a href="../../../../org/apache/spark/Logging.html#logDebug(scala.Function0, java.lang.Throwable)">logDebug</a>, <a href="../../../../org/apache/spark/Logging.html#logError(scala.Function0)">logError</a>, <a href="../../../../org/apache/spark/Logging.html#logError(scala.Function0, java.lang.Throwable)">logError</a>, <a href="../../../../org/apache/spark/Logging.html#logInfo(scala.Function0)">logInfo</a>, <a href="../../../../org/apache/spark/Logging.html#logInfo(scala.Function0, java.lang.Throwable)">logInfo</a>, <a href="../../../../org/apache/spark/Logging.html#logName()">logName</a>, <a href="../../../../org/apache/spark/Logging.html#logTrace(scala.Function0)">logTrace</a>, <a href="../../../../org/apache/spark/Logging.html#logTrace(scala.Function0, java.lang.Throwable)">logTrace</a>, <a href="../../../../org/apache/spark/Logging.html#logWarning(scala.Function0)">logWarning</a>, <a href="../../../../org/apache/spark/Logging.html#logWarning(scala.Function0, java.lang.Throwable)">logWarning</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor_detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="PairRDDFunctions(org.apache.spark.rdd.RDD, scala.reflect.ClassTag, scala.reflect.ClassTag, scala.math.Ordering)">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>PairRDDFunctions</h4>
<pre>public PairRDDFunctions(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; self,
scala.reflect.ClassTag&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt; kt,
scala.reflect.ClassTag&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; vt,
scala.math.Ordering&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt; ord)</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method_detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES</h4>
<pre>public static int RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES()</pre>
</li>
</ul>
<a name="disableOutputSpecValidation()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>disableOutputSpecValidation</h4>
<pre>public static scala.util.DynamicVariable&lt;java.lang.Object&gt; disableOutputSpecValidation()</pre>
</li>
</ul>
<a name="combineByKey(scala.Function1, scala.Function2, scala.Function2, org.apache.spark.Partitioner, boolean, org.apache.spark.serializer.Serializer)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>combineByKey</h4>
<pre>public &lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt; combineByKey(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
boolean mapSideCombine,
<a href="../../../../org/apache/spark/serializer/Serializer.html" title="class in org.apache.spark.serializer">Serializer</a> serializer)</pre>
<div class="block">Generic function to combine the elements for each key using a custom set of aggregation
functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C.
Note that V and C can be different -- for example, one might group an RDD of type
(Int, Int) into an RDD of type (Int, Seq[Int]). Users provide three functions:
<ul>
<li><code>createCombiner</code>, which turns a V into a C (e.g., creates a one-element list)</li>
<li><code>mergeValue</code>, to merge a V into a C (e.g., adds it to the end of a list)</li>
<li><code>mergeCombiners</code>, to combine two C's into a single one.</li>
</ul>
<p>
In addition, users can control the partitioning of the output RDD, and whether to perform
map-side aggregation (if a mapper can produce multiple items with the same key).</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>createCombiner</code> - (undocumented)</dd><dd><code>mergeValue</code> - (undocumented)</dd><dd><code>mergeCombiners</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd><dd><code>mapSideCombine</code> - (undocumented)</dd><dd><code>serializer</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="combineByKey(scala.Function1, scala.Function2, scala.Function2, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>combineByKey</h4>
<pre>public &lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt; combineByKey(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners,
int numPartitions)</pre>
<div class="block">Simplified version of combineByKey that hash-partitions the output RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>createCombiner</code> - (undocumented)</dd><dd><code>mergeValue</code> - (undocumented)</dd><dd><code>mergeCombiners</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="aggregateByKey(java.lang.Object,org.apache.spark.Partitioner,scala.Function2,scala.Function2,scala.reflect.ClassTag)">
<!-- -->
</a><a name="aggregateByKey(U, org.apache.spark.Partitioner, scala.Function2, scala.Function2, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>aggregateByKey</h4>
<pre>public &lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt; aggregateByKey(U zeroValue,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$1)</pre>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".
This function can return a different result type, U, than the type of the values in this RDD,
V. Thus, we need one operation for merging a V into a U and one operation for merging two U's,
as in scala.TraversableOnce. The former operation is used for merging values within a
partition, and the latter is used for merging values between partitions. To avoid memory
allocation, both of these functions are allowed to modify and return their first argument
instead of creating a new U.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd><dd><code>seqOp</code> - (undocumented)</dd><dd><code>combOp</code> - (undocumented)</dd><dd><code>evidence$1</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="aggregateByKey(java.lang.Object,int,scala.Function2,scala.Function2,scala.reflect.ClassTag)">
<!-- -->
</a><a name="aggregateByKey(U, int, scala.Function2, scala.Function2, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>aggregateByKey</h4>
<pre>public &lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt; aggregateByKey(U zeroValue,
int numPartitions,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$2)</pre>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".
This function can return a different result type, U, than the type of the values in this RDD,
V. Thus, we need one operation for merging a V into a U and one operation for merging two U's,
as in scala.TraversableOnce. The former operation is used for merging values within a
partition, and the latter is used for merging values between partitions. To avoid memory
allocation, both of these functions are allowed to modify and return their first argument
instead of creating a new U.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd><dd><code>seqOp</code> - (undocumented)</dd><dd><code>combOp</code> - (undocumented)</dd><dd><code>evidence$2</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="aggregateByKey(java.lang.Object,scala.Function2,scala.Function2,scala.reflect.ClassTag)">
<!-- -->
</a><a name="aggregateByKey(U, scala.Function2, scala.Function2, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>aggregateByKey</h4>
<pre>public &lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt; aggregateByKey(U zeroValue,
scala.Function2&lt;U,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; seqOp,
scala.Function2&lt;U,U,U&gt; combOp,
scala.reflect.ClassTag&lt;U&gt; evidence$3)</pre>
<div class="block">Aggregate the values of each key, using given combine functions and a neutral "zero value".
This function can return a different result type, U, than the type of the values in this RDD,
V. Thus, we need one operation for merging a V into a U and one operation for merging two U's,
as in scala.TraversableOnce. The former operation is used for merging values within a
partition, and the latter is used for merging values between partitions. To avoid memory
allocation, both of these functions are allowed to modify and return their first argument
instead of creating a new U.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>seqOp</code> - (undocumented)</dd><dd><code>combOp</code> - (undocumented)</dd><dd><code>evidence$3</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="foldByKey(java.lang.Object,org.apache.spark.Partitioner,scala.Function2)">
<!-- -->
</a><a name="foldByKey(V, org.apache.spark.Partitioner, scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>foldByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; foldByKey(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</pre>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication).</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="foldByKey(java.lang.Object,int,scala.Function2)">
<!-- -->
</a><a name="foldByKey(V, int, scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>foldByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; foldByKey(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
int numPartitions,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</pre>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication).</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="foldByKey(java.lang.Object,scala.Function2)">
<!-- -->
</a><a name="foldByKey(V, scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>foldByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; foldByKey(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a> zeroValue,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</pre>
<div class="block">Merge the values for each key using an associative function and a neutral "zero value" which
may be added to the result an arbitrary number of times, and must not change the result
(e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication).</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>zeroValue</code> - (undocumented)</dd><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="sampleByKey(boolean, scala.collection.Map, long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sampleByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; sampleByKey(boolean withReplacement,
scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt; fractions,
long seed)</pre>
<div class="block">Return a subset of this RDD sampled by key (via stratified sampling).
<p>
Create a sample of this RDD using variable sampling rates for different keys as specified by
<code>fractions</code>, a key to sampling rate map, via simple random sampling with one pass over the
RDD, to produce a sample whose size is approximately equal to the sum of
math.ceil(numItems * samplingRate) over all key values.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>withReplacement</code> - whether to sample with or without replacement</dd><dd><code>fractions</code> - map of specific keys to sampling rates</dd><dd><code>seed</code> - seed for the random number generator</dd>
<dt><span class="strong">Returns:</span></dt><dd>RDD containing the sampled subset</dd></dl>
</li>
</ul>
<a name="sampleByKeyExact(boolean, scala.collection.Map, long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>sampleByKeyExact</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; sampleByKeyExact(boolean withReplacement,
scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt; fractions,
long seed)</pre>
<div class="block">:: Experimental ::
Return a subset of this RDD sampled by key (via stratified sampling) containing exactly
math.ceil(numItems * samplingRate) for each stratum (group of pairs with the same key).
<p>
This method differs from <code>sampleByKey</code> in that we make additional passes over the RDD to
create a sample size that's exactly equal to the sum of math.ceil(numItems * samplingRate)
over all key values with a 99.99% confidence. When sampling without replacement, we need one
additional pass over the RDD to guarantee sample size; when sampling with replacement, we need
two additional passes.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>withReplacement</code> - whether to sample with or without replacement</dd><dd><code>fractions</code> - map of specific keys to sampling rates</dd><dd><code>seed</code> - seed for the random number generator</dd>
<dt><span class="strong">Returns:</span></dt><dd>RDD containing the sampled subset</dd></dl>
</li>
</ul>
<a name="reduceByKey(org.apache.spark.Partitioner, scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduceByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; reduceByKey(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner,
scala.Function2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; func)</pre>
<div class="block">Merge the values for each key using an associative reduce function. This will also perform
the merging locally on each mapper before sending results to a reducer, similarly to a
"combiner" in MapReduce.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>partitioner</code> - (undocumented)</dd><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="reduceByKey(scala.Function2, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduceByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>> reduceByKey(scala.Function2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> func,
int numPartitions)</pre>
<div class="block">Merge the values for each key using an associative reduce function. This will also perform
the merging locally on each mapper before sending results to a reducer, similarly to a
"combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>func</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="reduceByKey(scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduceByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>> reduceByKey(scala.Function2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> func)</pre>
<div class="block">Merge the values for each key using an associative reduce function. This will also perform
the merging locally on each mapper before sending results to a reducer, similarly to a
"combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
parallelism level.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="reduceByKeyLocally(scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduceByKeyLocally</h4>
<pre>public scala.collection.Map<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> reduceByKeyLocally(scala.Function2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> func)</pre>
<div class="block">Merge the values for each key using an associative reduce function, but return the results
immediately to the master as a Map. This will also perform the merging locally on each mapper
before sending results to a reducer, similarly to a "combiner" in MapReduce.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>func</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="reduceByKeyToDriver(scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduceByKeyToDriver</h4>
<pre>public scala.collection.Map<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> reduceByKeyToDriver(scala.Function2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>> func)</pre>
<div class="block">Alias for <code>reduceByKeyLocally</code>.</div>
</li>
</ul>
<a name="countByKey()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countByKey</h4>
<pre>public scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object&gt; countByKey()</pre>
<div class="block">Count the number of elements for each key, collecting the results to a local Map.
<p>
Note that this method should only be used if the resulting map is expected to be small, as
the whole thing is loaded into the driver's memory.
To handle very large results, consider using <code>rdd.mapValues(_ =&gt; 1L).reduceByKey(_ + _)</code>, which
returns an <code>RDD[(K, Long)]</code> instead of a map.</div>
<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="countByKeyApprox(long, double)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countByKeyApprox</h4>
<pre>public <a href="../../../../org/apache/spark/partial/PartialResult.html" title="class in org.apache.spark.partial">PartialResult</a><scala.collection.Map<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/partial/BoundedDouble.html" title="class in org.apache.spark.partial">BoundedDouble</a>>> countByKeyApprox(long timeout,
double confidence)</pre>
<div class="block">:: Experimental ::
Approximate version of countByKey that can return a partial result if it does
not finish within a timeout.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>timeout</code> - (undocumented)</dd><dd><code>confidence</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="countApproxDistinctByKey(int, int, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countApproxDistinctByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object>> countApproxDistinctByKey(int p,
int sp,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">:: Experimental ::
<p>
Return approximate number of distinct values for each key in this RDD.
<p>
The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
<a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
<p>
The relative accuracy is approximately <code>1.054 / sqrt(2^p)</code>. Setting a nonzero <code>sp &gt; p</code>
would trigger sparse representation of registers, which may reduce the memory consumption
and increase accuracy when the cardinality is small.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>p</code> - The precision value for the normal set.
<code>p</code> must be a value between 4 and <code>sp</code> if <code>sp</code> is not zero (32 max).</dd><dd><code>sp</code> - The precision value for the sparse set, between 0 and 32.
If <code>sp</code> equals 0, the sparse representation is skipped.</dd><dd><code>partitioner</code> - Partitioner to use for the resulting RDD.</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="countApproxDistinctByKey(double, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countApproxDistinctByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object>> countApproxDistinctByKey(double relativeSD,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Return approximate number of distinct values for each key in this RDD.
<p>
The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
<a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>relativeSD</code> - Relative accuracy. Smaller values create counters that require more space.
It must be greater than 0.000017.</dd><dd><code>partitioner</code> - partitioner of the resulting RDD</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="countApproxDistinctByKey(double, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countApproxDistinctByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object>> countApproxDistinctByKey(double relativeSD,
int numPartitions)</pre>
<div class="block">Return approximate number of distinct values for each key in this RDD.
<p>
The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
<a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>relativeSD</code> - Relative accuracy. Smaller values create counters that require more space.
It must be greater than 0.000017.</dd><dd><code>numPartitions</code> - number of partitions of the resulting RDD</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="countApproxDistinctByKey(double)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>countApproxDistinctByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,java.lang.Object>> countApproxDistinctByKey(double relativeSD)</pre>
<div class="block">Return approximate number of distinct values for each key in this RDD.
<p>
The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
<a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>relativeSD</code> - Relative accuracy. Smaller values create counters that require more space.
It must be greater than 0.000017.</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="groupByKey(org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>>> groupByKey(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Group the values for each key in the RDD into a single sequence. Allows controlling the
partitioning of the resulting key-value pair RDD by passing a Partitioner.
The ordering of elements within each group is not guaranteed, and may even differ
each time the resulting RDD is evaluated.
<p>
Note: This operation may be very expensive. If you are grouping in order to perform an
aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code>
or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.
<p>
Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any
key in memory. If a key has too many values, it can result in an <code>OutOfMemoryError</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="groupByKey(int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>>> groupByKey(int numPartitions)</pre>
<div class="block">Group the values for each key in the RDD into a single sequence. Hash-partitions the
resulting RDD into <code>numPartitions</code> partitions. The ordering of elements within
each group is not guaranteed, and may even differ each time the resulting RDD is evaluated.
<p>
Note: This operation may be very expensive. If you are grouping in order to perform an
aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code>
or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.
<p>
Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any
key in memory. If a key has too many values, it can result in an <code>OutOfMemoryError</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="partitionBy(org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>partitionBy</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>> partitionBy(<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Return a copy of the RDD partitioned using the specified partitioner.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="join(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>join</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W>>> join(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>. Each
pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in <code>this</code> and
(k, v2) is in <code>other</code>. Uses the given Partitioner to partition the output RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="leftOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>leftOuterJoin</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option<W>>>> leftOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (v, Some(w))) for w in <code>other</code>, or the
pair (k, (v, None)) if no elements in <code>other</code> have key k. Uses the given Partitioner to
partition the output RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="rightOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>rightOuterJoin</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.Option<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,W>>> rightOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>. For each element (k, w) in <code>other</code>, the
resulting RDD will either contain all pairs (k, (Some(v), w)) for v in <code>this</code>, or the
pair (k, (None, w)) if no elements in <code>this</code> have key k. Uses the given Partitioner to
partition the output RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="fullOuterJoin(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>fullOuterJoin</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.Option<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.Option<W>>>> fullOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (Some(v), Some(w))) for w in <code>other</code>, or
the pair (k, (Some(v), None)) if no elements in <code>other</code> have key k. Similarly, for each
element (k, w) in <code>other</code>, the resulting RDD will either contain all pairs
(k, (Some(v), Some(w))) for v in <code>this</code>, or the pair (k, (None, Some(w))) if no elements
in <code>this</code> have key k. Uses the given Partitioner to partition the output RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="combineByKey(scala.Function1, scala.Function2, scala.Function2)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>combineByKey</h4>
<pre>public &lt;C&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,C&gt;&gt; combineByKey(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; createCombiner,
scala.Function2&lt;C,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,C&gt; mergeValue,
scala.Function2&lt;C,C,C&gt; mergeCombiners)</pre>
</li>
</ul>
<a name="groupByKey()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupByKey</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>>> groupByKey()</pre>
<div class="block">Group the values for each key in the RDD into a single sequence. Hash-partitions the
resulting RDD with the existing partitioner/parallelism level. The ordering of elements
within each group is not guaranteed, and may even differ each time the resulting RDD is
evaluated.
<p>
Note: This operation may be very expensive. If you are grouping in order to perform an
aggregation (such as a sum or average) over each key, using <code>PairRDDFunctions.aggregateByKey</code>
or <code>PairRDDFunctions.reduceByKey</code> will provide much better performance.</div>
<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="join(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>join</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W>>> join(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other)</pre>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>. Each
pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in <code>this</code> and
(k, v2) is in <code>other</code>. Performs a hash join across the cluster.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="join(org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>join</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,W>>> join(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
int numPartitions)</pre>
<div class="block">Return an RDD containing all pairs of elements with matching keys in <code>this</code> and <code>other</code>. Each
pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in <code>this</code> and
(k, v2) is in <code>other</code>. Performs a hash join across the cluster.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="leftOuterJoin(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>leftOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option&lt;W&gt;&gt;&gt;&gt; leftOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</pre>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (v, Some(w))) for w in <code>other</code>, or the
pair (k, (v, None)) if no elements in <code>other</code> have key k. Hash-partitions the output
using the existing partitioner/parallelism level.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="leftOuterJoin(org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>leftOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.Option&lt;W&gt;&gt;&gt;&gt; leftOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</pre>
<div class="block">Perform a left outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (v, Some(w))) for w in <code>other</code>, or the
pair (k, (v, None)) if no elements in <code>other</code> have key k. Hash-partitions the output
into <code>numPartitions</code> partitions.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="rightOuterJoin(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>rightOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,W&gt;&gt;&gt; rightOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</pre>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>. For each element (k, w) in <code>other</code>, the
resulting RDD will either contain all pairs (k, (Some(v), w)) for v in <code>this</code>, or the
pair (k, (None, w)) if no elements in <code>this</code> have key k. Hash-partitions the resulting
RDD using the existing partitioner/parallelism level.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="rightOuterJoin(org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>rightOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,W&gt;&gt;&gt; rightOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</pre>
<div class="block">Perform a right outer join of <code>this</code> and <code>other</code>. For each element (k, w) in <code>other</code>, the
resulting RDD will either contain all pairs (k, (Some(v), w)) for v in <code>this</code>, or the
pair (k, (None, w)) if no elements in <code>this</code> have key k. Hash-partitions the resulting
RDD into the given number of partitions.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="fullOuterJoin(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>fullOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.Option&lt;W&gt;&gt;&gt;&gt; fullOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other)</pre>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (Some(v), Some(w))) for w in <code>other</code>, or
the pair (k, (Some(v), None)) if no elements in <code>other</code> have key k. Similarly, for each
element (k, w) in <code>other</code>, the resulting RDD will either contain all pairs
(k, (Some(v), Some(w))) for v in <code>this</code>, or the pair (k, (None, Some(w))) if no elements
in <code>this</code> have key k. Hash-partitions the resulting RDD using the existing partitioner/
parallelism level.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="fullOuterJoin(org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>fullOuterJoin</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2&lt;scala.Option&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;,scala.Option&lt;W&gt;&gt;&gt;&gt; fullOuterJoin(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions)</pre>
<div class="block">Perform a full outer join of <code>this</code> and <code>other</code>. For each element (k, v) in <code>this</code>, the
resulting RDD will either contain all pairs (k, (Some(v), Some(w))) for w in <code>other</code>, or
the pair (k, (Some(v), None)) if no elements in <code>other</code> have key k. Similarly, for each
element (k, w) in <code>other</code>, the resulting RDD will either contain all pairs
(k, (Some(v), Some(w))) for v in <code>this</code>, or the pair (k, (None, Some(w))) if no elements
in <code>this</code> have key k. Hash-partitions the resulting RDD into the given number of partitions.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="collectAsMap()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>collectAsMap</h4>
<pre>public scala.collection.Map&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; collectAsMap()</pre>
<div class="block">Return the key-value pairs in this RDD to the master as a Map.
<p>
Warning: this doesn't return a multimap, so if you have multiple values for the same key, only
one value per key is preserved in the returned map.</div>
<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="mapValues(scala.Function1)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>mapValues</h4>
<pre>public &lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt; mapValues(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,U&gt; f)</pre>
<div class="block">Pass each value in the key-value pair RDD through a map function without changing the keys;
this also retains the original RDD's partitioning.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>f</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="flatMapValues(scala.Function1)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>flatMapValues</h4>
<pre>public &lt;U&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,U&gt;&gt; flatMapValues(scala.Function1&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>,scala.collection.TraversableOnce&lt;U&gt;&gt; f)</pre>
<div class="block">Pass each value in the key-value pair RDD through a flatMap function without changing the
keys; this also retains the original RDD's partitioning.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>f</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2,W3> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>,scala.collection.Iterable<W3>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3>> other3,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code> or <code>other3</code>,
return a resulting RDD that contains a tuple with the list of values
for that key in <code>this</code>, <code>other1</code>, <code>other2</code> and <code>other3</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other1</code> - (undocumented)</dd><dd><code>other2</code> - (undocumented)</dd><dd><code>other3</code> - (undocumented)</dd><dd><code>partitioner</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.Partitioner)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> partitioner)</pre>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2,W3> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>,scala.collection.Iterable<W3>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3>> other3)</pre>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other)</pre>
<div class="block">For each key k in <code>this</code> or <code>other</code>, return a resulting RDD that contains a tuple with the
list of values for that key in <code>this</code> as well as <code>other</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2)</pre>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code>, return a resulting RDD that contains a
tuple with the list of values for that key in <code>this</code>, <code>other1</code> and <code>other2</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other1</code> - (undocumented)</dd><dd><code>other2</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other,
int numPartitions)</pre>
<div class="block">For each key k in <code>this</code> or <code>other</code>, return a resulting RDD that contains a tuple with the
list of values for that key in <code>this</code> as well as <code>other</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
int numPartitions)</pre>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code>, return a resulting RDD that contains a
tuple with the list of values for that key in <code>this</code>, <code>other1</code> and <code>other2</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other1</code> - (undocumented)</dd><dd><code>other2</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="cogroup(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, int)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>cogroup</h4>
<pre>public <W1,W2,W3> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>,scala.collection.Iterable<W3>>>> cogroup(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3>> other3,
int numPartitions)</pre>
<div class="block">For each key k in <code>this</code> or <code>other1</code> or <code>other2</code> or <code>other3</code>,
return a resulting RDD that contains a tuple with the list of values
for that key in <code>this</code>, <code>other1</code>, <code>other2</code> and <code>other3</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other1</code> - (undocumented)</dd><dd><code>other2</code> - (undocumented)</dd><dd><code>other3</code> - (undocumented)</dd><dd><code>numPartitions</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="groupWith(org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupWith</h4>
<pre>public <W> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple2<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W>>>> groupWith(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W>> other)</pre>
<div class="block">Alias for cogroup.</div>
</li>
</ul>
<a name="groupWith(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupWith</h4>
<pre>public <W1,W2> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple3<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>>>> groupWith(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2)</pre>
<div class="block">Alias for cogroup.</div>
</li>
</ul>
<a name="groupWith(org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD, org.apache.spark.rdd.RDD)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>groupWith</h4>
<pre>public <W1,W2,W3> <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,scala.Tuple4<scala.collection.Iterable<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>>,scala.collection.Iterable<W1>,scala.collection.Iterable<W2>,scala.collection.Iterable<W3>>>> groupWith(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W1>> other1,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W2>> other2,
<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><scala.Tuple2<<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W3>> other3)</pre>
<div class="block">Alias for cogroup.</div>
</li>
</ul>
<a name="subtractByKey(org.apache.spark.rdd.RDD, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>subtractByKey</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; subtractByKey(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
scala.reflect.ClassTag&lt;W&gt; evidence$4)</pre>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.
<p>
Uses the partitioner/partition size of <code>this</code>, because even if <code>other</code> is huge, the resulting
RDD will be no larger than <code>this</code>.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>other</code> - (undocumented)</dd><dd><code>evidence$4</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="subtractByKey(org.apache.spark.rdd.RDD, int, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>subtractByKey</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; subtractByKey(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
int numPartitions,
scala.reflect.ClassTag&lt;W&gt; evidence$5)</pre>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.</div>
</li>
</ul>
<a name="subtractByKey(org.apache.spark.rdd.RDD, org.apache.spark.Partitioner, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>subtractByKey</h4>
<pre>public &lt;W&gt; <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; subtractByKey(<a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;scala.Tuple2&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,W&gt;&gt; other,
<a href="../../../../org/apache/spark/Partitioner.html" title="class in org.apache.spark">Partitioner</a> p,
scala.reflect.ClassTag&lt;W&gt; evidence$6)</pre>
<div class="block">Return an RDD with the pairs from <code>this</code> whose keys are not in <code>other</code>.</div>
</li>
</ul>
<a name="lookup(java.lang.Object)">
<!-- -->
</a><a name="lookup(K)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>lookup</h4>
<pre>public scala.collection.Seq&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; lookup(<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a> key)</pre>
<div class="block">Return the list of values in the RDD for key <code>key</code>. This operation is done efficiently if the
RDD has a known partitioner by only searching the partition that the key maps to.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>key</code> - (undocumented)</dd>
<dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsHadoopFile(java.lang.String, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsHadoopFile</h4>
<pre>public &lt;F extends org.apache.hadoop.mapred.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; void saveAsHadoopFile(java.lang.String path,
scala.reflect.ClassTag&lt;F&gt; fm)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>fm</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsHadoopFile(java.lang.String, java.lang.Class, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsHadoopFile</h4>
<pre>public &lt;F extends org.apache.hadoop.mapred.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; void saveAsHadoopFile(java.lang.String path,
java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt; codec,
scala.reflect.ClassTag&lt;F&gt; fm)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD. Compress the result with the
supplied codec.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>codec</code> - (undocumented)</dd><dd><code>fm</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsNewAPIHadoopFile(java.lang.String, scala.reflect.ClassTag)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsNewAPIHadoopFile</h4>
<pre>public &lt;F extends org.apache.hadoop.mapreduce.OutputFormat&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>,<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt;&gt; void saveAsNewAPIHadoopFile(java.lang.String path,
scala.reflect.ClassTag&lt;F&gt; fm)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a new Hadoop API <code>OutputFormat</code>
(mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>fm</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsNewAPIHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, org.apache.hadoop.conf.Configuration)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsNewAPIHadoopFile</h4>
<pre>public void saveAsNewAPIHadoopFile(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapreduce.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
org.apache.hadoop.conf.Configuration conf)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a new Hadoop API <code>OutputFormat</code>
(mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>outputFormatClass</code> - (undocumented)</dd><dd><code>conf</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, java.lang.Class)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsHadoopFile</h4>
<pre>public void saveAsHadoopFile(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapred.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt; codec)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD. Compress with the supplied codec.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>outputFormatClass</code> - (undocumented)</dd><dd><code>codec</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsHadoopFile(java.lang.String, java.lang.Class, java.lang.Class, java.lang.Class, org.apache.hadoop.mapred.JobConf, scala.Option)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsHadoopFile</h4>
<pre>public void saveAsHadoopFile(java.lang.String path,
java.lang.Class&lt;?&gt; keyClass,
java.lang.Class&lt;?&gt; valueClass,
java.lang.Class&lt;? extends org.apache.hadoop.mapred.OutputFormat&lt;?,?&gt;&gt; outputFormatClass,
org.apache.hadoop.mapred.JobConf conf,
scala.Option&lt;java.lang.Class&lt;? extends org.apache.hadoop.io.compress.CompressionCodec&gt;&gt; codec)</pre>
<div class="block">Output the RDD to any Hadoop-supported file system, using a Hadoop <code>OutputFormat</code> class
supporting the key and value types K and V in this RDD.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>path</code> - (undocumented)</dd><dd><code>keyClass</code> - (undocumented)</dd><dd><code>valueClass</code> - (undocumented)</dd><dd><code>outputFormatClass</code> - (undocumented)</dd><dd><code>conf</code> - (undocumented)</dd><dd><code>codec</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsNewAPIHadoopDataset(org.apache.hadoop.conf.Configuration)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsNewAPIHadoopDataset</h4>
<pre>public void saveAsNewAPIHadoopDataset(org.apache.hadoop.conf.Configuration conf)</pre>
<div class="block">Output the RDD to any Hadoop-supported storage system with new Hadoop API, using a Hadoop
Configuration object for that storage system. The Conf should set an OutputFormat and any
output paths required (e.g. a table name to write to) in the same way as it would be
configured for a Hadoop MapReduce job.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="saveAsHadoopDataset(org.apache.hadoop.mapred.JobConf)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>saveAsHadoopDataset</h4>
<pre>public void saveAsHadoopDataset(org.apache.hadoop.mapred.JobConf conf)</pre>
<div class="block">Output the RDD to any Hadoop-supported storage system, using a Hadoop JobConf object for
that storage system. The JobConf should set an OutputFormat and any output paths required
(e.g. a table name to write to) in the same way as it would be configured for a Hadoop
MapReduce job.</div>
<dl><dt><span class="strong">Parameters:</span></dt><dd><code>conf</code> - (undocumented)</dd></dl>
</li>
</ul>
<a name="keys()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>keys</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">K</a>&gt; keys()</pre>
<div class="block">Return an RDD with the keys of each tuple.</div>
<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
<a name="values()">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>values</h4>
<pre>public <a href="../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a>&lt;<a href="../../../../org/apache/spark/rdd/PairRDDFunctions.html" title="type parameter in PairRDDFunctions">V</a>&gt; values()</pre>
<div class="block">Return an RDD with the values of each tuple.</div>
<dl><dt><span class="strong">Returns:</span></dt><dd>(undocumented)</dd></dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar_bottom">
<!-- -->
</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/spark/rdd/OrderedRDDFunctions.html" title="class in org.apache.spark.rdd"><span class="strong">Prev Class</span></a></li>
<li><a href="../../../../org/apache/spark/rdd/PartitionCoalescer.html" title="class in org.apache.spark.rdd"><span class="strong">Next Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/spark/rdd/PairRDDFunctions.html" target="_top">Frames</a></li>
<li><a href="PairRDDFunctions.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" link only when this page is the topmost
// window (window == top); hide it when the page is embedded in a frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary: </li>
<li>Nested | </li>
<li>Field | </li>
<li><a href="#constructor_summary">Constr</a> | </li>
<li><a href="#method_summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail: </li>
<li>Field | </li>
<li><a href="#constructor_detail">Constr</a> | </li>
<li><a href="#method_detail">Method</a></li>
</ul>
</div>
<a name="skip-navbar_bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<script defer="defer" type="text/javascript" src="../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../lib/api-javadocs.js"></script></body>
</html>