summaryrefslogtreecommitdiff
path: root/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html
diff options
context:
space:
mode:
authorReynold Xin <rxin@apache.org>2015-09-08 23:20:31 +0000
committerReynold Xin <rxin@apache.org>2015-09-08 23:20:31 +0000
commit443d7fc272a34a818df4dd589bb251ec1087ae11 (patch)
tree58b176846888d8824cd113146bd59568ea5354f6 /site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html
parent1037fcd3d980ca1bf8e79ccfecd1f5234545b6ff (diff)
downloadspark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.tar.gz
spark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.tar.bz2
spark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.zip
Added 1.5.0 docs.
Diffstat (limited to 'site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html')
-rw-r--r--site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html938
1 files changed, 938 insertions, 0 deletions
diff --git a/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html b/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html
new file mode 100644
index 000000000..de8b4b144
--- /dev/null
+++ b/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html
@@ -0,0 +1,938 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>pyspark.mllib.linalg.distributed &mdash; PySpark master documentation</title>
+
+ <link rel="stylesheet" href="../../../../_static/nature.css" type="text/css" />
+ <link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '../../../../',
+ VERSION: 'master',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="../../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../../_static/doctools.js"></script>
+ <link rel="top" title="PySpark master documentation" href="../../../../index.html" />
+ <link rel="up" title="pyspark.mllib.linalg" href="../linalg.html" />
+ </head>
+ <body role="document">
+ <div class="related" role="navigation" aria-label="related navigation">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="nav-item nav-item-0"><a href="../../../../index.html">PySpark master documentation</a> &raquo;</li>
+ <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> &raquo;</li>
+ <li class="nav-item nav-item-2"><a href="../linalg.html" accesskey="U">pyspark.mllib.linalg</a> &raquo;</li>
+ </ul>
+ </div>
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="bodywrapper">
+ <div class="body" role="main">
+
+ <h1>Source code for pyspark.mllib.linalg.distributed</h1><div class="highlight"><pre>
+<span class="c">#</span>
+<span class="c"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
+<span class="c"># contributor license agreements. See the NOTICE file distributed with</span>
+<span class="c"># this work for additional information regarding copyright ownership.</span>
+<span class="c"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
+<span class="c"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
+<span class="c"># the License. You may obtain a copy of the License at</span>
+<span class="c">#</span>
+<span class="c"># http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c">#</span>
+<span class="c"># Unless required by applicable law or agreed to in writing, software</span>
+<span class="c"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
+<span class="c"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
+<span class="c"># See the License for the specific language governing permissions and</span>
+<span class="c"># limitations under the License.</span>
+<span class="c">#</span>
+
+<span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">Package for distributed linear algebra.</span>
+<span class="sd">&quot;&quot;&quot;</span>
+
+<span class="kn">import</span> <span class="nn">sys</span>
+
+<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version</span> <span class="o">&gt;=</span> <span class="s">&#39;3&#39;</span><span class="p">:</span>
+ <span class="nb">long</span> <span class="o">=</span> <span class="nb">int</span>
+
+<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span>
+
+<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">RDD</span>
+<span class="kn">from</span> <span class="nn">pyspark.mllib.common</span> <span class="kn">import</span> <span class="n">callMLlibFunc</span><span class="p">,</span> <span class="n">JavaModelWrapper</span>
+<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">_convert_to_vector</span><span class="p">,</span> <span class="n">Matrix</span>
+
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s">&#39;DistributedMatrix&#39;</span><span class="p">,</span> <span class="s">&#39;RowMatrix&#39;</span><span class="p">,</span> <span class="s">&#39;IndexedRow&#39;</span><span class="p">,</span>
+ <span class="s">&#39;IndexedRowMatrix&#39;</span><span class="p">,</span> <span class="s">&#39;MatrixEntry&#39;</span><span class="p">,</span> <span class="s">&#39;CoordinateMatrix&#39;</span><span class="p">,</span>
+ <span class="s">&#39;BlockMatrix&#39;</span><span class="p">]</span>
+
+
+<div class="viewcode-block" id="DistributedMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix">[docs]</a><span class="k">class</span> <span class="nc">DistributedMatrix</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a distributively stored matrix backed by one or</span>
+<span class="sd"> more RDDs.</span>
+
+<span class="sd"> &quot;&quot;&quot;</span>
+<div class="viewcode-block" id="DistributedMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;Get or compute the number of rows.&quot;&quot;&quot;</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span>
+</div>
+<div class="viewcode-block" id="DistributedMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;Get or compute the number of cols.&quot;&quot;&quot;</span>
+ <span class="k">raise</span> <span class="ne">NotImplementedError</span>
+
+</div></div>
+<div class="viewcode-block" id="RowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix">[docs]</a><span class="k">class</span> <span class="nc">RowMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a row-oriented distributed Matrix with no meaningful</span>
+<span class="sd"> row indices.</span>
+
+<span class="sd"> :param rows: An RDD of vectors.</span>
+<span class="sd"> :param numRows: Number of rows in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of rows will be determined by the number of</span>
+<span class="sd"> records in the `rows` RDD.</span>
+<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of columns will be determined by the size of</span>
+<span class="sd"> the first row.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Note: This docstring is not shown publicly.</span>
+
+<span class="sd"> Create a wrapper over a Java RowMatrix.</span>
+
+<span class="sd"> Publicly, we require that `rows` be an RDD. However, for</span>
+<span class="sd"> internal usage, `rows` can also be a Java RowMatrix</span>
+<span class="sd"> object, in which case we can wrap it directly. This</span>
+<span class="sd"> assists in clean matrix conversions.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])</span>
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(rows)</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_diff = RowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_diff._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> False</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_same._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> True</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
+ <span class="n">rows</span> <span class="o">=</span> <span class="n">rows</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_vector</span><span class="p">)</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;createRowMatrix&quot;</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="n">rows</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">&quot;RowMatrix&quot;</span><span class="p">):</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">rows</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;rows should be an RDD of vectors, got </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">rows</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">rows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Rows of the RowMatrix stored as an RDD of vectors.</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]]))</span>
+<span class="sd"> &gt;&gt;&gt; rows = mat.rows</span>
+<span class="sd"> &gt;&gt;&gt; rows.first()</span>
+<span class="sd"> DenseVector([1.0, 2.0, 3.0])</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;rows&quot;</span><span class="p">)</span>
+
+<div class="viewcode-block" id="RowMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of rows.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([[1, 2, 3], [4, 5, 6],</span>
+<span class="sd"> ... [7, 8, 9], [10, 11, 12]])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 4</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(rows, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numRows&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="RowMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of cols.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([[1, 2, 3], [4, 5, 6],</span>
+<span class="sd"> ... [7, 8, 9], [10, 11, 12]])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 3</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = RowMatrix(rows, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 6</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numCols&quot;</span><span class="p">)</span>
+
+</div></div>
+<div class="viewcode-block" id="IndexedRow"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRow">[docs]</a><span class="k">class</span> <span class="nc">IndexedRow</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a row of an IndexedRowMatrix.</span>
+
+<span class="sd"> Just a wrapper over a (long, vector) tuple.</span>
+
+<span class="sd"> :param index: The index for the given row.</span>
+<span class="sd"> :param vector: The row in the matrix at the given index.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">,</span> <span class="n">vector</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">vector</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">vector</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s">&quot;IndexedRow(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector</span><span class="p">)</span>
+
+</div>
+<span class="k">def</span> <span class="nf">_convert_to_indexed_row</span><span class="p">(</span><span class="n">row</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="n">IndexedRow</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">row</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">row</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">IndexedRow</span><span class="p">(</span><span class="o">*</span><span class="n">row</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;Cannot convert type </span><span class="si">%s</span><span class="s"> into IndexedRow&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">row</span><span class="p">))</span>
+
+
+<div class="viewcode-block" id="IndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix">[docs]</a><span class="k">class</span> <span class="nc">IndexedRowMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a row-oriented distributed Matrix with indexed rows.</span>
+
+<span class="sd"> :param rows: An RDD of IndexedRows or (long, vector) tuples.</span>
+<span class="sd"> :param numRows: Number of rows in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of rows will be determined by the max row</span>
+<span class="sd"> index plus one.</span>
+<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of columns will be determined by the size of</span>
+<span class="sd"> the first row.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Note: This docstring is not shown publicly.</span>
+
+<span class="sd"> Create a wrapper over a Java IndexedRowMatrix.</span>
+
+<span class="sd"> Publicly, we require that `rows` be an RDD. However, for</span>
+<span class="sd"> internal usage, `rows` can also be a Java IndexedRowMatrix</span>
+<span class="sd"> object, in which case we can wrap it directly. This</span>
+<span class="sd"> assists in clean matrix conversions.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(1, [4, 5, 6])])</span>
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows)</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_diff = IndexedRowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_diff._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> False</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_same = IndexedRowMatrix(mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_same._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> True</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
+ <span class="n">rows</span> <span class="o">=</span> <span class="n">rows</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_indexed_row</span><span class="p">)</span>
+ <span class="c"># We use DataFrames for serialization of IndexedRows from</span>
+ <span class="c"># Python, so first convert the RDD to a DataFrame on this</span>
+ <span class="c"># side. This will convert each IndexedRow to a Row</span>
+ <span class="c"># containing the &#39;index&#39; and &#39;vector&#39; values, which can</span>
+ <span class="c"># both be easily serialized. We will convert back to</span>
+ <span class="c"># IndexedRows on the Scala side.</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;createIndexedRowMatrix&quot;</span><span class="p">,</span> <span class="n">rows</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span>
+ <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="n">rows</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">&quot;IndexedRowMatrix&quot;</span><span class="p">):</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">rows</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;rows should be an RDD of IndexedRows or (long, vector) tuples, &quot;</span>
+ <span class="s">&quot;got </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">rows</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">rows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(1, [4, 5, 6])]))</span>
+<span class="sd"> &gt;&gt;&gt; rows = mat.rows</span>
+<span class="sd"> &gt;&gt;&gt; rows.first()</span>
+<span class="sd"> IndexedRow(0, [1.0,2.0,3.0])</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="c"># We use DataFrames for serialization of IndexedRows from</span>
+ <span class="c"># Java, so we first convert the RDD of rows to a DataFrame</span>
+ <span class="c"># on the Scala/Java side. Then we map each Row in the</span>
+ <span class="c"># DataFrame back to an IndexedRow on this side.</span>
+ <span class="n">rows_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;getIndexedRows&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span>
+ <span class="n">rows</span> <span class="o">=</span> <span class="n">rows_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">IndexedRow</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
+ <span class="k">return</span> <span class="n">rows</span>
+
+<div class="viewcode-block" id="IndexedRowMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of rows.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(1, [4, 5, 6]),</span>
+<span class="sd"> ... IndexedRow(2, [7, 8, 9]),</span>
+<span class="sd"> ... IndexedRow(3, [10, 11, 12])])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 4</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numRows&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="IndexedRowMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of cols.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(1, [4, 5, 6]),</span>
+<span class="sd"> ... IndexedRow(2, [7, 8, 9]),</span>
+<span class="sd"> ... IndexedRow(3, [10, 11, 12])])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 3</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 6</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numCols&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="IndexedRowMatrix.toRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a RowMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(6, [4, 5, 6])])</span>
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows).toRowMatrix()</span>
+<span class="sd"> &gt;&gt;&gt; mat.rows.collect()</span>
+<span class="sd"> [DenseVector([1.0, 2.0, 3.0]), DenseVector([4.0, 5.0, 6.0])]</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toRowMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">RowMatrix</span><span class="p">(</span><span class="n">java_row_matrix</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="IndexedRowMatrix.toCoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toCoordinateMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toCoordinateMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a CoordinateMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 0]),</span>
+<span class="sd"> ... IndexedRow(6, [0, 5])])</span>
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows).toCoordinateMatrix()</span>
+<span class="sd"> &gt;&gt;&gt; mat.entries.take(3)</span>
+<span class="sd"> [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 0.0), MatrixEntry(6, 0, 0.0)]</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_coordinate_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toCoordinateMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">CoordinateMatrix</span><span class="p">(</span><span class="n">java_coordinate_matrix</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="IndexedRowMatrix.toBlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toBlockMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toBlockMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a BlockMatrix.</span>
+
+<span class="sd"> :param rowsPerBlock: Number of rows that make up each block.</span>
+<span class="sd"> The blocks forming the final rows are not</span>
+<span class="sd"> required to have the given number of rows.</span>
+<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span>
+<span class="sd"> The blocks forming the final columns are not</span>
+<span class="sd"> required to have the given number of columns.</span>
+
+<span class="sd"> &gt;&gt;&gt; rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span>
+<span class="sd"> ... IndexedRow(6, [4, 5, 6])])</span>
+<span class="sd"> &gt;&gt;&gt; mat = IndexedRowMatrix(rows).toBlockMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This IndexedRowMatrix will have 7 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # the highest row index being 6, and the ensuing</span>
+<span class="sd"> &gt;&gt;&gt; # BlockMatrix will have 7 rows as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 3</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_block_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toBlockMatrix&quot;</span><span class="p">,</span>
+ <span class="n">rowsPerBlock</span><span class="p">,</span>
+ <span class="n">colsPerBlock</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">BlockMatrix</span><span class="p">(</span><span class="n">java_block_matrix</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">)</span>
+
+</div></div>
+<div class="viewcode-block" id="MatrixEntry"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.MatrixEntry">[docs]</a><span class="k">class</span> <span class="nc">MatrixEntry</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents an entry of a CoordinateMatrix.</span>
+
+<span class="sd"> Just a wrapper over a (long, long, float) tuple.</span>
+
+<span class="sd"> :param i: The row index of the matrix.</span>
+<span class="sd"> :param j: The column index of the matrix.</span>
+<span class="sd"> :param value: The (i, j)th entry of the matrix, as a float.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">i</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">j</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">j</span><span class="p">)</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">value</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+ <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="k">return</span> <span class="s">&quot;MatrixEntry(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">i</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">j</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">value</span><span class="p">)</span>
+
+</div>
+<span class="k">def</span> <span class="nf">_convert_to_matrix_entry</span><span class="p">(</span><span class="n">entry</span><span class="p">):</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entry</span><span class="p">,</span> <span class="n">MatrixEntry</span><span class="p">):</span>
+ <span class="k">return</span> <span class="n">entry</span>
+ <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entry</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">entry</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
+ <span class="k">return</span> <span class="n">MatrixEntry</span><span class="p">(</span><span class="o">*</span><span class="n">entry</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;Cannot convert type </span><span class="si">%s</span><span class="s"> into MatrixEntry&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">entry</span><span class="p">))</span>
+
+
+<div class="viewcode-block" id="CoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix">[docs]</a><span class="k">class</span> <span class="nc">CoordinateMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a matrix in coordinate format.</span>
+
+<span class="sd"> :param entries: An RDD of MatrixEntry inputs or</span>
+<span class="sd"> (long, long, float) tuples.</span>
+<span class="sd"> :param numRows: Number of rows in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of rows will be determined by the max row</span>
+<span class="sd"> index plus one.</span>
+<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span>
+<span class="sd"> value means unknown, at which point the number</span>
+<span class="sd"> of columns will be determined by the max column</span>
+<span class="sd"> index plus one.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">entries</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Note: This docstring is not shown publicly.</span>
+
+<span class="sd"> Create a wrapper over a Java CoordinateMatrix.</span>
+
+<span class="sd"> Publicly, we require that `entries` be an RDD. However, for</span>
+<span class="sd"> internal usage, `entries` can also be a Java CoordinateMatrix</span>
+<span class="sd"> object, in which case we can wrap it directly. This</span>
+<span class="sd"> assists in clean matrix conversions.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(6, 4, 2.1)])</span>
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries)</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_diff = CoordinateMatrix(entries)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_diff._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> False</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_same = CoordinateMatrix(mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_same._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> True</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entries</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
+ <span class="n">entries</span> <span class="o">=</span> <span class="n">entries</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_matrix_entry</span><span class="p">)</span>
+ <span class="c"># We use DataFrames for serialization of MatrixEntry entries</span>
+ <span class="c"># from Python, so first convert the RDD to a DataFrame on</span>
+ <span class="c"># this side. This will convert each MatrixEntry to a Row</span>
+ <span class="c"># containing the &#39;i&#39;, &#39;j&#39;, and &#39;value&#39; values, which can</span>
+ <span class="c"># each be easily serialized. We will convert back to</span>
+ <span class="c"># MatrixEntry inputs on the Scala side.</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;createCoordinateMatrix&quot;</span><span class="p">,</span> <span class="n">entries</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span>
+ <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">long</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">entries</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="n">entries</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">&quot;CoordinateMatrix&quot;</span><span class="p">):</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">entries</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;entries should be an RDD of MatrixEntry entries or &quot;</span>
+ <span class="s">&quot;(long, long, float) tuples, got </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">entries</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">entries</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Entries of the CoordinateMatrix stored as an RDD of</span>
+<span class="sd"> MatrixEntries.</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(6, 4, 2.1)]))</span>
+<span class="sd"> &gt;&gt;&gt; entries = mat.entries</span>
+<span class="sd"> &gt;&gt;&gt; entries.first()</span>
+<span class="sd"> MatrixEntry(0, 0, 1.2)</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="c"># We use DataFrames for serialization of MatrixEntry entries</span>
+ <span class="c"># from Java, so we first convert the RDD of entries to a</span>
+ <span class="c"># DataFrame on the Scala/Java side. Then we map each Row in</span>
+ <span class="c"># the DataFrame back to a MatrixEntry on this side.</span>
+ <span class="n">entries_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;getMatrixEntries&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span>
+ <span class="n">entries</span> <span class="o">=</span> <span class="n">entries_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">MatrixEntry</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">2</span><span class="p">]))</span>
+ <span class="k">return</span> <span class="n">entries</span>
+
+<div class="viewcode-block" id="CoordinateMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of rows.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(1, 0, 2),</span>
+<span class="sd"> ... MatrixEntry(2, 1, 3.7)])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 3</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numRows&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="CoordinateMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of cols.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(1, 0, 2),</span>
+<span class="sd"> ... MatrixEntry(2, 1, 3.7)])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 2</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 6</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numCols&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="CoordinateMatrix.toRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a RowMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(6, 4, 2.1)])</span>
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries).toRowMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 7 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # the highest row index being 6, but the ensuing RowMatrix</span>
+<span class="sd"> &gt;&gt;&gt; # will only have 2 rows since there are only entries on 2</span>
+<span class="sd"> &gt;&gt;&gt; # unique rows.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 2</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 5 columns, due to the</span>
+<span class="sd"> &gt;&gt;&gt; # highest column index being 4, and the ensuing RowMatrix</span>
+<span class="sd"> &gt;&gt;&gt; # will have 5 columns as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 5</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toRowMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">RowMatrix</span><span class="p">(</span><span class="n">java_row_matrix</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="CoordinateMatrix.toIndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toIndexedRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toIndexedRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to an IndexedRowMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(6, 4, 2.1)])</span>
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries).toIndexedRowMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 7 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # the highest row index being 6, and the ensuing</span>
+<span class="sd"> &gt;&gt;&gt; # IndexedRowMatrix will have 7 rows as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 5 columns, due to the</span>
+<span class="sd"> &gt;&gt;&gt; # highest column index being 4, and the ensuing</span>
+<span class="sd"> &gt;&gt;&gt; # IndexedRowMatrix will have 5 columns as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 5</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_indexed_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toIndexedRowMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">IndexedRowMatrix</span><span class="p">(</span><span class="n">java_indexed_row_matrix</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="CoordinateMatrix.toBlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toBlockMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toBlockMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a BlockMatrix.</span>
+
+<span class="sd"> :param rowsPerBlock: Number of rows that make up each block.</span>
+<span class="sd"> The blocks forming the final rows are not</span>
+<span class="sd"> required to have the given number of rows.</span>
+<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span>
+<span class="sd"> The blocks forming the final columns are not</span>
+<span class="sd"> required to have the given number of columns.</span>
+
+<span class="sd"> &gt;&gt;&gt; entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span>
+<span class="sd"> ... MatrixEntry(6, 4, 2.1)])</span>
+<span class="sd"> &gt;&gt;&gt; mat = CoordinateMatrix(entries).toBlockMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 7 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # the highest row index being 6, and the ensuing</span>
+<span class="sd"> &gt;&gt;&gt; # BlockMatrix will have 7 rows as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+
+<span class="sd"> &gt;&gt;&gt; # This CoordinateMatrix will have 5 columns, due to the</span>
+<span class="sd"> &gt;&gt;&gt; # highest column index being 4, and the ensuing</span>
+<span class="sd"> &gt;&gt;&gt; # BlockMatrix will have 5 columns as well.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 5</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_block_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toBlockMatrix&quot;</span><span class="p">,</span>
+ <span class="n">rowsPerBlock</span><span class="p">,</span>
+ <span class="n">colsPerBlock</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">BlockMatrix</span><span class="p">(</span><span class="n">java_block_matrix</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">)</span>
+
+</div></div>
+<span class="k">def</span> <span class="nf">_convert_to_matrix_block_tuple</span><span class="p">(</span><span class="n">block</span><span class="p">):</span>
+ <span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">block</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span>
+ <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">==</span> <span class="mi">2</span>
+ <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">Matrix</span><span class="p">)):</span>
+ <span class="n">blockRowIndex</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
+ <span class="n">blockColIndex</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">])</span>
+ <span class="n">subMatrix</span> <span class="o">=</span> <span class="n">block</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
+ <span class="k">return</span> <span class="p">((</span><span class="n">blockRowIndex</span><span class="p">,</span> <span class="n">blockColIndex</span><span class="p">),</span> <span class="n">subMatrix</span><span class="p">)</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;Cannot convert type </span><span class="si">%s</span><span class="s"> into a sub-matrix block tuple&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">block</span><span class="p">))</span>
+
+
+<div class="viewcode-block" id="BlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix">[docs]</a><span class="k">class</span> <span class="nc">BlockMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> .. note:: Experimental</span>
+
+<span class="sd"> Represents a distributed matrix in blocks of local matrices.</span>
+
+<span class="sd"> :param blocks: An RDD of sub-matrix blocks</span>
+<span class="sd"> ((blockRowIndex, blockColIndex), sub-matrix) that</span>
+<span class="sd"> form this distributed matrix. If multiple blocks</span>
+<span class="sd"> with the same index exist, the results for</span>
+<span class="sd"> operations like add and multiply will be</span>
+<span class="sd"> unpredictable.</span>
+<span class="sd"> :param rowsPerBlock: Number of rows that make up each block.</span>
+<span class="sd"> The blocks forming the final rows are not</span>
+<span class="sd"> required to have the given number of rows.</span>
+<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span>
+<span class="sd"> The blocks forming the final columns are not</span>
+<span class="sd"> required to have the given number of columns.</span>
+<span class="sd"> :param numRows: Number of rows of this matrix. If the supplied</span>
+<span class="sd"> value is less than or equal to zero, the number</span>
+<span class="sd"> of rows will be calculated when `numRows` is</span>
+<span class="sd"> invoked.</span>
+<span class="sd"> :param numCols: Number of columns of this matrix. If the supplied</span>
+<span class="sd"> value is less than or equal to zero, the number</span>
+<span class="sd"> of columns will be calculated when `numCols` is</span>
+<span class="sd"> invoked.</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">blocks</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Note: This docstring is not shown publicly.</span>
+
+<span class="sd"> Create a wrapper over a Java BlockMatrix.</span>
+
+<span class="sd"> Publicly, we require that `blocks` be an RDD. However, for</span>
+<span class="sd"> internal usage, `blocks` can also be a Java BlockMatrix</span>
+<span class="sd"> object, in which case we can wrap it directly. This</span>
+<span class="sd"> assists in clean matrix conversions.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_diff = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_diff._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> False</span>
+
+<span class="sd"> &gt;&gt;&gt; mat_same = BlockMatrix(mat._java_matrix_wrapper._java_model, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; (mat_same._java_matrix_wrapper._java_model ==</span>
+<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span>
+<span class="sd"> True</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">blocks</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
+ <span class="n">blocks</span> <span class="o">=</span> <span class="n">blocks</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_matrix_block_tuple</span><span class="p">)</span>
+ <span class="c"># We use DataFrames for serialization of sub-matrix blocks</span>
+ <span class="c"># from Python, so first convert the RDD to a DataFrame on</span>
+ <span class="c"># this side. This will convert each sub-matrix block</span>
+ <span class="c"># tuple to a Row containing the &#39;blockRowIndex&#39;,</span>
+ <span class="c"># &#39;blockColIndex&#39;, and &#39;subMatrix&#39; values, which can</span>
+ <span class="c"># each be easily serialized. We will convert back to</span>
+ <span class="c"># ((blockRowIndex, blockColIndex), sub-matrix) tuples on</span>
+ <span class="c"># the Scala side.</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;createBlockMatrix&quot;</span><span class="p">,</span> <span class="n">blocks</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span>
+ <span class="nb">int</span><span class="p">(</span><span class="n">rowsPerBlock</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">colsPerBlock</span><span class="p">),</span>
+ <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">long</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span>
+ <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">blocks</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span>
+ <span class="ow">and</span> <span class="n">blocks</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">&quot;BlockMatrix&quot;</span><span class="p">):</span>
+ <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">blocks</span>
+ <span class="k">else</span><span class="p">:</span>
+ <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">&quot;blocks should be an RDD of sub-matrix blocks as &quot;</span>
+ <span class="s">&quot;((int, int), matrix) tuples, got </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">blocks</span><span class="p">))</span>
+
+ <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">blocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> The RDD of sub-matrix blocks</span>
+<span class="sd"> ((blockRowIndex, blockColIndex), sub-matrix) that form this</span>
+<span class="sd"> distributed matrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(</span>
+<span class="sd"> ... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; blocks = mat.blocks</span>
+<span class="sd"> &gt;&gt;&gt; blocks.first()</span>
+<span class="sd"> ((0, 0), DenseMatrix(3, 2, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0))</span>
+
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="c"># We use DataFrames for serialization of sub-matrix blocks</span>
+ <span class="c"># from Java, so we first convert the RDD of blocks to a</span>
+ <span class="c"># DataFrame on the Scala/Java side. Then we map each Row in</span>
+ <span class="c"># the DataFrame back to a sub-matrix block on this side.</span>
+ <span class="n">blocks_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">&quot;getMatrixBlocks&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span>
+ <span class="n">blocks</span> <span class="o">=</span> <span class="n">blocks_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="p">((</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]),</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
+ <span class="k">return</span> <span class="n">blocks</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">rowsPerBlock</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Number of rows that make up each block.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; mat.rowsPerBlock</span>
+<span class="sd"> 3</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;rowsPerBlock&quot;</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">colsPerBlock</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Number of columns that make up each block.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; mat.colsPerBlock</span>
+<span class="sd"> 2</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;colsPerBlock&quot;</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">numRowBlocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Number of rows of blocks in the BlockMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; mat.numRowBlocks</span>
+<span class="sd"> 2</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numRowBlocks&quot;</span><span class="p">)</span>
+
+ <span class="nd">@property</span>
+ <span class="k">def</span> <span class="nf">numColBlocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Number of columns of blocks in the BlockMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; mat.numColBlocks</span>
+<span class="sd"> 1</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numColBlocks&quot;</span><span class="p">)</span>
+
+<div class="viewcode-block" id="BlockMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of rows.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 6</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 7</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numRows&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="BlockMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Get or compute the number of cols.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 2</span>
+
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2, 7, 6)</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 6</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;numCols&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="BlockMatrix.toLocalMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toLocalMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toLocalMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Collect the distributed matrix on the driver as a DenseMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2).toLocalMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This BlockMatrix will have 6 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # having two sub-matrix blocks stacked, each with 3 rows.</span>
+<span class="sd"> &gt;&gt;&gt; # The ensuing DenseMatrix will also have 6 rows.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows)</span>
+<span class="sd"> 6</span>
+
+<span class="sd"> &gt;&gt;&gt; # This BlockMatrix will have 2 effective columns, due to</span>
+<span class="sd"> &gt;&gt;&gt; # having two sub-matrix blocks stacked, each with 2</span>
+<span class="sd"> &gt;&gt;&gt; # columns. The ensuing DenseMatrix will also have 2 columns.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols)</span>
+<span class="sd"> 2</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toLocalMatrix&quot;</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="BlockMatrix.toIndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toIndexedRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toIndexedRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to an IndexedRowMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 3, 2).toIndexedRowMatrix()</span>
+
+<span class="sd"> &gt;&gt;&gt; # This BlockMatrix will have 6 effective rows, due to</span>
+<span class="sd"> &gt;&gt;&gt; # having two sub-matrix blocks stacked, each with 3 rows.</span>
+<span class="sd"> &gt;&gt;&gt; # The ensuing IndexedRowMatrix will also have 6 rows.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numRows())</span>
+<span class="sd"> 6</span>
+
+<span class="sd"> &gt;&gt;&gt; # This BlockMatrix will have 2 effective columns, due to</span>
+<span class="sd"> &gt;&gt;&gt; # having two sub-matrix blocks stacked, each with 2 columns.</span>
+<span class="sd"> &gt;&gt;&gt; # The ensuing IndexedRowMatrix will also have 2 columns.</span>
+<span class="sd"> &gt;&gt;&gt; print(mat.numCols())</span>
+<span class="sd"> 2</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_indexed_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toIndexedRowMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">IndexedRowMatrix</span><span class="p">(</span><span class="n">java_indexed_row_matrix</span><span class="p">)</span>
+</div>
+<div class="viewcode-block" id="BlockMatrix.toCoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toCoordinateMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toCoordinateMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+ <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd"> Convert this matrix to a CoordinateMatrix.</span>
+
+<span class="sd"> &gt;&gt;&gt; blocks = sc.parallelize([((0, 0), Matrices.dense(1, 2, [1, 2])),</span>
+<span class="sd"> ... ((1, 0), Matrices.dense(1, 2, [7, 8]))])</span>
+<span class="sd"> &gt;&gt;&gt; mat = BlockMatrix(blocks, 1, 2).toCoordinateMatrix()</span>
+<span class="sd"> &gt;&gt;&gt; mat.entries.take(3)</span>
+<span class="sd"> [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 2.0), MatrixEntry(1, 0, 7.0)]</span>
+<span class="sd"> &quot;&quot;&quot;</span>
+ <span class="n">java_coordinate_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">&quot;toCoordinateMatrix&quot;</span><span class="p">)</span>
+ <span class="k">return</span> <span class="n">CoordinateMatrix</span><span class="p">(</span><span class="n">java_coordinate_matrix</span><span class="p">)</span>
+
+</div></div>
+<span class="k">def</span> <span class="nf">_test</span><span class="p">():</span>
+ <span class="kn">import</span> <span class="nn">doctest</span>
+ <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">SparkContext</span>
+ <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SQLContext</span>
+ <span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">Matrices</span>
+ <span class="kn">import</span> <span class="nn">pyspark.mllib.linalg.distributed</span>
+ <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">distributed</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
+ <span class="n">globs</span><span class="p">[</span><span class="s">&#39;sc&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="p">(</span><span class="s">&#39;local[2]&#39;</span><span class="p">,</span> <span class="s">&#39;PythonTest&#39;</span><span class="p">,</span> <span class="n">batchSize</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
+ <span class="n">globs</span><span class="p">[</span><span class="s">&#39;sqlContext&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="p">(</span><span class="n">globs</span><span class="p">[</span><span class="s">&#39;sc&#39;</span><span class="p">])</span>
+ <span class="n">globs</span><span class="p">[</span><span class="s">&#39;Matrices&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">Matrices</span>
+ <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span>
+ <span class="n">globs</span><span class="p">[</span><span class="s">&#39;sc&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
+ <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
+ <span class="nb">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
+
+<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">&quot;__main__&quot;</span><span class="p">:</span>
+ <span class="n">_test</span><span class="p">()</span>
+</pre></div>
+
+ </div>
+ </div>
+ </div>
+ <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
+ <div class="sphinxsidebarwrapper">
+ <p class="logo"><a href="../../../../index.html">
+ <img class="logo" src="../../../../_static/spark-logo-hd.png" alt="PySpark documentation home"/>
+ </a></p>
+<div id="searchbox" style="display: none" role="search">
+ <h3>Quick search</h3>
+ <form class="search" action="../../../../search.html" method="get">
+ <input type="text" name="q" aria-label="Search" />
+ <input type="submit" value="Go" />
+ <input type="hidden" name="check_keywords" value="yes" />
+ <input type="hidden" name="area" value="default" />
+ </form>
+ <p class="searchtip" style="font-size: 90%">
+ Enter search terms or a module, class or function name.
+ </p>
+</div>
+<script type="text/javascript">$('#searchbox').show(0);</script>
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related" role="navigation" aria-label="related navigation">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="nav-item nav-item-0"><a href="../../../../index.html">PySpark master documentation</a> &raquo;</li>
+ <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> &raquo;</li>
+ <li class="nav-item nav-item-2"><a href="../linalg.html" >pyspark.mllib.linalg</a> &raquo;</li>
+ </ul>
+ </div>
+ <div class="footer" role="contentinfo">
+ &copy; Copyright .
+ Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 1.3.1.
+ </div>
+ </body>
+</html> \ No newline at end of file