diff options
author | Reynold Xin <rxin@apache.org> | 2015-09-08 23:20:31 +0000 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2015-09-08 23:20:31 +0000 |
commit | 443d7fc272a34a818df4dd589bb251ec1087ae11 (patch) | |
tree | 58b176846888d8824cd113146bd59568ea5354f6 /site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html | |
parent | 1037fcd3d980ca1bf8e79ccfecd1f5234545b6ff (diff) | |
download | spark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.tar.gz spark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.tar.bz2 spark-website-443d7fc272a34a818df4dd589bb251ec1087ae11.zip |
Added 1.5.0 docs.
Diffstat (limited to 'site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html')
-rw-r--r-- | site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html | 938 |
1 files changed, 938 insertions, 0 deletions
diff --git a/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html b/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html new file mode 100644 index 000000000..de8b4b144 --- /dev/null +++ b/site/docs/1.5.0/api/python/_modules/pyspark/mllib/linalg/distributed.html @@ -0,0 +1,938 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + + +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + + <title>pyspark.mllib.linalg.distributed — PySpark master documentation</title> + + <link rel="stylesheet" href="../../../../_static/nature.css" type="text/css" /> + <link rel="stylesheet" href="../../../../_static/pygments.css" type="text/css" /> + + <script type="text/javascript"> + var DOCUMENTATION_OPTIONS = { + URL_ROOT: '../../../../', + VERSION: 'master', + COLLAPSE_INDEX: false, + FILE_SUFFIX: '.html', + HAS_SOURCE: true + }; + </script> + <script type="text/javascript" src="../../../../_static/jquery.js"></script> + <script type="text/javascript" src="../../../../_static/underscore.js"></script> + <script type="text/javascript" src="../../../../_static/doctools.js"></script> + <link rel="top" title="PySpark master documentation" href="../../../../index.html" /> + <link rel="up" title="pyspark.mllib.linalg" href="../linalg.html" /> + </head> + <body role="document"> + <div class="related" role="navigation" aria-label="related navigation"> + <h3>Navigation</h3> + <ul> + <li class="nav-item nav-item-0"><a href="../../../../index.html">PySpark master documentation</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../linalg.html" accesskey="U">pyspark.mllib.linalg</a> »</li> + </ul> + </div> + + <div class="document"> + <div class="documentwrapper"> + <div class="bodywrapper"> + <div class="body" 
role="main"> + + <h1>Source code for pyspark.mllib.linalg.distributed</h1><div class="highlight"><pre> +<span class="c">#</span> +<span class="c"># Licensed to the Apache Software Foundation (ASF) under one or more</span> +<span class="c"># contributor license agreements. See the NOTICE file distributed with</span> +<span class="c"># this work for additional information regarding copyright ownership.</span> +<span class="c"># The ASF licenses this file to You under the Apache License, Version 2.0</span> +<span class="c"># (the "License"); you may not use this file except in compliance with</span> +<span class="c"># the License. You may obtain a copy of the License at</span> +<span class="c">#</span> +<span class="c"># http://www.apache.org/licenses/LICENSE-2.0</span> +<span class="c">#</span> +<span class="c"># Unless required by applicable law or agreed to in writing, software</span> +<span class="c"># distributed under the License is distributed on an "AS IS" BASIS,</span> +<span class="c"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> +<span class="c"># See the License for the specific language governing permissions and</span> +<span class="c"># limitations under the License.</span> +<span class="c">#</span> + +<span class="sd">"""</span> +<span class="sd">Package for distributed linear algebra.</span> +<span class="sd">"""</span> + +<span class="kn">import</span> <span class="nn">sys</span> + +<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version</span> <span class="o">>=</span> <span class="s">'3'</span><span class="p">:</span> + <span class="nb">long</span> <span class="o">=</span> <span class="nb">int</span> + +<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span> + +<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">RDD</span> +<span 
class="kn">from</span> <span class="nn">pyspark.mllib.common</span> <span class="kn">import</span> <span class="n">callMLlibFunc</span><span class="p">,</span> <span class="n">JavaModelWrapper</span> +<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">_convert_to_vector</span><span class="p">,</span> <span class="n">Matrix</span> + + +<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span><span class="s">'DistributedMatrix'</span><span class="p">,</span> <span class="s">'RowMatrix'</span><span class="p">,</span> <span class="s">'IndexedRow'</span><span class="p">,</span> + <span class="s">'IndexedRowMatrix'</span><span class="p">,</span> <span class="s">'MatrixEntry'</span><span class="p">,</span> <span class="s">'CoordinateMatrix'</span><span class="p">,</span> + <span class="s">'BlockMatrix'</span><span class="p">]</span> + + +<div class="viewcode-block" id="DistributedMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix">[docs]</a><span class="k">class</span> <span class="nc">DistributedMatrix</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. 
note:: Experimental</span> + +<span class="sd"> Represents a distributively stored matrix backed by one or</span> +<span class="sd"> more RDDs.</span> + +<span class="sd"> """</span> +<div class="viewcode-block" id="DistributedMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""Get or compute the number of rows."""</span> + <span class="k">raise</span> <span class="ne">NotImplementedError</span> +</div> +<div class="viewcode-block" id="DistributedMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.DistributedMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""Get or compute the number of cols."""</span> + <span class="k">raise</span> <span class="ne">NotImplementedError</span> + +</div></div> +<div class="viewcode-block" id="RowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix">[docs]</a><span class="k">class</span> <span class="nc">RowMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. note:: Experimental</span> + +<span class="sd"> Represents a row-oriented distributed Matrix with no meaningful</span> +<span class="sd"> row indices.</span> + +<span class="sd"> :param rows: An RDD of vectors.</span> +<span class="sd"> :param numRows: Number of rows in the matrix. 
A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of rows will be determined by the number of</span> +<span class="sd"> records in the `rows` RDD.</span> +<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of columns will be determined by the size of</span> +<span class="sd"> the first row.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Note: This docstring is not shown publicly.</span> + +<span class="sd"> Create a wrapper over a Java RowMatrix.</span> + +<span class="sd"> Publicly, we require that `rows` be an RDD. However, for</span> +<span class="sd"> internal usage, `rows` can also be a Java RowMatrix</span> +<span class="sd"> object, in which case we can wrap it directly. This</span> +<span class="sd"> assists in clean matrix conversions.</span> + +<span class="sd"> >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])</span> +<span class="sd"> >>> mat = RowMatrix(rows)</span> + +<span class="sd"> >>> mat_diff = RowMatrix(rows)</span> +<span class="sd"> >>> (mat_diff._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> False</span> + +<span class="sd"> >>> mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> >>> (mat_same._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... 
mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> True</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span> + <span class="n">rows</span> <span class="o">=</span> <span class="n">rows</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_vector</span><span class="p">)</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"createRowMatrix"</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span> + <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span> + <span class="ow">and</span> <span class="n">rows</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">"RowMatrix"</span><span class="p">):</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">rows</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"rows should be an RDD of vectors, got </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">rows</span><span class="p">))</span> + + <span 
class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">rows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Rows of the RowMatrix stored as an RDD of vectors.</span> + +<span class="sd"> >>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]]))</span> +<span class="sd"> >>> rows = mat.rows</span> +<span class="sd"> >>> rows.first()</span> +<span class="sd"> DenseVector([1.0, 2.0, 3.0])</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"rows"</span><span class="p">)</span> + +<div class="viewcode-block" id="RowMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of rows.</span> + +<span class="sd"> >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],</span> +<span class="sd"> ... 
[7, 8, 9], [10, 11, 12]])</span> + +<span class="sd"> >>> mat = RowMatrix(rows)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 4</span> + +<span class="sd"> >>> mat = RowMatrix(rows, 7, 6)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numRows"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="RowMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.RowMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of cols.</span> + +<span class="sd"> >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],</span> +<span class="sd"> ... 
[7, 8, 9], [10, 11, 12]])</span> + +<span class="sd"> >>> mat = RowMatrix(rows)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 3</span> + +<span class="sd"> >>> mat = RowMatrix(rows, 7, 6)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 6</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numCols"</span><span class="p">)</span> + +</div></div> +<div class="viewcode-block" id="IndexedRow"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRow">[docs]</a><span class="k">class</span> <span class="nc">IndexedRow</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. note:: Experimental</span> + +<span class="sd"> Represents a row of an IndexedRowMatrix.</span> + +<span class="sd"> Just a wrapper over a (long, vector) tuple.</span> + +<span class="sd"> :param index: The index for the given row.</span> +<span class="sd"> :param vector: The row in the matrix at the given index.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">,</span> <span class="n">vector</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">index</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">vector</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">vector</span><span 
class="p">)</span> + + <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s">"IndexedRow(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">vector</span><span class="p">)</span> + +</div> +<span class="k">def</span> <span class="nf">_convert_to_indexed_row</span><span class="p">(</span><span class="n">row</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="n">IndexedRow</span><span class="p">):</span> + <span class="k">return</span> <span class="n">row</span> + <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">row</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> + <span class="k">return</span> <span class="n">IndexedRow</span><span class="p">(</span><span class="o">*</span><span class="n">row</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"Cannot convert type </span><span class="si">%s</span><span class="s"> into IndexedRow"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">row</span><span class="p">))</span> + + +<div class="viewcode-block" 
id="IndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix">[docs]</a><span class="k">class</span> <span class="nc">IndexedRowMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. note:: Experimental</span> + +<span class="sd"> Represents a row-oriented distributed Matrix with indexed rows.</span> + +<span class="sd"> :param rows: An RDD of IndexedRows or (long, vector) tuples.</span> +<span class="sd"> :param numRows: Number of rows in the matrix. A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of rows will be determined by the max row</span> +<span class="sd"> index plus one.</span> +<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of columns will be determined by the size of</span> +<span class="sd"> the first row.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rows</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Note: This docstring is not shown publicly.</span> + +<span class="sd"> Create a wrapper over a Java IndexedRowMatrix.</span> + +<span class="sd"> Publicly, we require that `rows` be an RDD. However, for</span> +<span class="sd"> internal usage, `rows` can also be a Java IndexedRowMatrix</span> +<span class="sd"> object, in which case we can wrap it directly. 
This</span> +<span class="sd"> assists in clean matrix conversions.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... IndexedRow(1, [4, 5, 6])])</span> +<span class="sd"> >>> mat = IndexedRowMatrix(rows)</span> + +<span class="sd"> >>> mat_diff = IndexedRowMatrix(rows)</span> +<span class="sd"> >>> (mat_diff._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> False</span> + +<span class="sd"> >>> mat_same = IndexedRowMatrix(mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> >>> (mat_same._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> True</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span> + <span class="n">rows</span> <span class="o">=</span> <span class="n">rows</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_indexed_row</span><span class="p">)</span> + <span class="c"># We use DataFrames for serialization of IndexedRows from</span> + <span class="c"># Python, so first convert the RDD to a DataFrame on this</span> + <span class="c"># side. This will convert each IndexedRow to a Row</span> + <span class="c"># containing the 'index' and 'vector' values, which can</span> + <span class="c"># both be easily serialized. 
We will convert back to</span> + <span class="c"># IndexedRows on the Scala side.</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"createIndexedRowMatrix"</span><span class="p">,</span> <span class="n">rows</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span> + <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span> + <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">rows</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span> + <span class="ow">and</span> <span class="n">rows</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">"IndexedRowMatrix"</span><span class="p">):</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">rows</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"rows should be an RDD of IndexedRows or (long, vector) tuples, "</span> + <span class="s">"got </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">rows</span><span class="p">))</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span 
class="nf">rows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.</span> + +<span class="sd"> >>> mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... IndexedRow(1, [4, 5, 6])]))</span> +<span class="sd"> >>> rows = mat.rows</span> +<span class="sd"> >>> rows.first()</span> +<span class="sd"> IndexedRow(0, [1.0,2.0,3.0])</span> +<span class="sd"> """</span> + <span class="c"># We use DataFrames for serialization of IndexedRows from</span> + <span class="c"># Java, so we first convert the RDD of rows to a DataFrame</span> + <span class="c"># on the Scala/Java side. Then we map each Row in the</span> + <span class="c"># DataFrame back to an IndexedRow on this side.</span> + <span class="n">rows_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"getIndexedRows"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span> + <span class="n">rows</span> <span class="o">=</span> <span class="n">rows_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">IndexedRow</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span> + <span class="k">return</span> <span class="n">rows</span> + +<div class="viewcode-block" id="IndexedRowMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.numRows">[docs]</a> <span 
class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of rows.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... IndexedRow(1, [4, 5, 6]),</span> +<span class="sd"> ... IndexedRow(2, [7, 8, 9]),</span> +<span class="sd"> ... IndexedRow(3, [10, 11, 12])])</span> + +<span class="sd"> >>> mat = IndexedRowMatrix(rows)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 4</span> + +<span class="sd"> >>> mat = IndexedRowMatrix(rows, 7, 6)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numRows"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="IndexedRowMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of cols.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... IndexedRow(1, [4, 5, 6]),</span> +<span class="sd"> ... IndexedRow(2, [7, 8, 9]),</span> +<span class="sd"> ... 
IndexedRow(3, [10, 11, 12])])</span> + +<span class="sd"> >>> mat = IndexedRowMatrix(rows)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 3</span> + +<span class="sd"> >>> mat = IndexedRowMatrix(rows, 7, 6)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 6</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numCols"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="IndexedRowMatrix.toRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a RowMatrix.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... 
IndexedRow(6, [4, 5, 6])])</span> +<span class="sd"> >>> mat = IndexedRowMatrix(rows).toRowMatrix()</span> +<span class="sd"> >>> mat.rows.collect()</span> +<span class="sd"> [DenseVector([1.0, 2.0, 3.0]), DenseVector([4.0, 5.0, 6.0])]</span> +<span class="sd"> """</span> + <span class="n">java_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toRowMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">RowMatrix</span><span class="p">(</span><span class="n">java_row_matrix</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="IndexedRowMatrix.toCoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toCoordinateMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toCoordinateMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a CoordinateMatrix.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 0]),</span> +<span class="sd"> ... 
IndexedRow(6, [0, 5])])</span> +<span class="sd"> >>> mat = IndexedRowMatrix(rows).toCoordinateMatrix()</span> +<span class="sd"> >>> mat.entries.take(3)</span> +<span class="sd"> [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 0.0), MatrixEntry(6, 0, 0.0)]</span> +<span class="sd"> """</span> + <span class="n">java_coordinate_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toCoordinateMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">CoordinateMatrix</span><span class="p">(</span><span class="n">java_coordinate_matrix</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="IndexedRowMatrix.toBlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix.toBlockMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toBlockMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a BlockMatrix.</span> + +<span class="sd"> :param rowsPerBlock: Number of rows that make up each block.</span> +<span class="sd"> The blocks forming the final rows are not</span> +<span class="sd"> required to have the given number of rows.</span> +<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span> +<span class="sd"> The blocks forming the final columns are not</span> +<span class="sd"> required to have the given number of columns.</span> + +<span class="sd"> >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),</span> +<span class="sd"> ... 
IndexedRow(6, [4, 5, 6])])</span> +<span class="sd"> >>> mat = IndexedRowMatrix(rows).toBlockMatrix()</span> + +<span class="sd"> >>> # This IndexedRowMatrix will have 7 effective rows, due to</span> +<span class="sd"> >>> # the highest row index being 6, and the ensuing</span> +<span class="sd"> >>> # BlockMatrix will have 7 rows as well.</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> + +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 3</span> +<span class="sd"> """</span> + <span class="n">java_block_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toBlockMatrix"</span><span class="p">,</span> + <span class="n">rowsPerBlock</span><span class="p">,</span> + <span class="n">colsPerBlock</span><span class="p">)</span> + <span class="k">return</span> <span class="n">BlockMatrix</span><span class="p">(</span><span class="n">java_block_matrix</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">)</span> + +</div></div> +<div class="viewcode-block" id="MatrixEntry"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.MatrixEntry">[docs]</a><span class="k">class</span> <span class="nc">MatrixEntry</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. 
note:: Experimental</span> + +<span class="sd"> Represents an entry of a CoordinateMatrix.</span> + +<span class="sd"> Just a wrapper over a (long, long, float) tuple.</span> + +<span class="sd"> :param i: The row index of the matrix.</span> +<span class="sd"> :param j: The column index of the matrix.</span> +<span class="sd"> :param value: The (i, j)th entry of the matrix, as a float.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">i</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">j</span> <span class="o">=</span> <span class="nb">long</span><span class="p">(</span><span class="n">j</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">value</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> + + <span class="k">def</span> <span class="nf">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="s">"MatrixEntry(</span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">, </span><span class="si">%s</span><span class="s">)"</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">i</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">j</span><span class="p">,</span> <span class="bp">self</span><span 
class="o">.</span><span class="n">value</span><span class="p">)</span> + +</div> +<span class="k">def</span> <span class="nf">_convert_to_matrix_entry</span><span class="p">(</span><span class="n">entry</span><span class="p">):</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entry</span><span class="p">,</span> <span class="n">MatrixEntry</span><span class="p">):</span> + <span class="k">return</span> <span class="n">entry</span> + <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entry</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">entry</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span> + <span class="k">return</span> <span class="n">MatrixEntry</span><span class="p">(</span><span class="o">*</span><span class="n">entry</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"Cannot convert type </span><span class="si">%s</span><span class="s"> into MatrixEntry"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">entry</span><span class="p">))</span> + + +<div class="viewcode-block" id="CoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix">[docs]</a><span class="k">class</span> <span class="nc">CoordinateMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. 
note:: Experimental</span> + +<span class="sd"> Represents a matrix in coordinate format.</span> + +<span class="sd"> :param entries: An RDD of MatrixEntry inputs or</span> +<span class="sd"> (long, long, float) tuples.</span> +<span class="sd"> :param numRows: Number of rows in the matrix. A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of rows will be determined by the max row</span> +<span class="sd"> index plus one.</span> +<span class="sd"> :param numCols: Number of columns in the matrix. A non-positive</span> +<span class="sd"> value means unknown, at which point the number</span> +<span class="sd"> of columns will be determined by the max column</span> +<span class="sd"> index plus one.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">entries</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Note: This docstring is not shown publicly.</span> + +<span class="sd"> Create a wrapper over a Java CoordinateMatrix.</span> + +<span class="sd"> Publicly, we require that `entries` be an RDD. However, for</span> +<span class="sd"> internal usage, `entries` can also be a Java CoordinateMatrix</span> +<span class="sd"> object, in which case we can wrap it directly. This</span> +<span class="sd"> assists in clean matrix conversions.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... 
MatrixEntry(6, 4, 2.1)])</span> +<span class="sd"> >>> mat = CoordinateMatrix(entries)</span> + +<span class="sd"> >>> mat_diff = CoordinateMatrix(entries)</span> +<span class="sd"> >>> (mat_diff._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> False</span> + +<span class="sd"> >>> mat_same = CoordinateMatrix(mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> >>> (mat_same._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> True</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">entries</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span> + <span class="n">entries</span> <span class="o">=</span> <span class="n">entries</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_matrix_entry</span><span class="p">)</span> + <span class="c"># We use DataFrames for serialization of MatrixEntry entries</span> + <span class="c"># from Python, so first convert the RDD to a DataFrame on</span> + <span class="c"># this side. This will convert each MatrixEntry to a Row</span> + <span class="c"># containing the 'i', 'j', and 'value' values, which can</span> + <span class="c"># each be easily serialized. 
We will convert back to</span> + <span class="c"># MatrixEntry inputs on the Scala side.</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"createCoordinateMatrix"</span><span class="p">,</span> <span class="n">entries</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span> + <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">long</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span> + <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">entries</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span> + <span class="ow">and</span> <span class="n">entries</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">"CoordinateMatrix"</span><span class="p">):</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">entries</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"entries should be an RDD of MatrixEntry entries or "</span> + <span class="s">"(long, long, float) tuples, got </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">entries</span><span class="p">))</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span> + + <span class="nd">@property</span> 
+ <span class="k">def</span> <span class="nf">entries</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Entries of the CoordinateMatrix stored as an RDD of</span> +<span class="sd"> MatrixEntries.</span> + +<span class="sd"> >>> mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... MatrixEntry(6, 4, 2.1)]))</span> +<span class="sd"> >>> entries = mat.entries</span> +<span class="sd"> >>> entries.first()</span> +<span class="sd"> MatrixEntry(0, 0, 1.2)</span> +<span class="sd"> """</span> + <span class="c"># We use DataFrames for serialization of MatrixEntry entries</span> + <span class="c"># from Java, so we first convert the RDD of entries to a</span> + <span class="c"># DataFrame on the Scala/Java side. Then we map each Row in</span> + <span class="c"># the DataFrame back to a MatrixEntry on this side.</span> + <span class="n">entries_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"getMatrixEntries"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span> + <span class="n">entries</span> <span class="o">=</span> <span class="n">entries_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">MatrixEntry</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">2</span><span class="p">]))</span> + <span class="k">return</span> <span class="n">entries</span> + +<div 
class="viewcode-block" id="CoordinateMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of rows.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... MatrixEntry(1, 0, 2),</span> +<span class="sd"> ... MatrixEntry(2, 1, 3.7)])</span> + +<span class="sd"> >>> mat = CoordinateMatrix(entries)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 3</span> + +<span class="sd"> >>> mat = CoordinateMatrix(entries, 7, 6)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numRows"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="CoordinateMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of cols.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... MatrixEntry(1, 0, 2),</span> +<span class="sd"> ... 
MatrixEntry(2, 1, 3.7)])</span> + +<span class="sd"> >>> mat = CoordinateMatrix(entries)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 2</span> + +<span class="sd"> >>> mat = CoordinateMatrix(entries, 7, 6)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 6</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numCols"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="CoordinateMatrix.toRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a RowMatrix.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... 
MatrixEntry(6, 4, 2.1)])</span> +<span class="sd"> >>> mat = CoordinateMatrix(entries).toRowMatrix()</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 7 effective rows, due to</span> +<span class="sd"> >>> # the highest row index being 6, but the ensuing RowMatrix</span> +<span class="sd"> >>> # will only have 2 rows since there are only entries on 2</span> +<span class="sd"> >>> # unique rows.</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 2</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 5 columns, due to the</span> +<span class="sd"> >>> # highest column index being 4, and the ensuing RowMatrix</span> +<span class="sd"> >>> # will have 5 columns as well.</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 5</span> +<span class="sd"> """</span> + <span class="n">java_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toRowMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">RowMatrix</span><span class="p">(</span><span class="n">java_row_matrix</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="CoordinateMatrix.toIndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toIndexedRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toIndexedRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to an IndexedRowMatrix.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... 
MatrixEntry(6, 4, 2.1)])</span> +<span class="sd"> >>> mat = CoordinateMatrix(entries).toIndexedRowMatrix()</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 7 effective rows, due to</span> +<span class="sd"> >>> # the highest row index being 6, and the ensuing</span> +<span class="sd"> >>> # IndexedRowMatrix will have 7 rows as well.</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 5 columns, due to the</span> +<span class="sd"> >>> # highest column index being 4, and the ensuing</span> +<span class="sd"> >>> # IndexedRowMatrix will have 5 columns as well.</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 5</span> +<span class="sd"> """</span> + <span class="n">java_indexed_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toIndexedRowMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">IndexedRowMatrix</span><span class="p">(</span><span class="n">java_indexed_row_matrix</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="CoordinateMatrix.toBlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.CoordinateMatrix.toBlockMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toBlockMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="o">=</span><span class="mi">1024</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a BlockMatrix.</span> + +<span class="sd"> :param rowsPerBlock: Number of rows 
that make up each block.</span> +<span class="sd"> The blocks forming the final rows are not</span> +<span class="sd"> required to have the given number of rows.</span> +<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span> +<span class="sd"> The blocks forming the final columns are not</span> +<span class="sd"> required to have the given number of columns.</span> + +<span class="sd"> >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),</span> +<span class="sd"> ... MatrixEntry(6, 4, 2.1)])</span> +<span class="sd"> >>> mat = CoordinateMatrix(entries).toBlockMatrix()</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 7 effective rows, due to</span> +<span class="sd"> >>> # the highest row index being 6, and the ensuing</span> +<span class="sd"> >>> # BlockMatrix will have 7 rows as well.</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> + +<span class="sd"> >>> # This CoordinateMatrix will have 5 columns, due to the</span> +<span class="sd"> >>> # highest column index being 4, and the ensuing</span> +<span class="sd"> >>> # BlockMatrix will have 5 columns as well.</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 5</span> +<span class="sd"> """</span> + <span class="n">java_block_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toBlockMatrix"</span><span class="p">,</span> + <span class="n">rowsPerBlock</span><span class="p">,</span> + <span class="n">colsPerBlock</span><span class="p">)</span> + <span class="k">return</span> <span class="n">BlockMatrix</span><span class="p">(</span><span class="n">java_block_matrix</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">)</span> + +</div></div> 
+<span class="k">def</span> <span class="nf">_convert_to_matrix_block_tuple</span><span class="p">(</span><span class="n">block</span><span class="p">):</span> + <span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">block</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span> + <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">tuple</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">==</span> <span class="mi">2</span> + <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">Matrix</span><span class="p">)):</span> + <span class="n">blockRowIndex</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> + <span class="n">blockColIndex</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">block</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">])</span> + <span class="n">subMatrix</span> <span class="o">=</span> <span class="n">block</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> + <span 
class="k">return</span> <span class="p">((</span><span class="n">blockRowIndex</span><span class="p">,</span> <span class="n">blockColIndex</span><span class="p">),</span> <span class="n">subMatrix</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"Cannot convert type </span><span class="si">%s</span><span class="s"> into a sub-matrix block tuple"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">block</span><span class="p">))</span> + + +<div class="viewcode-block" id="BlockMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix">[docs]</a><span class="k">class</span> <span class="nc">BlockMatrix</span><span class="p">(</span><span class="n">DistributedMatrix</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> .. note:: Experimental</span> + +<span class="sd"> Represents a distributed matrix in blocks of local matrices.</span> + +<span class="sd"> :param blocks: An RDD of sub-matrix blocks</span> +<span class="sd"> ((blockRowIndex, blockColIndex), sub-matrix) that</span> +<span class="sd"> form this distributed matrix. 
If multiple blocks</span> +<span class="sd"> with the same index exist, the results for</span> +<span class="sd"> operations like add and multiply will be</span> +<span class="sd"> unpredictable.</span> +<span class="sd"> :param rowsPerBlock: Number of rows that make up each block.</span> +<span class="sd"> The blocks forming the final rows are not</span> +<span class="sd"> required to have the given number of rows.</span> +<span class="sd"> :param colsPerBlock: Number of columns that make up each block.</span> +<span class="sd"> The blocks forming the final columns are not</span> +<span class="sd"> required to have the given number of columns.</span> +<span class="sd"> :param numRows: Number of rows of this matrix. If the supplied</span> +<span class="sd"> value is less than or equal to zero, the number</span> +<span class="sd"> of rows will be calculated when `numRows` is</span> +<span class="sd"> invoked.</span> +<span class="sd"> :param numCols: Number of columns of this matrix. If the supplied</span> +<span class="sd"> value is less than or equal to zero, the number</span> +<span class="sd"> of columns will be calculated when `numCols` is</span> +<span class="sd"> invoked.</span> +<span class="sd"> """</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">blocks</span><span class="p">,</span> <span class="n">rowsPerBlock</span><span class="p">,</span> <span class="n">colsPerBlock</span><span class="p">,</span> <span class="n">numRows</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numCols</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Note: This docstring is not shown publicly.</span> + +<span class="sd"> Create a wrapper over a Java BlockMatrix.</span> + +<span class="sd"> Publicly, we require that `blocks` be an RDD. 
However, for</span> +<span class="sd"> internal usage, `blocks` can also be a Java BlockMatrix</span> +<span class="sd"> object, in which case we can wrap it directly. This</span> +<span class="sd"> assists in clean matrix conversions.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> + +<span class="sd"> >>> mat_diff = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> (mat_diff._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> False</span> + +<span class="sd"> >>> mat_same = BlockMatrix(mat._java_matrix_wrapper._java_model, 3, 2)</span> +<span class="sd"> >>> (mat_same._java_matrix_wrapper._java_model ==</span> +<span class="sd"> ... mat._java_matrix_wrapper._java_model)</span> +<span class="sd"> True</span> +<span class="sd"> """</span> + <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">blocks</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span> + <span class="n">blocks</span> <span class="o">=</span> <span class="n">blocks</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_matrix_block_tuple</span><span class="p">)</span> + <span class="c"># We use DataFrames for serialization of sub-matrix blocks</span> + <span class="c"># from Python, so first convert the RDD to a DataFrame on</span> + <span class="c"># this side. This will convert each sub-matrix block</span> + <span class="c"># tuple to a Row containing the 'blockRowIndex',</span> + <span class="c"># 'blockColIndex', and 'subMatrix' values, which can</span> + <span class="c"># each be easily serialized. 
We will convert back to</span> + <span class="c"># ((blockRowIndex, blockColIndex), sub-matrix) tuples on</span> + <span class="c"># the Scala side.</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"createBlockMatrix"</span><span class="p">,</span> <span class="n">blocks</span><span class="o">.</span><span class="n">toDF</span><span class="p">(),</span> + <span class="nb">int</span><span class="p">(</span><span class="n">rowsPerBlock</span><span class="p">),</span> <span class="nb">int</span><span class="p">(</span><span class="n">colsPerBlock</span><span class="p">),</span> + <span class="nb">long</span><span class="p">(</span><span class="n">numRows</span><span class="p">),</span> <span class="nb">long</span><span class="p">(</span><span class="n">numCols</span><span class="p">))</span> + <span class="k">elif</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">blocks</span><span class="p">,</span> <span class="n">JavaObject</span><span class="p">)</span> + <span class="ow">and</span> <span class="n">blocks</span><span class="o">.</span><span class="n">getClass</span><span class="p">()</span><span class="o">.</span><span class="n">getSimpleName</span><span class="p">()</span> <span class="o">==</span> <span class="s">"BlockMatrix"</span><span class="p">):</span> + <span class="n">java_matrix</span> <span class="o">=</span> <span class="n">blocks</span> + <span class="k">else</span><span class="p">:</span> + <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s">"blocks should be an RDD of sub-matrix blocks as "</span> + <span class="s">"((int, int), matrix) tuples, got </span><span class="si">%s</span><span class="s">"</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">blocks</span><span class="p">))</span> + + <span 
class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span> <span class="o">=</span> <span class="n">JavaModelWrapper</span><span class="p">(</span><span class="n">java_matrix</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">blocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> The RDD of sub-matrix blocks</span> +<span class="sd"> ((blockRowIndex, blockColIndex), sub-matrix) that form this</span> +<span class="sd"> distributed matrix.</span> + +<span class="sd"> >>> mat = BlockMatrix(</span> +<span class="sd"> ... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2)</span> +<span class="sd"> >>> blocks = mat.blocks</span> +<span class="sd"> >>> blocks.first()</span> +<span class="sd"> ((0, 0), DenseMatrix(3, 2, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0))</span> + +<span class="sd"> """</span> + <span class="c"># We use DataFrames for serialization of sub-matrix blocks</span> + <span class="c"># from Java, so we first convert the RDD of blocks to a</span> + <span class="c"># DataFrame on the Scala/Java side. 
Then we map each Row in</span> + <span class="c"># the DataFrame back to a sub-matrix block on this side.</span> + <span class="n">blocks_df</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s">"getMatrixBlocks"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">_java_model</span><span class="p">)</span> + <span class="n">blocks</span> <span class="o">=</span> <span class="n">blocks_df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="p">((</span><span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">],</span> <span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]),</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span> + <span class="k">return</span> <span class="n">blocks</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">rowsPerBlock</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Number of rows that make up each block.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> mat.rowsPerBlock</span> +<span class="sd"> 3</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"rowsPerBlock"</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">colsPerBlock</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Number of columns that make up each block.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> mat.colsPerBlock</span> +<span class="sd"> 2</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"colsPerBlock"</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">numRowBlocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Number of rows of blocks in the BlockMatrix.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> mat.numRowBlocks</span> +<span class="sd"> 2</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numRowBlocks"</span><span class="p">)</span> + + <span class="nd">@property</span> + <span class="k">def</span> <span class="nf">numColBlocks</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Number of columns of blocks in the BlockMatrix.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> mat.numColBlocks</span> +<span class="sd"> 1</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numColBlocks"</span><span class="p">)</span> + +<div class="viewcode-block" id="BlockMatrix.numRows"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.numRows">[docs]</a> <span class="k">def</span> <span class="nf">numRows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of rows.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> + +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 6</span> + +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2, 7, 6)</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 7</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numRows"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="BlockMatrix.numCols"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.numCols">[docs]</a> <span class="k">def</span> <span class="nf">numCols</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Get or compute the number of cols.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> + +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 2</span> + +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2, 7, 6)</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 6</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"numCols"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="BlockMatrix.toLocalMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toLocalMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toLocalMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Collect the distributed matrix on the driver as a DenseMatrix.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2).toLocalMatrix()</span> + +<span class="sd"> >>> # This BlockMatrix will have 6 effective rows, due to</span> +<span class="sd"> >>> # having two sub-matrix blocks stacked, each with 3 rows.</span> +<span class="sd"> >>> # The ensuing DenseMatrix will also have 6 rows.</span> +<span class="sd"> >>> print(mat.numRows)</span> +<span class="sd"> 6</span> + +<span class="sd"> >>> # This BlockMatrix will have 2 effective columns, due to</span> +<span class="sd"> >>> # having two sub-matrix blocks stacked, each with 2</span> +<span class="sd"> >>> # columns. 
The ensuing DenseMatrix will also have 2 columns.</span> +<span class="sd"> >>> print(mat.numCols)</span> +<span class="sd"> 2</span> +<span class="sd"> """</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toLocalMatrix"</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="BlockMatrix.toIndexedRowMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toIndexedRowMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toIndexedRowMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to an IndexedRowMatrix.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 3, 2).toIndexedRowMatrix()</span> + +<span class="sd"> >>> # This BlockMatrix will have 6 effective rows, due to</span> +<span class="sd"> >>> # having two sub-matrix blocks stacked, each with 3 rows.</span> +<span class="sd"> >>> # The ensuing IndexedRowMatrix will also have 6 rows.</span> +<span class="sd"> >>> print(mat.numRows())</span> +<span class="sd"> 6</span> + +<span class="sd"> >>> # This BlockMatrix will have 2 effective columns, due to</span> +<span class="sd"> >>> # having two sub-matrix blocks stacked, each with 2 columns.</span> +<span class="sd"> >>> # The ensuing IndexedRowMatrix will also have 2 columns.</span> +<span class="sd"> >>> print(mat.numCols())</span> +<span class="sd"> 2</span> +<span class="sd"> """</span> + <span class="n">java_indexed_row_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toIndexedRowMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">IndexedRowMatrix</span><span class="p">(</span><span class="n">java_indexed_row_matrix</span><span class="p">)</span> +</div> +<div class="viewcode-block" id="BlockMatrix.toCoordinateMatrix"><a class="viewcode-back" href="../../../../pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix.toCoordinateMatrix">[docs]</a> <span class="k">def</span> <span class="nf">toCoordinateMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Convert this matrix to a CoordinateMatrix.</span> + +<span class="sd"> >>> blocks = sc.parallelize([((0, 0), Matrices.dense(1, 2, [1, 2])),</span> +<span class="sd"> ... 
((1, 0), Matrices.dense(1, 2, [7, 8]))])</span> +<span class="sd"> >>> mat = BlockMatrix(blocks, 1, 2).toCoordinateMatrix()</span> +<span class="sd"> >>> mat.entries.take(3)</span> +<span class="sd"> [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 2.0), MatrixEntry(1, 0, 7.0)]</span> +<span class="sd"> """</span> + <span class="n">java_coordinate_matrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_matrix_wrapper</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s">"toCoordinateMatrix"</span><span class="p">)</span> + <span class="k">return</span> <span class="n">CoordinateMatrix</span><span class="p">(</span><span class="n">java_coordinate_matrix</span><span class="p">)</span> + +</div></div> +<span class="k">def</span> <span class="nf">_test</span><span class="p">():</span> + <span class="kn">import</span> <span class="nn">doctest</span> + <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">SparkContext</span> + <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SQLContext</span> + <span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">Matrices</span> + <span class="kn">import</span> <span class="nn">pyspark.mllib.linalg.distributed</span> + <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">distributed</span><span class="o">.</span><span class="n">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> + <span class="n">globs</span><span class="p">[</span><span class="s">'sc'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SparkContext</span><span 
class="p">(</span><span class="s">'local[2]'</span><span class="p">,</span> <span class="s">'PythonTest'</span><span class="p">,</span> <span class="n">batchSize</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> + <span class="n">globs</span><span class="p">[</span><span class="s">'sqlContext'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="p">(</span><span class="n">globs</span><span class="p">[</span><span class="s">'sc'</span><span class="p">])</span> + <span class="n">globs</span><span class="p">[</span><span class="s">'Matrices'</span><span class="p">]</span> <span class="o">=</span> <span class="n">Matrices</span> + <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span> + <span class="n">globs</span><span class="p">[</span><span class="s">'sc'</span><span class="p">]</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> + <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> + <span class="nb">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> + +<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">"__main__"</span><span class="p">:</span> + <span class="n">_test</span><span class="p">()</span> +</pre></div> + + </div> + </div> + </div> + <div class="sphinxsidebar" role="navigation" aria-label="main navigation"> + <div 
class="sphinxsidebarwrapper"> + <p class="logo"><a href="../../../../index.html"> + <img class="logo" src="../../../../_static/spark-logo-hd.png" alt="Logo"/> + </a></p> +<div id="searchbox" style="display: none" role="search"> + <h3>Quick search</h3> + <form class="search" action="../../../../search.html" method="get"> + <input type="text" name="q" /> + <input type="submit" value="Go" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> + <p class="searchtip" style="font-size: 90%"> + Enter search terms or a module, class or function name. + </p> +</div> +<script type="text/javascript">$('#searchbox').show(0);</script> + </div> + </div> + <div class="clearer"></div> + </div> + <div class="related" role="navigation" aria-label="related navigation"> + <h3>Navigation</h3> + <ul> + <li class="nav-item nav-item-0"><a href="../../../../index.html">PySpark master documentation</a> »</li> + <li class="nav-item nav-item-1"><a href="../../../index.html" >Module code</a> »</li> + <li class="nav-item nav-item-2"><a href="../linalg.html" >pyspark.mllib.linalg</a> »</li> + </ul> + </div> + <div class="footer" role="contentinfo"> + © Copyright . + Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.3.1. + </div> + </body> +</html>
\ No newline at end of file |