diff options
Diffstat (limited to 'docs')
-rw-r--r-- | docs/mllib-data-types.md | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md
index 11033bf4f9..f0e8d54956 100644
--- a/docs/mllib-data-types.md
+++ b/docs/mllib-data-types.md
@@ -494,6 +494,9 @@ rowMat = mat.toRowMatrix()
 
 # Convert to a CoordinateMatrix.
 coordinateMat = mat.toCoordinateMatrix()
+
+# Convert to a BlockMatrix.
+blockMat = mat.toBlockMatrix()
 {% endhighlight %}
 </div>
 
@@ -594,6 +597,9 @@ rowMat = mat.toRowMatrix()
 
 # Convert to an IndexedRowMatrix.
 indexedRowMat = mat.toIndexedRowMatrix()
+
+# Convert to a BlockMatrix.
+blockMat = mat.toBlockMatrix()
 {% endhighlight %}
 </div>
 
@@ -661,4 +667,39 @@ matA.validate();
 BlockMatrix ata = matA.transpose().multiply(matA);
 {% endhighlight %}
 </div>
+
+<div data-lang="python" markdown="1">
+
+A [`BlockMatrix`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.BlockMatrix)
+can be created from an `RDD` of sub-matrix blocks, where a sub-matrix block is a
+`((blockRowIndex, blockColIndex), sub-matrix)` tuple.
+
+{% highlight python %}
+from pyspark.mllib.linalg import Matrices
+from pyspark.mllib.linalg.distributed import BlockMatrix
+
+# Create an RDD of sub-matrix blocks.
+blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+                         ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+
+# Create a BlockMatrix from an RDD of sub-matrix blocks.
+mat = BlockMatrix(blocks, 3, 2)
+
+# Get its size.
+m = mat.numRows() # 6
+n = mat.numCols() # 2
+
+# Get the blocks as an RDD of sub-matrix blocks.
+blocksRDD = mat.blocks
+
+# Convert to a LocalMatrix.
+localMat = mat.toLocalMatrix()
+
+# Convert to an IndexedRowMatrix.
+indexedRowMat = mat.toIndexedRowMatrix()
+
+# Convert to a CoordinateMatrix.
+coordinateMat = mat.toCoordinateMatrix()
+{% endhighlight %}
+</div>
 </div>
|