summaryrefslogblamecommitdiff
path: root/site/docs/1.5.0/api/java/org/apache/spark/ml/attribute/package-summary.html
blob: 25d74b6d602dc25e4c967a617d187f1e6b3c661e (plain) (tree)
1
2
3
4
5
6
7



                                                                                                      
                                                                                
                                            
                                       





























                                                                                               
                                                                                           
























































                                                                                                                                                                      
















































































                                                                                                                                                                                           
                                                                                           


























                                                                                                                                                                                               
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (version 1.7.0_51) on Wed Sep 16 15:55:13 PDT 2015 -->
<!-- Declare the character encoding explicitly so browsers do not have to guess it
     when the server sends no charset. The page content is plain ASCII, so UTF-8 is
     a safe declaration. Kept near the top of the head so it is seen early. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>org.apache.spark.ml.attribute</title>
<meta name="date" content="2015-09-16">
<!-- Shared javadoc stylesheet, five directory levels up from this package page. -->
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
</head>
<body>
<script type="text/javascript"><!--
    // Unless the URL carries the is-external=true flag, copy this package's name
    // into the title of the parent document (the window itself when not framed).
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "org.apache.spark.ml.attribute";
    }
//-->
</script>
<!-- Fallback notice shown when scripting is unavailable. -->
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<!-- First row: links to the overview, package tree, deprecated list, index and help
     pages. "Package" is marked as the current view (navBarCell1Rev) and "Class" is
     plain text because no class is selected on a package summary page. -->
<!-- NOTE(review): the "Skip navigation links" anchor below has a title but no link
     text, so it renders empty. Confirm whether that is intended before changing it. -->
<div class="topNav"><a name="navbar_top">
<!--   -->
</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
<!--   -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<!-- Second row: previous/next package links, the Frames / No Frames switch
     (both target _top), and the "All Classes" link. -->
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/ml/package-summary.html">Prev Package</a></li>
<li><a href="../../../../../org/apache/spark/ml/classification/package-summary.html">Next Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/ml/attribute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  // Show the "All Classes" list only when this page is the top-level window;
  // hide it when the page is loaded inside another window (window != top).
  // Note: allClassesLink is assigned without a declaration, so it is a global.
  allClassesLink = document.getElementById("allclasses_navbar_top");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<!-- Target of the "Skip navigation links" anchor at the top of this navbar. -->
<a name="skip-navbar_top">
<!--   -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- Page header: package title, its one-line summary, and a jump link to the
     full description anchor (#package_description) further down the page. -->
<div class="header">
<h1 title="Package" class="title">Package&nbsp;org.apache.spark.ml.attribute</h1>
<div class="docSummary">
<div class="block">ML attributes</div>
</div>
<p>See:&nbsp;<a href="#package_description">Description</a></p>
</div>
<div class="contentContainer">
<!-- Class Summary: one table row per class in this package. The first column links
     to the class page, the second holds its short description. Row classes
     alternate between altColor and rowColor. -->
<!-- NOTE(review): every description starts with a DeveloperApi marker wrapped in
     double colons. It looks like a tag meant for post-processing, possibly by
     lib/api-javadocs.js loaded at the end of the page. Confirm before editing. -->
<ul class="blockList">
<li class="blockList">
<table class="packageSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
<caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Class</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/Attribute.html" title="class in org.apache.spark.ml.attribute">Attribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 Abstract class for ML attributes.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/AttributeGroup.html" title="class in org.apache.spark.ml.attribute">AttributeGroup</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 Attributes that describe a vector ML column.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/AttributeType.html" title="class in org.apache.spark.ml.attribute">AttributeType</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 An enum-like type for attribute types: <code>AttributeType$.Numeric</code>, <code>AttributeType$.Nominal</code>,
 and <code>AttributeType$.Binary</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/BinaryAttribute.html" title="class in org.apache.spark.ml.attribute">BinaryAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A binary attribute.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/NominalAttribute.html" title="class in org.apache.spark.ml.attribute">NominalAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A nominal attribute.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/NumericAttribute.html" title="class in org.apache.spark.ml.attribute">NumericAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A numeric attribute with optional summary statistics.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/UnresolvedAttribute.html" title="class in org.apache.spark.ml.attribute">UnresolvedAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 An unresolved attribute.</div>
</td>
</tr>
</tbody>
</table>
</li>
</ul>
<!-- Named anchor targeted by the "Description" link in the page header, followed
     by the full package description. The description links to the DataFrame,
     Attribute and AttributeGroup class pages. -->
<a name="package_description">
<!--   -->
</a>
<h2 title="Package org.apache.spark.ml.attribute Description">Package org.apache.spark.ml.attribute Description</h2>
<div class="block"><h2>ML attributes</h2>

 The ML pipeline API uses <a href="../../../../../org/apache/spark/sql/DataFrame.html" title="class in org.apache.spark.sql"><code>DataFrame</code></a>s as ML datasets.
 Each dataset consists of typed columns, e.g., string, double, vector, etc.
 However, knowing only the column type may not be sufficient to handle the data properly.
 For instance, a double column with values 0.0, 1.0, 2.0, ... may represent some label indices,
 which cannot be treated as numeric values in ML algorithms, and, for another instance, we may
 want to know the names and types of features stored in a vector column.
 ML attributes are used to provide additional information to describe columns in a dataset.

 <h3>ML columns</h3>

 A column with ML attributes attached is called an ML column.
 The data in ML columns are stored as double values, i.e., an ML column is either a scalar column
 of double values or a vector column.
 Columns of other types must be encoded into ML columns using transformers.
 We use <a href="../../../../../org/apache/spark/ml/attribute/Attribute.html" title="class in org.apache.spark.ml.attribute"><code>Attribute</code></a> to describe a scalar ML column, and
 <a href="../../../../../org/apache/spark/ml/attribute/AttributeGroup.html" title="class in org.apache.spark.ml.attribute"><code>AttributeGroup</code></a> to describe a vector ML column.
 ML attributes are stored in the metadata field of the column schema.</div>
</div>
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<!-- Bottom navigation bar with the same link set as the bar at the top of the page;
     its anchors and ids use the navbar_bottom suffix. -->
<!-- NOTE(review): the "Skip navigation links" anchor below has a title but no link
     text, so it renders empty. Confirm whether that is intended before changing it. -->
<div class="bottomNav"><a name="navbar_bottom">
<!--   -->
</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
<!--   -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<!-- Second row: previous/next package links, the Frames / No Frames switch
     (both target _top), and the "All Classes" link. -->
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/ml/package-summary.html">Prev Package</a></li>
<li><a href="../../../../../org/apache/spark/ml/classification/package-summary.html">Next Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/ml/attribute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  // Show the "All Classes" list only when this page is the top-level window;
  // hide it when the page is loaded inside another window (window != top).
  // Note: allClassesLink is assigned without a declaration, so it is a global.
  allClassesLink = document.getElementById("allclasses_navbar_bottom");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<!-- Target of the "Skip navigation links" anchor at the top of this navbar. -->
<a name="skip-navbar_bottom">
<!--   -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<!-- Deferred scripts from the shared lib directory: jquery.js first, then
     api-javadocs.js. NOTE(review): api-javadocs.js presumably post-processes this
     page using jQuery; its behavior is not visible from this file, so verify
     against that script before changing class names or marker text above. -->
<script defer="defer" type="text/javascript" src="../../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../../lib/api-javadocs.js"></script></body>
</html>