summaryrefslogtreecommitdiff
path: root/site/docs/1.5.0/api/java/org/apache/spark/ml/attribute/package-summary.html
blob: 25d74b6d602dc25e4c967a617d187f1e6b3c661e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Declare the character encoding up front so browsers do not have to guess it.
     The page content is plain ASCII, which is a strict subset of UTF-8. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<!-- Generated by javadoc (version 1.7.0_51) on Wed Sep 16 15:55:13 PDT 2015 -->
<!-- Document metadata: page title, generation date, and the shared stylesheet
     located five directories above this file. -->
<title>org.apache.spark.ml.attribute</title>
<meta name="date" content="2015-09-16">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
</head>
<body>
<!-- Title sync: copies this package name into the title of the parent window's document. -->
<script type="text/javascript"><!--
    // Leave the parent title untouched when the URL carries the is-external=true marker.
    if (location.href.indexOf('is-external=true') === -1) {
        parent.document.title = "org.apache.spark.ml.attribute";
    }
//-->
</script>
<!-- Fallback notice rendered only when scripting is unavailable. -->
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<!-- Top navigation bar: two named anchors (navbar_top, navbar_top_firstrow), a link
     that jumps to #skip-navbar_top, and the primary list of documentation views. -->
<!-- NOTE(review): the skip link below has no text content, only a title attribute;
     confirm whether keyboard and screen-reader users can actually perceive it. -->
<div class="topNav"><a name="navbar_top">
<!--   -->
</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
<!--   -->
</a>
<!-- "Package" and "Class" are plain text rather than links on this page. "Package"
     carries the navBarCell1Rev class; presumably that marks the current view (confirm
     against stylesheet.css). -->
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<!-- Secondary navigation row: previous/next package links, the frames toggle, and an
     "All Classes" list whose visibility is set by the inline script further down. -->
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/ml/package-summary.html">Prev Package</a></li>
<li><a href="../../../../../org/apache/spark/ml/classification/package-summary.html">Next Package</a></li>
</ul>
<!-- Both links use target="_top", so they replace the whole window: "Frames" loads
     index.html with this page's path as the query string, "No Frames" loads this page. -->
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/ml/attribute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  // Show the "All Classes" list only when this page is the top-level window;
  // hide it when the page is displayed inside another window or frame.
  // NOTE(review): allClassesLink is assigned without var, so it becomes an implicit
  // global; the bottom navbar script assigns the same name again.
  allClassesLink = document.getElementById("allclasses_navbar_top");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<!-- Landing anchor for the href="#skip-navbar_top" link in the top navigation bar. -->
<a name="skip-navbar_top">
<!--   -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- Page header: the package title, its one-line summary, and a jump link to the
     full description anchored at #package_description lower on the page. -->
<div class="header">
<h1 title="Package" class="title">Package&nbsp;org.apache.spark.ml.attribute</h1>
<div class="docSummary">
<div class="block">ML attributes</div>
</div>
<p>See:&nbsp;<a href="#package_description">Description</a></p>
</div>
<div class="contentContainer">
<!-- Class summary: a single-item list wrapping one table. Each body row links to a
     class page (first column) and shows its short description (second column); rows
     alternate between the altColor and rowColor classes. -->
<!-- NOTE(review): every description begins with a literal "DeveloperApi" marker wrapped
     in double colons. Presumably the api-javadocs.js script loaded at the end of the
     page post-processes that text; confirm before rewording any description. -->
<!-- NOTE(review): the table uses presentational attributes (border, cellpadding,
     cellspacing) and its header row sits outside any thead element; whether these can
     be moved to CSS depends on stylesheet.css, which is not visible here. -->
<ul class="blockList">
<li class="blockList">
<table class="packageSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
<caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Class</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/Attribute.html" title="class in org.apache.spark.ml.attribute">Attribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 Abstract class for ML attributes.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/AttributeGroup.html" title="class in org.apache.spark.ml.attribute">AttributeGroup</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 Attributes that describe a vector ML column.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/AttributeType.html" title="class in org.apache.spark.ml.attribute">AttributeType</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 An enum-like type for attribute types: <code>AttributeType$.Numeric</code>, <code>AttributeType$.Nominal</code>,
 and <code>AttributeType$.Binary</code>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/BinaryAttribute.html" title="class in org.apache.spark.ml.attribute">BinaryAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A binary attribute.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/NominalAttribute.html" title="class in org.apache.spark.ml.attribute">NominalAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A nominal attribute.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/NumericAttribute.html" title="class in org.apache.spark.ml.attribute">NumericAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 A numeric attribute with optional summary statistics.</div>
</td>
</tr>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../org/apache/spark/ml/attribute/UnresolvedAttribute.html" title="class in org.apache.spark.ml.attribute">UnresolvedAttribute</a></td>
<td class="colLast">
<div class="block">:: DeveloperApi ::
 An unresolved attribute.</div>
</td>
</tr>
</tbody>
</table>
</li>
</ul>
<!-- Package description: the named anchor is the target of the "Description" link in
     the page header; the long-form package documentation follows it. -->
<!-- NOTE(review): the description body opens with its own h2 directly after the
     section h2, so two same-level headings appear back to back. -->
<a name="package_description">
<!--   -->
</a>
<h2 title="Package org.apache.spark.ml.attribute Description">Package org.apache.spark.ml.attribute Description</h2>
<div class="block"><h2>ML attributes</h2>

 The ML pipeline API uses <a href="../../../../../org/apache/spark/sql/DataFrame.html" title="class in org.apache.spark.sql"><code>DataFrame</code></a>s as ML datasets.
 Each dataset consists of typed columns, e.g., string, double, vector, etc.
 However, knowing only the column type may not be sufficient to handle the data properly.
 For instance, a double column with values 0.0, 1.0, 2.0, ... may represent some label indices,
 which cannot be treated as numeric values in ML algorithms, and, for another instance, we may
 want to know the names and types of features stored in a vector column.
 ML attributes are used to provide additional information to describe columns in a dataset.

 <h3>ML columns</h3>

 A column with ML attributes attached is called an ML column.
 The data in ML columns are stored as double values, i.e., an ML column is either a scalar column
 of double values or a vector column.
 Columns of other types must be encoded into ML columns using transformers.
 We use <a href="../../../../../org/apache/spark/ml/attribute/Attribute.html" title="class in org.apache.spark.ml.attribute"><code>Attribute</code></a> to describe a scalar ML column, and
 <a href="../../../../../org/apache/spark/ml/attribute/AttributeGroup.html" title="class in org.apache.spark.ml.attribute"><code>AttributeGroup</code></a> to describe a vector ML column.
 ML attributes are stored in the metadata field of the column schema.</div>
</div>
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<!-- Bottom navigation bar: two named anchors (navbar_bottom, navbar_bottom_firstrow),
     a link that jumps to #skip-navbar_bottom, and the primary list of documentation
     views. -->
<!-- NOTE(review): the skip link below has no text content, only a title attribute;
     confirm whether keyboard and screen-reader users can actually perceive it. -->
<div class="bottomNav"><a name="navbar_bottom">
<!--   -->
</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
<!--   -->
</a>
<!-- "Package" and "Class" are plain text rather than links on this page. "Package"
     carries the navBarCell1Rev class; presumably that marks the current view (confirm
     against stylesheet.css). -->
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<!-- Secondary navigation row (bottom): previous/next package links, the frames toggle,
     and an "All Classes" list whose visibility is set by the inline script further down. -->
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../org/apache/spark/ml/package-summary.html">Prev Package</a></li>
<li><a href="../../../../../org/apache/spark/ml/classification/package-summary.html">Next Package</a></li>
</ul>
<!-- Both links use target="_top", so they replace the whole window: "Frames" loads
     index.html with this page's path as the query string, "No Frames" loads this page. -->
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/ml/attribute/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
  // Show the "All Classes" list only when this page is the top-level window;
  // hide it when the page is displayed inside another window or frame.
  // NOTE(review): allClassesLink is assigned without var, so it is an implicit global;
  // the top navbar script assigns the same name earlier in the page.
  allClassesLink = document.getElementById("allclasses_navbar_bottom");
  if(window==top) {
    allClassesLink.style.display = "block";
  }
  else {
    allClassesLink.style.display = "none";
  }
  //-->
</script>
</div>
<!-- Landing anchor for the href="#skip-navbar_bottom" link in the bottom navigation bar. -->
<a name="skip-navbar_bottom">
<!--   -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<!-- Deferred scripts, listed in this order: jQuery first, then api-javadocs.js. Both are
     loaded from the lib directory five levels above this file.
     NOTE(review): api-javadocs.js presumably depends on jQuery and rewrites parts of the
     page after load; confirm before changing the order or the markup it targets. -->
<script defer="defer" type="text/javascript" src="../../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../../lib/api-javadocs.js"></script></body>
</html>