aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/pom.xml6
-rw-r--r--examples/src/main/resources/kv1.txt500
-rw-r--r--examples/src/main/resources/people.txt3
-rw-r--r--examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala64
-rw-r--r--examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala71
5 files changed, 644 insertions, 0 deletions
diff --git a/examples/pom.xml b/examples/pom.xml
index 382a38d940..a5569ff5e7 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -72,6 +72,12 @@
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
diff --git a/examples/src/main/resources/kv1.txt b/examples/src/main/resources/kv1.txt
new file mode 100644
index 0000000000..9825414ecf
--- /dev/null
+++ b/examples/src/main/resources/kv1.txt
@@ -0,0 +1,500 @@
+238val_238
+86val_86
+311val_311
+27val_27
+165val_165
+409val_409
+255val_255
+278val_278
+98val_98
+484val_484
+265val_265
+193val_193
+401val_401
+150val_150
+273val_273
+224val_224
+369val_369
+66val_66
+128val_128
+213val_213
+146val_146
+406val_406
+429val_429
+374val_374
+152val_152
+469val_469
+145val_145
+495val_495
+37val_37
+327val_327
+281val_281
+277val_277
+209val_209
+15val_15
+82val_82
+403val_403
+166val_166
+417val_417
+430val_430
+252val_252
+292val_292
+219val_219
+287val_287
+153val_153
+193val_193
+338val_338
+446val_446
+459val_459
+394val_394
+237val_237
+482val_482
+174val_174
+413val_413
+494val_494
+207val_207
+199val_199
+466val_466
+208val_208
+174val_174
+399val_399
+396val_396
+247val_247
+417val_417
+489val_489
+162val_162
+377val_377
+397val_397
+309val_309
+365val_365
+266val_266
+439val_439
+342val_342
+367val_367
+325val_325
+167val_167
+195val_195
+475val_475
+17val_17
+113val_113
+155val_155
+203val_203
+339val_339
+0val_0
+455val_455
+128val_128
+311val_311
+316val_316
+57val_57
+302val_302
+205val_205
+149val_149
+438val_438
+345val_345
+129val_129
+170val_170
+20val_20
+489val_489
+157val_157
+378val_378
+221val_221
+92val_92
+111val_111
+47val_47
+72val_72
+4val_4
+280val_280
+35val_35
+427val_427
+277val_277
+208val_208
+356val_356
+399val_399
+169val_169
+382val_382
+498val_498
+125val_125
+386val_386
+437val_437
+469val_469
+192val_192
+286val_286
+187val_187
+176val_176
+54val_54
+459val_459
+51val_51
+138val_138
+103val_103
+239val_239
+213val_213
+216val_216
+430val_430
+278val_278
+176val_176
+289val_289
+221val_221
+65val_65
+318val_318
+332val_332
+311val_311
+275val_275
+137val_137
+241val_241
+83val_83
+333val_333
+180val_180
+284val_284
+12val_12
+230val_230
+181val_181
+67val_67
+260val_260
+404val_404
+384val_384
+489val_489
+353val_353
+373val_373
+272val_272
+138val_138
+217val_217
+84val_84
+348val_348
+466val_466
+58val_58
+8val_8
+411val_411
+230val_230
+208val_208
+348val_348
+24val_24
+463val_463
+431val_431
+179val_179
+172val_172
+42val_42
+129val_129
+158val_158
+119val_119
+496val_496
+0val_0
+322val_322
+197val_197
+468val_468
+393val_393
+454val_454
+100val_100
+298val_298
+199val_199
+191val_191
+418val_418
+96val_96
+26val_26
+165val_165
+327val_327
+230val_230
+205val_205
+120val_120
+131val_131
+51val_51
+404val_404
+43val_43
+436val_436
+156val_156
+469val_469
+468val_468
+308val_308
+95val_95
+196val_196
+288val_288
+481val_481
+457val_457
+98val_98
+282val_282
+197val_197
+187val_187
+318val_318
+318val_318
+409val_409
+470val_470
+137val_137
+369val_369
+316val_316
+169val_169
+413val_413
+85val_85
+77val_77
+0val_0
+490val_490
+87val_87
+364val_364
+179val_179
+118val_118
+134val_134
+395val_395
+282val_282
+138val_138
+238val_238
+419val_419
+15val_15
+118val_118
+72val_72
+90val_90
+307val_307
+19val_19
+435val_435
+10val_10
+277val_277
+273val_273
+306val_306
+224val_224
+309val_309
+389val_389
+327val_327
+242val_242
+369val_369
+392val_392
+272val_272
+331val_331
+401val_401
+242val_242
+452val_452
+177val_177
+226val_226
+5val_5
+497val_497
+402val_402
+396val_396
+317val_317
+395val_395
+58val_58
+35val_35
+336val_336
+95val_95
+11val_11
+168val_168
+34val_34
+229val_229
+233val_233
+143val_143
+472val_472
+322val_322
+498val_498
+160val_160
+195val_195
+42val_42
+321val_321
+430val_430
+119val_119
+489val_489
+458val_458
+78val_78
+76val_76
+41val_41
+223val_223
+492val_492
+149val_149
+449val_449
+218val_218
+228val_228
+138val_138
+453val_453
+30val_30
+209val_209
+64val_64
+468val_468
+76val_76
+74val_74
+342val_342
+69val_69
+230val_230
+33val_33
+368val_368
+103val_103
+296val_296
+113val_113
+216val_216
+367val_367
+344val_344
+167val_167
+274val_274
+219val_219
+239val_239
+485val_485
+116val_116
+223val_223
+256val_256
+263val_263
+70val_70
+487val_487
+480val_480
+401val_401
+288val_288
+191val_191
+5val_5
+244val_244
+438val_438
+128val_128
+467val_467
+432val_432
+202val_202
+316val_316
+229val_229
+469val_469
+463val_463
+280val_280
+2val_2
+35val_35
+283val_283
+331val_331
+235val_235
+80val_80
+44val_44
+193val_193
+321val_321
+335val_335
+104val_104
+466val_466
+366val_366
+175val_175
+403val_403
+483val_483
+53val_53
+105val_105
+257val_257
+406val_406
+409val_409
+190val_190
+406val_406
+401val_401
+114val_114
+258val_258
+90val_90
+203val_203
+262val_262
+348val_348
+424val_424
+12val_12
+396val_396
+201val_201
+217val_217
+164val_164
+431val_431
+454val_454
+478val_478
+298val_298
+125val_125
+431val_431
+164val_164
+424val_424
+187val_187
+382val_382
+5val_5
+70val_70
+397val_397
+480val_480
+291val_291
+24val_24
+351val_351
+255val_255
+104val_104
+70val_70
+163val_163
+438val_438
+119val_119
+414val_414
+200val_200
+491val_491
+237val_237
+439val_439
+360val_360
+248val_248
+479val_479
+305val_305
+417val_417
+199val_199
+444val_444
+120val_120
+429val_429
+169val_169
+443val_443
+323val_323
+325val_325
+277val_277
+230val_230
+478val_478
+178val_178
+468val_468
+310val_310
+317val_317
+333val_333
+493val_493
+460val_460
+207val_207
+249val_249
+265val_265
+480val_480
+83val_83
+136val_136
+353val_353
+172val_172
+214val_214
+462val_462
+233val_233
+406val_406
+133val_133
+175val_175
+189val_189
+454val_454
+375val_375
+401val_401
+421val_421
+407val_407
+384val_384
+256val_256
+26val_26
+134val_134
+67val_67
+384val_384
+379val_379
+18val_18
+462val_462
+492val_492
+100val_100
+298val_298
+9val_9
+341val_341
+498val_498
+146val_146
+458val_458
+362val_362
+186val_186
+285val_285
+348val_348
+167val_167
+18val_18
+273val_273
+183val_183
+281val_281
+344val_344
+97val_97
+469val_469
+315val_315
+84val_84
+28val_28
+37val_37
+448val_448
+152val_152
+348val_348
+307val_307
+194val_194
+414val_414
+477val_477
+222val_222
+126val_126
+90val_90
+169val_169
+403val_403
+400val_400
+200val_200
+97val_97
diff --git a/examples/src/main/resources/people.txt b/examples/src/main/resources/people.txt
new file mode 100644
index 0000000000..3bcace4a44
--- /dev/null
+++ b/examples/src/main/resources/people.txt
@@ -0,0 +1,3 @@
+Michael, 29
+Andy, 30
+Justin, 19
diff --git a/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala
new file mode 100644
index 0000000000..abcc1f04d4
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql._
+import org.apache.spark.sql.hive.LocalHiveContext
+
+/**
+ * Example program that runs HiveQL queries through Spark SQL and combines a Hive table with
+ * an RDD registered as a temporary table.
+ *
+ * It creates the Hive table `src`, loads `kv1.txt` into it, prints the results of a few
+ * queries, and finally joins `src` against an RDD of `Record`s.
+ */
+object HiveFromSpark {
+  /** Row type of the RDD that is registered below as the temporary table `records`. */
+  case class Record(key: Int, value: String)
+
+  /**
+   * Runs the example against a local Spark master and prints every result to stdout.
+   *
+   * @param args command-line arguments; not used
+   */
+  def main(args: Array[String]): Unit = {
+    val sc = new SparkContext("local", "HiveFromSpark")
+
+    // A local hive context creates an instance of the Hive Metastore in process, storing
+    // the warehouse data in the current directory. This location can be overridden by
+    // specifying a second parameter to the constructor.
+    val hiveContext = new LocalHiveContext(sc)
+    import hiveContext._
+
+    // NOTE(review): the input path is relative, so this presumably has to be started from the
+    // examples directory -- confirm.
+    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    sql("LOAD DATA LOCAL INPATH 'src/main/resources/kv1.txt' INTO TABLE src")
+
+    // Queries are expressed in HiveQL
+    println("Result of 'SELECT *': ")
+    sql("SELECT * FROM src").collect().foreach(println)
+
+    // Aggregation queries are also supported.
+    // NOTE(review): getInt assumes COUNT(*) is returned as an Int; confirm for the Spark
+    // version in use.
+    val count = sql("SELECT COUNT(*) FROM src").collect().head.getInt(0)
+    println(s"COUNT(*): $count")
+
+    // The results of SQL queries are themselves RDDs and support all normal RDD functions. The
+    // items in the RDD are of type Row, which allows you to access each column by ordinal.
+    val rddFromSql = sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key")
+
+    println("Result of RDD.map:")
+    val rddAsStrings = rddFromSql.map {
+      case Row(key: Int, value: String) => s"Key: $key, Value: $value"
+    }
+    // map only describes the transformation; an action is needed to actually compute the
+    // strings and print them under the header above.
+    rddAsStrings.collect().foreach(println)
+
+    // You can also register RDDs as temporary tables within a HiveContext.
+    val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i")))
+    rdd.registerAsTable("records")
+
+    // Queries can then join RDD data with data stored in Hive.
+    println("Result of SELECT *:")
+    sql("SELECT * FROM records r JOIN src s ON r.key = s.key").collect().foreach(println)
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala
new file mode 100644
index 0000000000..8210ad977f
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.examples
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+
+/**
+ * Row type for the example data. One method for defining the schema of an RDD is to make a
+ * case class with the desired column names and types.
+ */
+case class Record(key: Int, value: String)
+
+/**
+ * Example program that registers an RDD of `Record`s as a table and queries it with SQL and
+ * with the Scala DSL, then writes the data to a parquet file and queries that file the same way.
+ */
+object RDDRelation {
+  /**
+   * Runs the example against a local Spark master and prints every result to stdout.
+   *
+   * @param args command-line arguments; not used
+   */
+  def main(args: Array[String]): Unit = {
+    val sc = new SparkContext("local", "RDDRelation")
+    val sqlContext = new SQLContext(sc)
+
+    // Importing the members of the SQL context brings its SQL functions and implicit
+    // conversions into scope.
+    import sqlContext._
+
+    // Input data: one Record per key from 1 to 100.
+    val records = sc.parallelize(for (i <- 1 to 100) yield Record(i, s"val_$i"))
+    // Any RDD containing case classes can be registered as a table. The schema of the table is
+    // automatically inferred using Scala reflection.
+    records.registerAsTable("records")
+
+    // Once a table has been registered, SQL queries can be run over it.
+    println("Result of SELECT *:")
+    val allRows = sql("SELECT * FROM records").collect()
+    allRows.foreach(println)
+
+    // Aggregation queries are supported as well.
+    val count = sql("SELECT COUNT(*) FROM records").collect().head.getInt(0)
+    println(s"COUNT(*): $count")
+
+    // The result of a SQL query is itself an RDD and supports all normal RDD functions. Its
+    // items are of type Row, which gives access to each column by ordinal.
+    val smallKeys = sql("SELECT key, value FROM records WHERE key < 10")
+
+    println("Result of RDD.map:")
+    val described = smallKeys.map(row => s"Key: ${row(0)}, Value: ${row(1)}")
+    described.collect().foreach(println)
+
+    // Queries can also be written using a LINQ-like Scala DSL.
+    val keyOne = records.where('key === 1).orderBy('value.asc).select('key)
+    keyOne.collect().foreach(println)
+
+    // Write the RDD out as a parquet file.
+    records.saveAsParquetFile("pair.parquet")
+
+    // Read the parquet file back in. Parquet files are self-describing, so the schema is
+    // preserved.
+    val parquetRecords = sqlContext.parquetFile("pair.parquet")
+
+    // The DSL works on parquet files just like on the original RDD.
+    parquetRecords.where('key === 1).select('value as 'a).collect().foreach(println)
+
+    // Parquet files can be registered as tables, too.
+    parquetRecords.registerAsTable("parquetFile")
+    sql("SELECT * FROM parquetFile").collect().foreach(println)
+  }
+}