diff options
Diffstat (limited to 'examples/src/main')
4 files changed, 638 insertions, 0 deletions
diff --git a/examples/src/main/resources/kv1.txt b/examples/src/main/resources/kv1.txt new file mode 100644 index 0000000000..9825414ecf --- /dev/null +++ b/examples/src/main/resources/kv1.txt @@ -0,0 +1,500 @@ +238val_238 +86val_86 +311val_311 +27val_27 +165val_165 +409val_409 +255val_255 +278val_278 +98val_98 +484val_484 +265val_265 +193val_193 +401val_401 +150val_150 +273val_273 +224val_224 +369val_369 +66val_66 +128val_128 +213val_213 +146val_146 +406val_406 +429val_429 +374val_374 +152val_152 +469val_469 +145val_145 +495val_495 +37val_37 +327val_327 +281val_281 +277val_277 +209val_209 +15val_15 +82val_82 +403val_403 +166val_166 +417val_417 +430val_430 +252val_252 +292val_292 +219val_219 +287val_287 +153val_153 +193val_193 +338val_338 +446val_446 +459val_459 +394val_394 +237val_237 +482val_482 +174val_174 +413val_413 +494val_494 +207val_207 +199val_199 +466val_466 +208val_208 +174val_174 +399val_399 +396val_396 +247val_247 +417val_417 +489val_489 +162val_162 +377val_377 +397val_397 +309val_309 +365val_365 +266val_266 +439val_439 +342val_342 +367val_367 +325val_325 +167val_167 +195val_195 +475val_475 +17val_17 +113val_113 +155val_155 +203val_203 +339val_339 +0val_0 +455val_455 +128val_128 +311val_311 +316val_316 +57val_57 +302val_302 +205val_205 +149val_149 +438val_438 +345val_345 +129val_129 +170val_170 +20val_20 +489val_489 +157val_157 +378val_378 +221val_221 +92val_92 +111val_111 +47val_47 +72val_72 +4val_4 +280val_280 +35val_35 +427val_427 +277val_277 +208val_208 +356val_356 +399val_399 +169val_169 +382val_382 +498val_498 +125val_125 +386val_386 +437val_437 +469val_469 +192val_192 +286val_286 +187val_187 +176val_176 +54val_54 +459val_459 +51val_51 +138val_138 +103val_103 +239val_239 +213val_213 +216val_216 +430val_430 +278val_278 +176val_176 +289val_289 +221val_221 +65val_65 +318val_318 +332val_332 +311val_311 +275val_275 +137val_137 +241val_241 +83val_83 +333val_333 +180val_180 +284val_284 +12val_12 +230val_230 +181val_181 +67val_67 +260val_260 +404val_404 +384val_384 +489val_489 +353val_353 +373val_373 +272val_272 +138val_138 +217val_217 +84val_84 +348val_348 +466val_466 +58val_58 +8val_8 +411val_411 +230val_230 +208val_208 +348val_348 +24val_24 +463val_463 +431val_431 +179val_179 +172val_172 +42val_42 +129val_129 +158val_158 +119val_119 +496val_496 +0val_0 +322val_322 +197val_197 +468val_468 +393val_393 +454val_454 +100val_100 +298val_298 +199val_199 +191val_191 +418val_418 +96val_96 +26val_26 +165val_165 +327val_327 +230val_230 +205val_205 +120val_120 +131val_131 +51val_51 +404val_404 +43val_43 +436val_436 +156val_156 +469val_469 +468val_468 +308val_308 +95val_95 +196val_196 +288val_288 +481val_481 +457val_457 +98val_98 +282val_282 +197val_197 +187val_187 +318val_318 +318val_318 +409val_409 +470val_470 +137val_137 +369val_369 +316val_316 +169val_169 +413val_413 +85val_85 +77val_77 +0val_0 +490val_490 +87val_87 +364val_364 +179val_179 +118val_118 +134val_134 +395val_395 +282val_282 +138val_138 +238val_238 +419val_419 +15val_15 +118val_118 +72val_72 +90val_90 +307val_307 +19val_19 +435val_435 +10val_10 +277val_277 +273val_273 +306val_306 +224val_224 +309val_309 +389val_389 +327val_327 +242val_242 +369val_369 +392val_392 +272val_272 +331val_331 +401val_401 +242val_242 +452val_452 +177val_177 +226val_226 +5val_5 +497val_497 +402val_402 +396val_396 +317val_317 +395val_395 +58val_58 +35val_35 +336val_336 +95val_95 +11val_11 +168val_168 +34val_34 +229val_229 +233val_233 +143val_143 +472val_472 +322val_322 +498val_498 +160val_160 +195val_195 +42val_42 +321val_321 +430val_430 +119val_119 +489val_489 +458val_458 +78val_78 +76val_76 +41val_41 +223val_223 +492val_492 +149val_149 +449val_449 +218val_218 +228val_228 +138val_138 +453val_453 +30val_30 +209val_209 +64val_64 +468val_468 +76val_76 +74val_74 +342val_342 +69val_69 +230val_230 +33val_33 +368val_368 +103val_103 +296val_296 +113val_113 +216val_216 +367val_367 +344val_344 +167val_167 +274val_274 +219val_219 +239val_239 +485val_485 +116val_116 +223val_223 +256val_256 +263val_263 +70val_70 +487val_487 +480val_480 +401val_401 +288val_288 +191val_191 +5val_5 +244val_244 +438val_438 +128val_128 +467val_467 +432val_432 +202val_202 +316val_316 +229val_229 +469val_469 +463val_463 +280val_280 +2val_2 +35val_35 +283val_283 +331val_331 +235val_235 +80val_80 +44val_44 +193val_193 +321val_321 +335val_335 +104val_104 +466val_466 +366val_366 +175val_175 +403val_403 +483val_483 +53val_53 +105val_105 +257val_257 +406val_406 +409val_409 +190val_190 +406val_406 +401val_401 +114val_114 +258val_258 +90val_90 +203val_203 +262val_262 +348val_348 +424val_424 +12val_12 +396val_396 +201val_201 +217val_217 +164val_164 +431val_431 +454val_454 +478val_478 +298val_298 +125val_125 +431val_431 +164val_164 +424val_424 +187val_187 +382val_382 +5val_5 +70val_70 +397val_397 +480val_480 +291val_291 +24val_24 +351val_351 +255val_255 +104val_104 +70val_70 +163val_163 +438val_438 +119val_119 +414val_414 +200val_200 +491val_491 +237val_237 +439val_439 +360val_360 +248val_248 +479val_479 +305val_305 +417val_417 +199val_199 +444val_444 +120val_120 +429val_429 +169val_169 +443val_443 +323val_323 +325val_325 +277val_277 +230val_230 +478val_478 +178val_178 +468val_468 +310val_310 +317val_317 +333val_333 +493val_493 +460val_460 +207val_207 +249val_249 +265val_265 +480val_480 +83val_83 +136val_136 +353val_353 +172val_172 +214val_214 +462val_462 +233val_233 +406val_406 +133val_133 +175val_175 +189val_189 +454val_454 +375val_375 +401val_401 +421val_421 +407val_407 +384val_384 +256val_256 +26val_26 +134val_134 +67val_67 +384val_384 +379val_379 +18val_18 +462val_462 +492val_492 +100val_100 +298val_298 +9val_9 +341val_341 +498val_498 +146val_146 +458val_458 +362val_362 +186val_186 +285val_285 +348val_348 +167val_167 +18val_18 +273val_273 +183val_183 +281val_281 +344val_344 +97val_97 +469val_469 +315val_315 +84val_84 +28val_28 +37val_37 +448val_448 +152val_152 +348val_348 +307val_307 +194val_194 +414val_414 +477val_477 +222val_222 +126val_126 +90val_90 +169val_169 +403val_403 +400val_400 +200val_200 +97val_97 diff --git a/examples/src/main/resources/people.txt b/examples/src/main/resources/people.txt new file mode 100644 index 0000000000..3bcace4a44 --- /dev/null +++ b/examples/src/main/resources/people.txt @@ -0,0 +1,3 @@ +Michael, 29 +Andy, 30 +Justin, 19 diff --git a/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala new file mode 100644 index 0000000000..abcc1f04d4 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/sql/examples/HiveFromSpark.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.examples + +import org.apache.spark.SparkContext +import org.apache.spark.sql._ +import org.apache.spark.sql.hive.LocalHiveContext + +object HiveFromSpark { + case class Record(key: Int, value: String) + + def main(args: Array[String]) { + val sc = new SparkContext("local", "HiveFromSpark") + + // A local hive context creates an instance of the Hive Metastore in process, storing the + // the warehouse data in the current directory. This location can be overridden by + // specifying a second parameter to the constructor. + val hiveContext = new LocalHiveContext(sc) + import hiveContext._ + + sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") + sql("LOAD DATA LOCAL INPATH 'src/main/resources/kv1.txt' INTO TABLE src") + + // Queries are expressed in HiveQL + println("Result of 'SELECT *': ") + sql("SELECT * FROM src").collect.foreach(println) + + // Aggregation queries are also supported. + val count = sql("SELECT COUNT(*) FROM src").collect().head.getInt(0) + println(s"COUNT(*): $count") + + // The results of SQL queries are themselves RDDs and support all normal RDD functions. The + // items in the RDD are of type Row, which allows you to access each column by ordinal. + val rddFromSql = sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key") + + println("Result of RDD.map:") + val rddAsStrings = rddFromSql.map { + case Row(key: Int, value: String) => s"Key: $key, Value: $value" + } + + // You can also register RDDs as temporary tables within a HiveContext. + val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))) + rdd.registerAsTable("records") + + // Queries can then join RDD data with data stored in Hive. + println("Result of SELECT *:") + sql("SELECT * FROM records r JOIN src s ON r.key = s.key").collect().foreach(println) + } +} diff --git a/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala new file mode 100644 index 0000000000..8210ad977f --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/sql/examples/RDDRelation.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.examples + +import org.apache.spark.SparkContext +import org.apache.spark.sql.SQLContext + +// One method for defining the schema of an RDD is to make a case class with the desired column +// names and types. +case class Record(key: Int, value: String) + +object RDDRelation { + def main(args: Array[String]) { + val sc = new SparkContext("local", "RDDRelation") + val sqlContext = new SQLContext(sc) + + // Importing the SQL context gives access to all the SQL functions and implicit conversions. + import sqlContext._ + + val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))) + // Any RDD containing case classes can be registered as a table. The schema of the table is + // automatically inferred using scala reflection. + rdd.registerAsTable("records") + + // Once tables have been registered, you can run SQL queries over them. + println("Result of SELECT *:") + sql("SELECT * FROM records").collect().foreach(println) + + // Aggregation queries are also supported. + val count = sql("SELECT COUNT(*) FROM records").collect().head.getInt(0) + println(s"COUNT(*): $count") + + // The results of SQL queries are themselves RDDs and support all normal RDD functions. The + // items in the RDD are of type Row, which allows you to access each column by ordinal. + val rddFromSql = sql("SELECT key, value FROM records WHERE key < 10") + + println("Result of RDD.map:") + rddFromSql.map(row => s"Key: ${row(0)}, Value: ${row(1)}").collect.foreach(println) + + // Queries can also be written using a LINQ-like Scala DSL. + rdd.where('key === 1).orderBy('value.asc).select('key).collect().foreach(println) + + // Write out an RDD as a parquet file. + rdd.saveAsParquetFile("pair.parquet") + + // Read in parquet file. Parquet files are self-describing so the schmema is preserved. + val parquetFile = sqlContext.parquetFile("pair.parquet") + + // Queries can be run using the DSL on parequet files just like the original RDD. + parquetFile.where('key === 1).select('value as 'a).collect().foreach(println) + + // These files can also be registered as tables. + parquetFile.registerAsTable("parquetFile") + sql("SELECT * FROM parquetFile").collect().foreach(println) + } +} |