#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# For this example, we shall use the "flights" dataset
# The dataset consists of every flight departing Houston in 2011.
# The data set is made up of 227,496 rows x 14 columns.

# To run this example use
# ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
#     examples/src/main/r/data-manipulation.R <path-to-flights.csv>

# NOTE(review): this file had been collapsed onto one physical line, which
# turned the whole script into a single comment, and the text between the
# usage message and the summarize() call was lost. Every statement tagged
# "reconstructed" below was rebuilt from the surviving fragments and must be
# confirmed against the intended script before it is relied upon.

# Load SparkR library into your R session
library(SparkR)

# Exactly one argument is expected: the path to the flights CSV file.
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 1L) {
  # NOTE(review): reconstructed - the usage text was cut off right after the
  # script name; the placeholder and the exit call are assumptions.
  print("Usage: data-manipulation.R <path-to-flights.csv>")
  quit(save = "no", status = 1L)
}

flights_csv_path <- args[[1L]]

# NOTE(review): reconstructed - SparkR 1.x style initialisation, chosen to
# match the spark-csv package named in the run instructions above. Confirm
# the API generation and the application name.
sc <- sparkR.init(appName = "SparkR-data-manipulation-example")
sqlContext <- sparkRSQL.init(sc)

# NOTE(review): reconstructed - load the CSV straight into a SparkR DataFrame
# through the spark-csv data source. The surviving code needs `flightsDF` with
# `dep_delay` and `arr_delay` columns.
flightsDF <- read.df(
  sqlContext,
  flights_csv_path,
  source = "com.databricks.spark.csv",
  header = "true"
)

# The surviving "%" fragment and the dangling closing brace indicate a piped
# expression inside a conditional block. The pipe operator is provided by
# magrittr, so the section only runs when that package is installed.
# NOTE(review): reconstructed guard - confirm the original condition.
if (requireNamespace("magrittr", quietly = TRUE)) {
  library(magrittr)

  # Average departure and arrival delay per group.
  # NOTE(review): grouping by `date` is an assumption taken from the result
  # name `dailyDelayDF` - confirm the grouping column.
  dailyDelayDF <- groupBy(flightsDF, flightsDF$date) %>%
    summarize(avg(flightsDF$dep_delay), avg(flightsDF$arr_delay))

  # Print the computed data frame
  head(dailyDelayDF)
}

# Stop the SparkContext now
sparkR.stop()