public class HiveContext extends SQLContext implements Logging
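A minimal usage sketch, assuming an existing Hive table named src (the table name, app name, and master are placeholders): HiveContext is constructed from a SparkContext, and HiveQL queries are issued through the inherited sql method.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Build a local SparkContext, then wrap it in a HiveContext.
val sc = new SparkContext(new SparkConf().setAppName("hive-context-sketch").setMaster("local[2]"))
val hiveContext = new HiveContext(sc)

// Run a HiveQL query against the placeholder table and print a few rows.
hiveContext.sql("SELECT key, value FROM src LIMIT 10").show()
```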
Modifier and Type | Class and Description |
---|---|
protected class | HiveContext.QueryExecution Extends QueryExecution with hive specific features. |
protected class | HiveContext.SQLSession |
Nested classes/interfaces inherited from class org.apache.spark.sql.SQLContext: SQLContext.implicits$, SQLContext.SparkPlanner
Constructor and Description |
---|
HiveContext(SparkContext sc) |
Modifier and Type | Method and Description |
---|---|
void | analyze(java.lang.String tableName) Analyzes the given table in the current database to generate statistics, which will be used in query optimizations. |
protected org.apache.spark.sql.catalyst.analysis.Analyzer | analyzer() |
protected org.apache.spark.sql.hive.HiveMetastoreCatalog | catalog() |
protected scala.collection.immutable.Map<java.lang.String,java.lang.String> | configure() Overridden by child classes that need to set configuration before the client init. |
static | CONVERT_CTAS() |
static | CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING() |
static | CONVERT_METASTORE_PARQUET() |
protected boolean | convertCTAS() When true, a table created by a Hive CTAS statement (no USING clause) will be converted to a data source table, using the data source set by spark.sql.sources.default. |
protected boolean | convertMetastoreParquet() When true, enables an experimental feature where metastore tables that use the parquet SerDe are automatically converted to use the Spark SQL parquet table scan, instead of the Hive SerDe (see the configuration sketch below). |
protected boolean | convertMetastoreParquetWithSchemaMerging() When true, also tries to merge possibly different but compatible Parquet schemas in different Parquet data files. |
protected HiveContext.SQLSession | createSession() |
protected java.lang.String | dialectClassName() |
protected HiveContext.QueryExecution | executePlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan) |
protected org.apache.spark.sql.hive.client.ClientWrapper | executionHive() The copy of the Hive client that is used for execution. |
protected org.apache.spark.sql.catalyst.analysis.FunctionRegistry | functionRegistry() |
static | HIVE_EXECUTION_VERSION() |
static | HIVE_METASTORE_BARRIER_PREFIXES() |
static | HIVE_METASTORE_JARS() |
static | HIVE_METASTORE_SHARED_PREFIXES() |
static | HIVE_METASTORE_VERSION() |
static | HIVE_THRIFT_SERVER_ASYNC() |
protected org.apache.hadoop.hive.conf.HiveConf | hiveconf() |
static java.lang.String | hiveExecutionVersion() The version of Hive used internally by Spark SQL. |
protected scala.collection.Seq<java.lang.String> | hiveMetastoreBarrierPrefixes() A comma separated list of class prefixes that should explicitly be reloaded for each version of Hive that Spark SQL is communicating with. |
protected java.lang.String | hiveMetastoreJars() The location of the jars that should be used to instantiate the HiveMetastoreClient. |
protected scala.collection.Seq<java.lang.String> | hiveMetastoreSharedPrefixes() A comma separated list of class prefixes that should be loaded using the classloader that is shared between Spark SQL and a specific version of Hive. |
protected java.lang.String | hiveMetastoreVersion() The version of the Hive client that will be used to communicate with the metastore. |
protected boolean | hiveThriftServerAsync() |
protected void | invalidateTable(java.lang.String tableName) |
protected org.apache.spark.sql.hive.client.ClientInterface | metadataHive() The copy of the Hive client that is used to retrieve metadata from the Hive MetaStore. |
static scala.collection.immutable.Map<java.lang.String,java.lang.String> | newTemporaryConfiguration() Constructs a configuration for Hive, where the metastore is located in a temp directory. |
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | parseSql(java.lang.String sql) |
protected SQLContext.SparkPlanner | planner() |
protected static scala.collection.Seq<org.apache.spark.sql.types.AtomicType> | primitiveTypes() |
void | refreshTable(java.lang.String tableName) Invalidate and refresh all the cached metadata of the given table. |
protected scala.collection.Seq<java.lang.String> | runSqlHive(java.lang.String sql) |
void | setConf(java.lang.String key, java.lang.String value) Set the given Spark SQL configuration property. |
protected org.apache.hadoop.hive.ql.parse.VariableSubstitution | substitutor() |
protected static java.lang.String | toHiveString(scala.Tuple2<java.lang.Object,DataType> a) |
protected static java.lang.String | toHiveStructString(scala.Tuple2<java.lang.Object,DataType> a) Hive outputs fields of structs slightly differently than top level attributes. |
Methods inherited from class org.apache.spark.sql.SQLContext: applySchema, applySchema, applySchema, applySchema, applySchemaToPythonRDD, applySchemaToPythonRDD, baseRelationToDataFrame, cacheManager, cacheTable, clearCache, conf, createDataFrame, createDataFrame, createDataFrame, createDataFrame, createDataFrame, createDataFrame, createExternalTable, createExternalTable, createExternalTable, createExternalTable, createExternalTable, createExternalTable, currentSession, ddlParser, defaultSession, detachSession, dropTempTable, emptyDataFrame, emptyResult, executeSql, experimental, getAllConfs, getConf, getConf, getOrCreate, getSchema, getSQLDialect, implicits, isCached, jdbc, jdbc, jdbc, jsonFile, jsonFile, jsonFile, jsonRDD, jsonRDD, jsonRDD, jsonRDD, jsonRDD, jsonRDD, listener, load, load, load, load, load, load, openSession, optimizer, parquetFile, parquetFile, parseDataType, prepareForExecution, range, range, range, read, setConf, setSession, sparkContext, sql, sqlParser, table, tableNames, tableNames, tables, tables, tlSession, udf, uncacheTable
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.spark.Logging: initializeIfNecessary, initializeLogging, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
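The convertMetastoreParquet(), convertMetastoreParquetWithSchemaMerging() and convertCTAS() flags above only read configuration; they are controlled through setConf. A sketch, assuming the property keys mirror the accessor names (only spark.sql.hive.convertMetastoreParquet is quoted explicitly further down this page):

```scala
import org.apache.spark.sql.hive.HiveContext

val hiveContext: HiveContext = ???   // an existing HiveContext, e.g. built as in the sketch near the top

// Controls convertMetastoreParquet(); this key is quoted further down this page.
hiveContext.setConf("spark.sql.hive.convertMetastoreParquet", "true")

// The next two keys are assumptions derived from the accessor names.
hiveContext.setConf("spark.sql.hive.convertMetastoreParquet.mergeSchema", "false")
hiveContext.setConf("spark.sql.hive.convertCTAS", "false")
```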
public HiveContext(SparkContext sc)
public static java.lang.String hiveExecutionVersion()
public static HIVE_METASTORE_VERSION()
public static HIVE_EXECUTION_VERSION()
public static HIVE_METASTORE_JARS()
public static CONVERT_METASTORE_PARQUET()
public static CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING()
public static CONVERT_CTAS()
public static HIVE_METASTORE_SHARED_PREFIXES()
public static HIVE_METASTORE_BARRIER_PREFIXES()
public static HIVE_THRIFT_SERVER_ASYNC()
public static scala.collection.immutable.Map<java.lang.String,java.lang.String> newTemporaryConfiguration()
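A sketch of one way the map returned by newTemporaryConfiguration() might be applied, for example to keep a test metastore in a temporary directory; the iteration and setConf wiring below is an assumption, not a documented recipe.

```scala
import org.apache.spark.sql.hive.HiveContext

val hiveContext: HiveContext = ???   // an existing HiveContext

// Copy every temporary-metastore property into the context's SQL configuration.
// Whether the embedded metastore honours these values depends on when the Hive
// client first initializes; treat this as illustrative only.
for ((key, value) <- HiveContext.newTemporaryConfiguration()) {
  hiveContext.setConf(key, value)
}
```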
protected static scala.collection.Seq<org.apache.spark.sql.types.AtomicType> primitiveTypes()
protected static java.lang.String toHiveString(scala.Tuple2<java.lang.Object,DataType> a)
protected static java.lang.String toHiveStructString(scala.Tuple2<java.lang.Object,DataType> a)
protected boolean convertMetastoreParquet()
protected boolean convertMetastoreParquetWithSchemaMerging()
This configuration is only effective when "spark.sql.hive.convertMetastoreParquet" is true.
protected boolean convertCTAS()
protected java.lang.String hiveMetastoreVersion()
protected java.lang.String hiveMetastoreJars()
protected scala.collection.Seq<java.lang.String> hiveMetastoreSharedPrefixes()
protected scala.collection.Seq<java.lang.String> hiveMetastoreBarrierPrefixes()
protected boolean hiveThriftServerAsync()
protected org.apache.hadoop.hive.ql.parse.VariableSubstitution substitutor()
protected org.apache.spark.sql.hive.client.ClientWrapper executionHive()
protected org.apache.spark.sql.hive.client.ClientInterface metadataHive()
protected org.apache.spark.sql.catalyst.plans.logical.LogicalPlan parseSql(java.lang.String sql)
Overrides: parseSql in class SQLContext
protected HiveContext.QueryExecution executePlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan plan)
Overrides: executePlan in class SQLContext
public void refreshTable(java.lang.String tableName)
Invalidate and refresh all the cached metadata of the given table.
Parameters: tableName - (undocumented)

protected void invalidateTable(java.lang.String tableName)
public void analyze(java.lang.String tableName)
Analyzes the given table in the current database to generate statistics, which will be used in query optimizations. Right now, it only supports Hive tables and it only updates the size of a Hive table in the Hive metastore.
Parameters: tableName - (undocumented)
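A short usage sketch for refreshTable and analyze above, assuming an existing HiveContext and a Hive table named logs (both placeholders):

```scala
import org.apache.spark.sql.hive.HiveContext

val hiveContext: HiveContext = ???   // an existing HiveContext

// Files behind the table may have been rewritten externally; drop any cached
// metadata so the next query sees the current layout.
hiveContext.refreshTable("logs")

// Gather statistics (currently only the table size) for use by the optimizer.
hiveContext.analyze("logs")
```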
protected org.apache.hadoop.hive.conf.HiveConf hiveconf()

public void setConf(java.lang.String key, java.lang.String value)
Set the given Spark SQL configuration property.
Overrides: setConf in class SQLContext
Parameters: key - (undocumented), value - (undocumented)
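As an illustration of setConf alongside the metastore accessors above (hiveMetastoreVersion(), hiveMetastoreJars()), the spark.sql.hive.metastore.* keys below are assumptions based on the accessor names, not values quoted on this page.

```scala
import org.apache.spark.sql.hive.HiveContext

val hiveContext: HiveContext = ???   // an existing HiveContext

// Assumed keys: pick the Hive version used to talk to the metastore and keep
// the built-in client jars. Typically set before the metastore is first contacted.
hiveContext.setConf("spark.sql.hive.metastore.version", "1.2.1")
hiveContext.setConf("spark.sql.hive.metastore.jars", "builtin")
```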
protected org.apache.spark.sql.hive.HiveMetastoreCatalog catalog()
Overrides: catalog in class SQLContext
protected org.apache.spark.sql.catalyst.analysis.FunctionRegistry functionRegistry()
Overrides: functionRegistry in class SQLContext
protected org.apache.spark.sql.catalyst.analysis.Analyzer analyzer()
Overrides: analyzer in class SQLContext
protected HiveContext.SQLSession createSession()
Overrides: createSession in class SQLContext
protected scala.collection.immutable.Map<java.lang.String,java.lang.String> configure()
protected java.lang.String dialectClassName()
Overrides: dialectClassName in class SQLContext
protected scala.collection.Seq<java.lang.String> runSqlHive(java.lang.String sql)
protected SQLContext.SparkPlanner planner()
Overrides: planner in class SQLContext