From 6b1a6180e7bd45b0a0ec47de9f7c7956543f4dfa Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng
Date: Thu, 26 May 2016 22:39:14 -0700
Subject: [MINOR] Fix Typos 'a -> an'

## What changes were proposed in this pull request?

`a` -> `an`

I use a regex to generate potential error lines:
`grep -in ' a [aeiou]' mllib/src/main/scala/org/apache/spark/ml/*/*scala`
and review them line by line.
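For reference, a minimal sketch of the same search pointed at the `sql` module that this
patch actually touches (the module roots are taken from the diff below; the exact command
form is an assumption — only the `mllib` invocation above is the author's):

```sh
# Hypothetical variant of the command above for the sql module; the regex is the one
# from the PR description, the directories are the module roots seen in this diff.
grep -rin --include='*.scala' ' a [aeiou]' \
  sql/catalyst/src/main/scala \
  sql/core/src/main/scala \
  sql/hive/src/main/scala \
  sql/hive-thriftserver/src/main/scala
```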
## How was this patch tested?

local build
`lint-java` checking

Author: Zheng RuiFeng

Closes #13317 from zhengruifeng/a_an.
---
 .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala       | 2 +-
 .../org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala     | 2 +-
 .../scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala  | 4 ++--
 .../org/apache/spark/sql/catalyst/catalog/functionResources.scala    | 2 +-
 .../org/apache/spark/sql/catalyst/expressions/aggregate/First.scala  | 2 +-
 .../org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala   | 2 +-
 .../spark/sql/catalyst/expressions/aggregate/PivotFirst.scala        | 2 +-
 .../scala/org/apache/spark/sql/catalyst/expressions/literals.scala   | 2 +-
 .../main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala  | 2 +-
 .../scala/org/apache/spark/sql/catalyst/expressions/subquery.scala   | 2 +-
 .../scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala    | 2 +-
 .../scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala     | 2 +-
 .../spark/sql/catalyst/plans/logical/basicLogicalOperators.scala     | 4 ++--
 .../main/scala/org/apache/spark/sql/catalyst/plans/package.scala     | 2 +-
 .../org/apache/spark/sql/catalyst/plans/physical/partitioning.scala  | 2 +-
 .../src/main/scala/org/apache/spark/sql/types/Decimal.scala          | 2 +-
 .../src/main/scala/org/apache/spark/sql/types/DecimalType.scala      | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala   | 2 +-
 .../src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala   | 2 +-
 .../main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala   | 2 +-
 .../org/apache/spark/sql/execution/WholeStageCodegenExec.scala       | 2 +-
 .../spark/sql/execution/aggregate/TungstenAggregationIterator.scala  | 6 +++---
 .../org/apache/spark/sql/execution/basicPhysicalOperators.scala      | 6 +++---
 .../apache/spark/sql/execution/datasources/WriterContainer.scala     | 2 +-
 .../scala/org/apache/spark/sql/execution/streaming/Offset.scala      | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala         | 2 +-
 .../src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala   | 2 +-
 .../org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala   | 2 +-
 sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala    | 2 +-
 .../org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala    | 4 ++--
 .../main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala   | 4 ++--
 31 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index cd242d78a4..c5f221d783 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -215,7 +215,7 @@ trait CheckAnalysis extends PredicateHelper {
           if (!RowOrdering.isOrderable(expr.dataType)) {
             failAnalysis(
               s"expression ${expr.sql} cannot be used as a grouping expression " +
-                s"because its data type ${expr.dataType.simpleString} is not a orderable " +
+                s"because its data type ${expr.dataType.simpleString} is not an orderable " +
                 s"data type.")
           }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
index 79c3528a52..d4350598f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
@@ -37,7 +37,7 @@ object TypeCheckResult {
   /**
    * Represents the failing result of `Expression.checkInputDataTypes`,
-   * with a error message to show the reason of failure.
+   * with an error message to show the reason of failure.
    */
   case class TypeCheckFailure(message: String) extends TypeCheckResult {
     def isSuccess: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 91bdcc3b09..387e555254 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -178,8 +178,8 @@ object TypeCoercion {
       q transformExpressions {
         case a: AttributeReference =>
           inputMap.get(a.exprId) match {
-            // This can happen when a Attribute reference is born in a non-leaf node, for example
-            // due to a call to an external script like in the Transform operator.
+            // This can happen when an Attribute reference is born in a non-leaf node, for
+            // example due to a call to an external script like in the Transform operator.
             // TODO: Perhaps those should actually be aliases?
             case None => a
             // Leave the same if the dataTypes match.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
index 7da1fe93c6..c66d08d2cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
@@ -26,7 +26,7 @@ object JarResource extends FunctionResourceType("jar")
 object FileResource extends FunctionResourceType("file")
-// We do not allow users to specify a archive because it is YARN specific.
+// We do not allow users to specify an archive because it is YARN specific.
 // When loading resources, we will throw an exception and ask users to
 // use --archive with spark submit.
 object ArchiveResource extends FunctionResourceType("archive")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index b8ab0364dd..946b3d446a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
 /**
  * Returns the first value of `child` for a group of rows. If the first value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on an already
 * sorted column, if we do partial aggregation and final aggregation (when mergeExpression
 * is used) its result will not be deterministic (unless the input table is sorted and has
 * a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index b05d74b49b..53b4b761ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
 /**
  * Returns the last value of `child` for a group of rows. If the last value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on an already
 * sorted column, if we do partial aggregation and final aggregation (when mergeExpression
 * is used) its result will not be deterministic (unless the input table is sorted and has
 * a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
index 9ead571c53..16c03c500a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
@@ -51,7 +51,7 @@ object PivotFirst {
 }
 /**
- * PivotFirst is a aggregate function used in the second phase of a two phase pivot to do the
+ * PivotFirst is an aggregate function used in the second phase of a two phase pivot to do the
 * required rearrangement of values into pivoted form.
 *
 * For example on an input of
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 7e3683e482..95ed68fbb0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -182,7 +182,7 @@ case class Literal protected (value: Any, dataType: DataType)
   override protected def jsonFields: List[JField] = {
     // Turns all kinds of literal values to string in json field, as the type info is hard to
-    // retain in json format, e.g. {"a": 123} can be a int, or double, or decimal, etc.
+    // retain in json format, e.g. {"a": 123} can be an int, or double, or decimal, etc.
     val jsonValue = (value, dataType) match {
       case (null, _) => JNull
       case (i: Int, DateType) => JString(DateTimeUtils.toJavaDate(i).toString)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index 93a8278528..e036982e70 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -214,7 +214,7 @@ class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
 }
 /**
- * A internal row implementation that uses an array of objects as the underlying storage.
+ * An internal row implementation that uses an array of objects as the underlying storage.
 * Note that, while the array is not copied, and thus could technically be mutated after creation,
 * this is not allowed.
 */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
index 5001f9a41e..08cb6c0134 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
@@ -129,7 +129,7 @@ object PredicateSubquery {
 /**
  * A [[ListQuery]] expression defines the query which we want to search in an IN subquery
- * expression. It should and can only be used in conjunction with a IN expression.
+ * expression. It should and can only be used in conjunction with an IN expression.
 *
 * For example (SQL):
 * {{{
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5e998d6188..48d70099b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -926,7 +926,7 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
     case e @ CaseWhen(branches, _) if branches.headOption.map(_._1) == Some(TrueLiteral) =>
       // If the first branch is a true literal, remove the entire CaseWhen and use the value
       // from that. Note that CaseWhen.branches should never be empty, and as a result the
-      // headOption (rather than head) added above is just a extra (and unnecessary) safeguard.
+      // headOption (rather than head) added above is just an extra (and unnecessary) safeguard.
       branches.head._2
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d042e191a9..d687a85c18 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -59,7 +59,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
     }
   }
-  /** Get the builder (visitor) which converts a ParseTree into a AST. */
+  /** Get the builder (visitor) which converts a ParseTree into an AST. */
   protected def astBuilder: AstBuilder
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 8b7e21b679..898784dab1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -530,7 +530,7 @@ private[sql] object Expand {
   /**
    * Apply the all of the GroupExpressions to every input row, hence we will get
-   * multiple output rows for a input row.
+   * multiple output rows for an input row.
    *
    * @param bitmasks The bitmask set represents the grouping sets
    * @param groupByAliases The aliased original group by expressions
@@ -572,7 +572,7 @@ private[sql] object Expand {
   /**
    * Apply a number of projections to every input row, hence we will get multiple output rows for
-   * a input row.
+   * an input row.
    *
    * @param projections to apply
    * @param output of all projections.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
index 42bdab42b7..b46f7a6d5a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst
 /**
- * A a collection of common abstractions for query plans as well as
+ * A collection of common abstractions for query plans as well as
 * a base logical plan representation.
 */
 package object plans
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
index d449088498..51d78dd123 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
@@ -67,7 +67,7 @@ case class ClusteredDistribution(clustering: Seq[Expression]) extends Distributi
 case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution {
   require(
     ordering != Nil,
-    "The ordering expressions of a OrderedDistribution should not be Nil. " +
+    "The ordering expressions of an OrderedDistribution should not be Nil. " +
" + "An AllTuples should be used to represent a distribution that only has " + "a single partition.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 31604bad0f..52e021070e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -366,7 +366,7 @@ object Decimal { val ROUND_CEILING = BigDecimal.RoundingMode.CEILING val ROUND_FLOOR = BigDecimal.RoundingMode.FLOOR - /** Maximum number of decimal digits a Int can represent */ + /** Maximum number of decimal digits an Int can represent */ val MAX_INT_DIGITS = 9 /** Maximum number of decimal digits a Long can represent */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala index 6b7e3714e0..6500875f95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala @@ -152,7 +152,7 @@ object DecimalType extends AbstractDataType { } /** - * Returns if dt is a DecimalType that fits inside a int + * Returns if dt is a DecimalType that fits inside an int */ def is32BitDecimalType(dt: DataType): Boolean = { dt match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 2e85e36767..c7b887ecd4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -529,7 +529,7 @@ final class DataFrameWriter private[sql](df: DataFrame) { } /** - * Saves the content of the [[DataFrame]] to a external database table via JDBC. In the case the + * Saves the content of the [[DataFrame]] to an external database table via JDBC. In the case the * table already exists in the external database, behavior of this function depends on the * save mode, specified by the `mode` function (default to throwing an exception). * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala index 1be41ffc07..4c046f7bdc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.execution.metric.SQLMetrics /** * Apply all of the GroupExpressions to every input row, hence we will get - * multiple output rows for a input row. + * multiple output rows for an input row. * @param projections The group of expressions, all of the group expressions should * output the same schema specified bye the parameter `output` * @param output The output Schema diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala index e81cd28ea3..5f0c264416 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala @@ -95,7 +95,7 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A * interfaces / internals. 
 *
 * This RDD takes a [[ShuffleDependency]] (`dependency`),
- * and a optional array of partition start indices as input arguments
+ * and an optional array of partition start indices as input arguments
 * (`specifiedPartitionStartIndices`).
 *
 * The `dependency` has the parent RDD of this RDD, which represents the dataset before shuffle
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 908e22de73..2aec931894 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -445,7 +445,7 @@ case class CollapseCodegenStages(conf: SQLConf) extends Rule[SparkPlan] {
   }
   /**
-   * Inserts a InputAdapter on top of those that do not support codegen.
+   * Inserts an InputAdapter on top of those that do not support codegen.
   */
   private def insertInputAdapter(plan: SparkPlan): SparkPlan = plan match {
     case j @ SortMergeJoinExec(_, _, _, _, left, right) if j.supportCodegen =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
index 243aa15deb..4b8adf5230 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
@@ -41,7 +41,7 @@ import org.apache.spark.unsafe.KVIterator
 *  - Step 0: Do hash-based aggregation.
 *  - Step 1: Sort all entries of the hash map based on values of grouping expressions and
 *            spill them to disk.
- *  - Step 2: Create a external sorter based on the spilled sorted map entries and reset the map.
+ *  - Step 2: Create an external sorter based on the spilled sorted map entries and reset the map.
 *  - Step 3: Get a sorted [[KVIterator]] from the external sorter.
 *  - Step 4: Repeat step 0 until no more input.
 *  - Step 5: Initialize sort-based aggregation on the sorted iterator.
@@ -434,12 +434,12 @@ class TungstenAggregationIterator(
   ///////////////////////////////////////////////////////////////////////////
   /**
-   * Generate a output row when there is no input and there is no grouping expression.
+   * Generate an output row when there is no input and there is no grouping expression.
   */
   def outputForEmptyGroupingKeyWithoutInput(): UnsafeRow = {
     if (groupingExpressions.isEmpty) {
       sortBasedAggregationBuffer.copyFrom(initialAggregationBuffer)
-      // We create a output row and copy it. So, we can free the map.
+      // We create an output row and copy it. So, we can free the map.
       val resultCopy =
         generateOutput(UnsafeRow.createFromByteArray(0, 0), sortBasedAggregationBuffer).copy()
       hashMap.free()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 89bde6ad73..185c79f899 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -142,9 +142,9 @@ case class FilterExec(condition: Expression, child: SparkPlan)
   // To generate the predicates we will follow this algorithm.
   // For each predicate that is not IsNotNull, we will generate them one by one loading attributes
-  // as necessary. For each of both attributes, if there is a IsNotNull predicate we will generate
-  // that check *before* the predicate. After all of these predicates, we will generate the
-  // remaining IsNotNull checks that were not part of other predicates.
+  // as necessary. For each of both attributes, if there is an IsNotNull predicate we will
+  // generate that check *before* the predicate. After all of these predicates, we will generate
+  // the remaining IsNotNull checks that were not part of other predicates.
   // This has the property of not doing redundant IsNotNull checks and taking better advantage of
   // short-circuiting, not loading attributes until they are needed.
   // This is very perf sensitive.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
index 3b064a5bc4..61dcbebd64 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
@@ -176,7 +176,7 @@ private[sql] abstract class BaseWriterContainer(
       val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
       ctor.newInstance(new Path(outputPath), context)
     } else {
-      // The specified output committer is just a OutputCommitter.
+      // The specified output committer is just an OutputCommitter.
       // So, we will use the no-argument constructor.
       val ctor = clazz.getDeclaredConstructor()
       ctor.newInstance()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
index 0f5d6445b1..2cc012840d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.streaming
 /**
- * A offset is a monotonically increasing metric used to track progress in the computation of a
+ * An offset is a monotonically increasing metric used to track progress in the computation of a
 * stream. An [[Offset]] must be comparable, and the result of `compareTo` must be consistent
 * with `equals` and `hashcode`.
 */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0b490fe71c..d89e98645b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1996,7 +1996,7 @@ object functions {
   /**
    * Computes the numeric value of the first character of the string column, and returns the
-   * result as a int column.
+   * result as an int column.
   *
   * @group string_funcs
   * @since 1.5.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index f3f36efda5..ceb6862275 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -356,7 +356,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     val tableIdent = sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName)
     sessionCatalog.refreshTable(tableIdent)
-    // If this table is cached as a InMemoryRelation, drop the original
+    // If this table is cached as an InMemoryRelation, drop the original
     // cached version and make the new version cached lazily.
     val logicalPlan = sparkSession.sessionState.catalog.lookupRelation(tableIdent)
     // Use lookupCachedData directly since RefreshTable also takes databaseName.
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
index cef5912c62..de70fdc14e 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
@@ -157,7 +157,7 @@ object HiveThriftServer2 extends Logging {
   /**
-   * A inner sparkListener called in sc.stop to clean up the HiveThriftServer2
+   * An inner sparkListener called in sc.stop to clean up the HiveThriftServer2
   */
  private[thriftserver] class HiveThriftServer2Listener(
     val server: HiveServer2,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index d033b05d48..88f4a2d2b2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -391,7 +391,7 @@ private[spark] object HiveUtils extends Logging {
     // Remote means that the metastore server is running in its own process.
     // When the mode is remote, configurations like "javax.jdo.option.ConnectionURL" will not be
     // used (because they are used by remote metastore server that talks to the database).
-    // Because execution Hive should always connects to a embedded derby metastore.
+    // Because execution Hive should always connects to an embedded derby metastore.
     // We have to remove the value of hive.metastore.uris. So, the execution Hive client connects
     // to the actual embedded derby metastore instead of the remote metastore.
     // You can search HiveConf.ConfVars.METASTOREURIS in the code of HiveConf (in Hive's repo).
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 9e8ff9317c..b3896484da 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -211,7 +211,7 @@ case class InsertIntoHiveTable(
         val warningMessage =
           s"$outputCommitterClass may be an output committer that writes data directly to " +
             "the final location. Because speculation is enabled, this output committer may " +
If possible, please use a output " + + "cause data loss (see the case in SPARK-10063). If possible, please use an output " + "committer that does not have this behavior (e.g. FileOutputCommitter)." logWarning(warningMessage) } @@ -250,7 +250,7 @@ case class InsertIntoHiveTable( orderedPartitionSpec.put(entry.getName, partitionSpec.getOrElse(entry.getName, "")) } - // inheritTableSpecs is set to true. It should be set to false for a IMPORT query + // inheritTableSpecs is set to true. It should be set to false for an IMPORT query // which is currently considered as a Hive native command. val inheritTableSpecs = true // TODO: Correctly set isSkewedStoreAsSubdir. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index 13d2bed606..0589c8ece3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType private[orc] object OrcFileOperator extends Logging { /** - * Retrieves a ORC file reader from a given path. The path can point to either a directory or a + * Retrieves an ORC file reader from a given path. The path can point to either a directory or a * single ORC file. If it points to an directory, it picks any non-empty ORC file within that * directory. * @@ -42,7 +42,7 @@ private[orc] object OrcFileOperator extends Logging { * ORC file if the file contains zero rows. This is OK for Hive since the schema of the * table is managed by metastore. But this becomes a problem when reading ORC files * directly from HDFS via Spark SQL, because we have to discover the schema from raw ORC - * files. So this method always tries to find a ORC file whose schema is non-empty, and + * files. So this method always tries to find an ORC file whose schema is non-empty, and * create the result reader from that file. If no such file is found, it returns `None`. * @todo Needs to consider all files when schema evolution is taken into account. */ -- cgit v1.2.3