author     Takuya UESHIN <ueshin@databricks.com>  2017-03-14 13:57:23 -0700
committer  Xiao Li <gatorsmile@gmail.com>         2017-03-14 13:57:23 -0700
commit     7ded39c223429265b23940ca8244660dbee8320c
tree       e52bc19910347af47ecf24deaae2431866d043b4 /sql/catalyst
parent     6eac96823c7b244773bd810812b369e336a65837
[SPARK-19817][SQL] Make it clear that `timeZone` option is a general option in DataFrameReader/Writer.
## What changes were proposed in this pull request?

Since the timezone setting can also affect partition values, and it works for all formats, we should make it clear that `timeZone` is a general option.

## How was this patch tested?

Existing tests.

Author: Takuya UESHIN <ueshin@databricks.com>

Closes #17281 from ueshin/issues/SPARK-19817.
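For context, a sketch of how the now-general option is used from the public API. This is illustrative only, not part of the patch: the session setup and paths are assumptions, but `option("timeZone", ...)` on `DataFrameReader`/`DataFrameWriter` is exactly what the commit documents as format-agnostic.

```scala
// Illustrative sketch only: the session and paths are assumptions.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("tz-sketch").getOrCreate()

// On read, `timeZone` fixes the zone used to interpret zone-less
// timestamp text and timestamp-typed partition values, for any format.
val df = spark.read
  .option("timeZone", "America/Los_Angeles")
  .parquet("/path/to/partitioned/table")

// On write, the same option controls how timestamp partition values
// are rendered.
df.write
  .option("timeZone", "GMT")
  .partitionBy("ts")
  .parquet("/path/to/output")
```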
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala                | 3 ++-
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala                 | 5 +++--
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala               | 2 ++
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala | 9 ++++++---
4 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index e3631b0c07..b862deaf36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -113,7 +113,8 @@ case class CatalogTablePartition(
    */
   def toRow(partitionSchema: StructType, defaultTimeZondId: String): InternalRow = {
     val caseInsensitiveProperties = CaseInsensitiveMap(storage.properties)
-    val timeZoneId = caseInsensitiveProperties.getOrElse("timeZone", defaultTimeZondId)
+    val timeZoneId = caseInsensitiveProperties.getOrElse(
+      DateTimeUtils.TIMEZONE_OPTION, defaultTimeZondId)
     InternalRow.fromSeq(partitionSchema.map { field =>
       Cast(Literal(spec(field.name)), field.dataType, Option(timeZoneId)).eval()
     })
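The lookup above stays case-insensitive because the storage properties are wrapped in `CaseInsensitiveMap`. A minimal sketch of that behavior (the property map here is made up):

```scala
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

// A property stored as "TIMEZONE" still matches the "timeZone" key.
val props = CaseInsensitiveMap(Map("TIMEZONE" -> "GMT"))
assert(props.getOrElse("timeZone", "America/Los_Angeles") == "GMT")
```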
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 5a91f9c193..5f222ec602 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -23,7 +23,7 @@ import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
 import org.apache.commons.lang3.time.FastDateFormat
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs, ParseModes}
+import org.apache.spark.sql.catalyst.util._
 
 /**
  * Options for parsing JSON data into Spark SQL rows.
@@ -69,7 +69,8 @@ private[sql] class JSONOptions(
   val columnNameOfCorruptRecord =
     parameters.getOrElse("columnNameOfCorruptRecord", defaultColumnNameOfCorruptRecord)
 
-  val timeZone: TimeZone = TimeZone.getTimeZone(parameters.getOrElse("timeZone", defaultTimeZoneId))
+  val timeZone: TimeZone = TimeZone.getTimeZone(
+    parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId))
 
   // Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe.
   val dateFormat: FastDateFormat =
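With `JSONOptions` resolving the option this way, a per-read override looks like the following. This is a hedged sketch reusing the `spark` session from the earlier example; the file path and data are assumptions. When the `timestampFormat` pattern carries no zone, `timeZone` decides how the wall-clock text is interpreted.

```scala
import org.apache.spark.sql.types.{StructField, StructType, TimestampType}

val schema = StructType(Seq(StructField("t", TimestampType)))

// "2016-01-01T00:00:00" in the file is read as midnight PST here,
// not midnight in the JVM default zone.
val parsed = spark.read
  .schema(schema)
  .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss")
  .option("timeZone", "PST")
  .json("/path/to/events.json")
```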
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 9e1de0fd2f..9b94c1e2b4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -60,6 +60,8 @@ object DateTimeUtils {
   final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
   final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
 
+  val TIMEZONE_OPTION = "timeZone"
+
   def defaultTimeZone(): TimeZone = TimeZone.getDefault()
 
   // Reuse the Calendar object in each thread as it is expensive to create in each method call.
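Internal call sites can now share this constant instead of repeating the string literal; for example (a sketch with a made-up options map, using only members visible in this diff):

```scala
import java.util.TimeZone
import org.apache.spark.sql.catalyst.util.DateTimeUtils

val options = Map("timeZone" -> "GMT")
val tz = TimeZone.getTimeZone(
  options.getOrElse(DateTimeUtils.TIMEZONE_OPTION,
    DateTimeUtils.defaultTimeZone().getID))
```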
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index e3584909dd..19d0c8eb92 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -471,7 +471,8 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(
       JsonToStruct(
         schema,
-        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", "timeZone" -> tz.getID),
+        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
+          DateTimeUtils.TIMEZONE_OPTION -> tz.getID),
         Literal(jsonData2),
         gmtId),
       InternalRow(c.getTimeInMillis * 1000L)
@@ -523,14 +524,16 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(
       StructToJson(
-        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", "timeZone" -> gmtId.get),
+        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
+          DateTimeUtils.TIMEZONE_OPTION -> gmtId.get),
         struct,
         gmtId),
       """{"t":"2016-01-01T00:00:00"}"""
     )
 
     checkEvaluation(
       StructToJson(
-        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", "timeZone" -> "PST"),
+        Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
+          DateTimeUtils.TIMEZONE_OPTION -> "PST"),
         struct,
         gmtId),
       """{"t":"2015-12-31T16:00:00"}"""