aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
blob: 7d3608033ba598245cbe64f35155282cc1d37f5e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types._

/**
 * Tests for `DataTypeParser.parse`. Each call to one of the helpers below registers its own
 * test case: either the type string must parse to the expected [[DataType]], or parsing must
 * fail with a `DataTypeException`.
 */
class DataTypeParserSuite extends SparkFunSuite {

  /**
   * Registers a test asserting that `dataTypeString` parses to `expectedDataType`.
   *
   * Newline characters are removed from the input when the test name is built, so multi-line
   * type strings still yield a single-line name. The parser itself receives the original string.
   *
   * @param dataTypeString the textual data type handed to the parser
   * @param expectedDataType the data type the parser is expected to return
   */
  def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
    val singleLineInput = dataTypeString.replace("\n", "")
    test(s"parse $singleLineInput") {
      val parsed = DataTypeParser.parse(dataTypeString)
      assert(parsed === expectedDataType)
    }
  }

  /**
   * Registers a test asserting that parsing `dataTypeString` throws a `DataTypeException`.
   *
   * @param dataTypeString the textual data type that is expected to be rejected
   */
  def unsupported(dataTypeString: String): Unit = {
    test(s"${dataTypeString} is not supported") {
      intercept[DataTypeException] {
        DataTypeParser.parse(dataTypeString)
      }
    }
  }

  // Primitive type names. Mixed-case spellings are expected to parse as well, and the
  // char/varchar variants are expected to come back as StringType.
  Seq[(String, DataType)](
    "int" -> IntegerType,
    "integer" -> IntegerType,
    "BooLean" -> BooleanType,
    "tinYint" -> ByteType,
    "smallINT" -> ShortType,
    "INT" -> IntegerType,
    "INTEGER" -> IntegerType,
    "bigint" -> LongType,
    "float" -> FloatType,
    "dOUBle" -> DoubleType,
    "decimal(10, 5)" -> DecimalType(10, 5),
    "decimal" -> DecimalType.USER_DEFAULT,
    "DATE" -> DateType,
    "timestamp" -> TimestampType,
    "string" -> StringType,
    "ChaR(5)" -> StringType,
    "varchAr(20)" -> StringType,
    "cHaR(27)" -> StringType,
    "BINARY" -> BinaryType
  ).foreach { case (typeString, expected) => checkDataType(typeString, expected) }

  // Arrays, including arrays of maps and of structs.
  checkDataType("array<doublE>", ArrayType(DoubleType, containsNull = true))
  checkDataType(
    "Array<map<int, tinYint>>",
    ArrayType(MapType(IntegerType, ByteType, valueContainsNull = true), containsNull = true)
  )
  checkDataType(
    "array<struct<tinYint:tinyint>>",
    ArrayType(
      StructType(Seq(StructField("tinYint", ByteType, nullable = true))),
      containsNull = true)
  )

  // Maps, including maps whose values are arrays or structs.
  checkDataType("MAP<int, STRING>", MapType(IntegerType, StringType, valueContainsNull = true))
  checkDataType(
    "MAp<int, ARRAY<double>>",
    MapType(IntegerType, ArrayType(DoubleType), valueContainsNull = true)
  )
  checkDataType(
    "MAP<int, struct<varchar:string>>",
    MapType(
      IntegerType,
      StructType(Seq(StructField("varchar", StringType, nullable = true))),
      valueContainsNull = true)
  )

  // A flat struct with two fields.
  checkDataType(
    "struct<intType: int, ts:timestamp>",
    StructType(Seq(
      StructField("intType", IntegerType, nullable = true),
      StructField("ts", TimestampType, nullable = true)))
  )
  // Names of data types (`int`, `timestamp`) are accepted as field names.
  checkDataType(
    "Struct<int: int, timestamp:timestamp>",
    StructType(Seq(
      StructField("int", IntegerType, nullable = true),
      StructField("timestamp", TimestampType, nullable = true)))
  )
  // A multi-line struct that nests a struct, a map and two arrays.
  checkDataType(
    """
      |struct<
      |  struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
      |  MAP:Map<timestamp, varchar(10)>,
      |  arrAy:Array<double>,
      |  anotherArray:Array<char(9)>>
    """.stripMargin,
    StructType(Seq(
      StructField(
        "struct",
        StructType(Seq(
          StructField("deciMal", DecimalType.USER_DEFAULT, nullable = true),
          StructField("anotherDecimal", DecimalType(5, 2), nullable = true))),
        nullable = true),
      StructField("MAP", MapType(TimestampType, StringType), nullable = true),
      StructField("arrAy", ArrayType(DoubleType, containsNull = true), nullable = true),
      StructField("anotherArray", ArrayType(StringType, containsNull = true), nullable = true)))
  )
  // Words that are reserved in our DDL parser and SqlParser may still be used as field names.
  checkDataType(
    "Struct<TABLE: string, CASE:boolean>",
    StructType(Seq(
      StructField("TABLE", StringType, nullable = true),
      StructField("CASE", BooleanType, nullable = true)))
  )
  // Field names containing special characters are written between backticks.
  checkDataType(
    "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
    StructType(Seq(
      StructField("x+y", IntegerType, nullable = true),
      StructField("!@#$%^&*()", StringType, nullable = true),
      StructField("1_2.345<>:\"", StringType, nullable = true)))
  )
  // A struct without any fields.
  checkDataType("strUCt<>", StructType(Nil))

  // Inputs for which the parser is expected to throw a DataTypeException.
  Seq(
    "it is not a data type",
    "struct<x+y: int, 1.1:timestamp>",
    "struct<x: int",
    "struct<x int, y string>",
    "struct<`x``y` int>"
  ).foreach(unsupported)
}