diff options
author | Davies Liu <davies@databricks.com> | 2015-07-23 18:31:13 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-23 18:31:13 -0700 |
commit | 8a94eb23d53e291441e3144a1b800fe054457040 (patch) | |
tree | d9594f55c4d07d8e8bd1da4226db5186269c9f93 /python/pyspark/sql | |
parent | bebe3f7b45f7b0a96f20d5af9b80633fd40cff06 (diff) | |
download | spark-8a94eb23d53e291441e3144a1b800fe054457040.tar.gz spark-8a94eb23d53e291441e3144a1b800fe054457040.tar.bz2 spark-8a94eb23d53e291441e3144a1b800fe054457040.zip |
[SPARK-9069] [SPARK-9264] [SQL] remove unlimited precision support for DecimalType
Remove Decimal.Unlimited (change to support precision up to 38, to match Hive and other databases).
In order to keep backward source compatibility, Decimal.Unlimited is still there, but is changed to Decimal(38, 18).
If no precision and scale are provided, it's Decimal(10, 0) as before.
Author: Davies Liu <davies@databricks.com>
Closes #7605 from davies/decimal_unlimited and squashes the following commits:
aa3f115 [Davies Liu] fix tests and style
fb0d20d [Davies Liu] address comments
bfaae35 [Davies Liu] fix style
df93657 [Davies Liu] address comments and clean up
06727fd [Davies Liu] Merge branch 'master' of github.com:apache/spark into decimal_unlimited
4c28969 [Davies Liu] fix tests
8d783cc [Davies Liu] fix tests
788631c [Davies Liu] fix double with decimal in Union/except
1779bde [Davies Liu] fix scala style
c9c7c78 [Davies Liu] remove Decimal.Unlimited
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/types.py | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 10ad89ea14..b97d50c945 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -194,30 +194,33 @@ class TimestampType(AtomicType): class DecimalType(FractionalType): """Decimal (decimal.Decimal) data type. + + The DecimalType must have fixed precision (the maximum total number of digits) + and scale (the number of digits on the right of dot). For example, (5, 2) can + support the value from [-999.99 to 999.99]. + + The precision can be up to 38, the scale must less or equal to precision. + + When create a DecimalType, the default precision and scale is (10, 0). When infer + schema from decimal.Decimal objects, it will be DecimalType(38, 18). + + :param precision: the maximum total number of digits (default: 10) + :param scale: the number of digits on right side of dot. (default: 0) """ - def __init__(self, precision=None, scale=None): + def __init__(self, precision=10, scale=0): self.precision = precision self.scale = scale - self.hasPrecisionInfo = precision is not None + self.hasPrecisionInfo = True # this is public API def simpleString(self): - if self.hasPrecisionInfo: - return "decimal(%d,%d)" % (self.precision, self.scale) - else: - return "decimal(10,0)" + return "decimal(%d,%d)" % (self.precision, self.scale) def jsonValue(self): - if self.hasPrecisionInfo: - return "decimal(%d,%d)" % (self.precision, self.scale) - else: - return "decimal" + return "decimal(%d,%d)" % (self.precision, self.scale) def __repr__(self): - if self.hasPrecisionInfo: - return "DecimalType(%d,%d)" % (self.precision, self.scale) - else: - return "DecimalType()" + return "DecimalType(%d,%d)" % (self.precision, self.scale) class DoubleType(FractionalType): @@ -761,7 +764,10 @@ def _infer_type(obj): return obj.__UDT__ dataType = _type_mappings.get(type(obj)) - if dataType is not None: + if dataType is DecimalType: + # the precision and scale of `obj` may be different from row to row. 
+ return DecimalType(38, 18) + elif dataType is not None: return dataType() if isinstance(obj, dict): |