From d8afd45f8949e0914ce4bd56d832b1158e3c9220 Mon Sep 17 00:00:00 2001 From: lgieron Date: Wed, 2 Mar 2016 15:57:27 +0000 Subject: [SPARK-13515] Make FormatNumber work irrespective of locale. ## What changes were proposed in this pull request? Change in class FormatNumber to make it work irrespective of locale. ## How was this patch tested? Unit tests. Author: lgieron Closes #11396 from lgieron/SPARK-13515_Fix_Format_Number. --- .../sql/catalyst/expressions/stringExpressions.scala | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 4be065b30a..3ee19cc4ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.text.DecimalFormat +import java.text.{DecimalFormat, DecimalFormatSymbols} import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow @@ -938,8 +938,10 @@ case class FormatNumber(x: Expression, d: Expression) @transient private val pattern: StringBuffer = new StringBuffer() + // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') + // as a decimal separator. @transient - private val numberFormat: DecimalFormat = new DecimalFormat("") + private val numberFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) override protected def nullSafeEval(xObject: Any, dObject: Any): Any = { val dValue = dObject.asInstanceOf[Int] @@ -962,10 +964,9 @@ case class FormatNumber(x: Expression, d: Expression) pattern.append("0") } } - val dFormat = new DecimalFormat(pattern.toString) lastDValue = dValue - numberFormat.applyPattern(dFormat.toPattern) + numberFormat.applyLocalizedPattern(pattern.toString) } x.dataType match { @@ -992,6 +993,11 @@ case class FormatNumber(x: Expression, d: Expression) val sb = classOf[StringBuffer].getName val df = classOf[DecimalFormat].getName + val dfs = classOf[DecimalFormatSymbols].getName + val l = classOf[Locale].getName + // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') + // as a decimal separator. + val usLocale = "US" val lastDValue = ctx.freshName("lastDValue") val pattern = ctx.freshName("pattern") val numberFormat = ctx.freshName("numberFormat") @@ -999,7 +1005,8 @@ case class FormatNumber(x: Expression, d: Expression) val dFormat = ctx.freshName("dFormat") ctx.addMutableState("int", lastDValue, s"$lastDValue = -100;") ctx.addMutableState(sb, pattern, s"$pattern = new $sb();") - ctx.addMutableState(df, numberFormat, s"""$numberFormat = new $df("");""") + ctx.addMutableState(df, numberFormat, + s"""$numberFormat = new $df("", new $dfs($l.$usLocale));""") s""" if ($d >= 0) { @@ -1013,9 +1020,8 @@ case class FormatNumber(x: Expression, d: Expression) $pattern.append("0"); } } - $df $dFormat = new $df($pattern.toString()); $lastDValue = $d; - $numberFormat.applyPattern($dFormat.toPattern()); + $numberFormat.applyLocalizedPattern($pattern.toString()); } ${ev.value} = UTF8String.fromString($numberFormat.format(${typeHelper(num)})); } else { -- cgit v1.2.3