summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdriaan Moors <adriaan.moors@typesafe.com>2015-06-25 15:05:43 -0700
committerAdriaan Moors <adriaan.moors@typesafe.com>2015-06-25 15:05:43 -0700
commit662e23eefbe67d843945b31495e138d02f60fe5e (patch)
tree3eb0aa3ef1e9c7892c6a9d6bc12e12304e5cc8f6
parentfd526c7542492c1ddc4fffe075849fc245971cf4 (diff)
parent917f7a8239a47e678060c28cabf1d0fddc7ca891 (diff)
downloadscala-662e23eefbe67d843945b31495e138d02f60fe5e.tar.gz
scala-662e23eefbe67d843945b31495e138d02f60fe5e.tar.bz2
scala-662e23eefbe67d843945b31495e138d02f60fe5e.zip
Merge pull request #4514 from martijnhoekstra/patch-2
Documentation for split [ci: last-only]
-rw-r--r--src/library/scala/collection/immutable/StringLike.scala87
-rw-r--r--test/junit/scala/collection/immutable/StringLikeTest.scala6
2 files changed, 64 insertions, 29 deletions
diff --git a/src/library/scala/collection/immutable/StringLike.scala b/src/library/scala/collection/immutable/StringLike.scala
index 1ead894faf..7b3da1e2ea 100644
--- a/src/library/scala/collection/immutable/StringLike.scala
+++ b/src/library/scala/collection/immutable/StringLike.scala
@@ -201,35 +201,64 @@ self =>
*/
def stripMargin: String = stripMargin('|')
- private def escape(ch: Char): String = "\\Q" + ch + "\\E"
-
- def split(separator: Char): Array[String] = {
- val thisString = toString
- var pos = thisString.indexOf(separator)
-
- if (pos != -1) {
- val res = new ArrayBuilder.ofRef[String]
-
- var prev = 0
- do {
- res += thisString.substring(prev, pos)
- prev = pos + 1
- pos = thisString.indexOf(separator, prev)
- } while (pos != -1)
-
- if (prev != thisString.size)
- res += thisString.substring(prev, thisString.size)
-
- val initialResult = res.result()
- pos = initialResult.length
- while (pos > 0 && initialResult(pos - 1).isEmpty) pos = pos - 1
- if (pos != initialResult.length) {
- val trimmed = new Array[String](pos)
- Array.copy(initialResult, 0, trimmed, 0, pos)
- trimmed
- } else initialResult
- } else Array[String](thisString)
- }
+ private def escape(ch: Char): String = if (
+ (ch >= 'a') && (ch <= 'z') ||
+ (ch >= 'A') && (ch <= 'Z') ||
+ (ch >= '0' && ch <= '9')) ch.toString
+ else "\\" + ch
+
+ /** Split this string around the separator character
+ *
+ * If this string is the empty string, returns an array of strings
+ * that contains a single empty string.
+ *
+ * If this string is not the empty string, returns an array containing
+ * the substrings terminated by the start of the string, the end of the
+ * string or the separator character, excluding empty trailing substrings
+ *
+ * If the separator character is a surrogate character, only split on
+ * matching surrogate characters if they are not part of a surrogate pair
+ *
+ * The behaviour follows, and is implemented in terms of <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html#split%28java.lang.String%29">String.split(re: String)</a>
+ *
+ *
+ * @example {{{
+ * "a.b".split('.') //returns Array("a", "b")
+ *
+ * //splitting the empty string always returns the array with a single
+ * //empty string
+ * "".split('.') //returns Array("")
+ *
+ * //only trailing empty substrings are removed
+ * "a.".split('.') //returns Array("a")
+ * ".a.".split('.') //returns Array("", "a")
+ * "..a..".split('.') //returns Array("", "", "a")
+ *
+ * //all parts are empty and trailing
+ * ".".split('.') //returns Array()
+ * "..".split('.') //returns Array()
+ *
+ * //surrogate pairs
+ * val high = 0xD852.toChar
+ * val low = 0xDF62.toChar
+ * val highstring = high.toString
+ * val lowstring = low.toString
+ *
+ * //well-formed surrogate pairs are not split
+ * val highlow = highstring + lowstring
+ * highlow.split(high) //returns Array(highlow)
+ *
+ * //bare surrogate characters are split
+ * val bare = "_" + highstring + "_"
+ * bare.split(high) //returns Array("_", "_")
+ *
+ * }}}
+ *
+ * @param separator the character used as a delimiter
+ */
+ def split(separator: Char): Array[String] =
+ toString.split(escape(separator))
+
@throws(classOf[java.util.regex.PatternSyntaxException])
def split(separators: Array[Char]): Array[String] = {
diff --git a/test/junit/scala/collection/immutable/StringLikeTest.scala b/test/junit/scala/collection/immutable/StringLikeTest.scala
index 3722bdfe4d..50be638b89 100644
--- a/test/junit/scala/collection/immutable/StringLikeTest.scala
+++ b/test/junit/scala/collection/immutable/StringLikeTest.scala
@@ -28,10 +28,16 @@ class StringLikeTest {
@Test
def testSplitEdgeCases: Unit = {
+ val high = 0xD852.toChar
+ val low = 0xDF62.toChar
+ val surrogatepair = List(high, low).mkString
+ val twopairs = surrogatepair + "_" + surrogatepair
+
AssertUtil.assertSameElements("abcd".split('d'), Array("abc")) // not Array("abc", "")
AssertUtil.assertSameElements("abccc".split('c'), Array("ab")) // not Array("ab", "", "", "")
AssertUtil.assertSameElements("xxx".split('x'), Array[String]()) // not Array("", "", "", "")
AssertUtil.assertSameElements("".split('x'), Array("")) // not Array()
AssertUtil.assertSameElements("--ch--omp--".split("-"), Array("", "", "ch", "", "omp")) // All the cases!
+ AssertUtil.assertSameElements(twopairs.split(high), Array(twopairs)) //don't split on characters that are half a surrogate pair
}
}