diff options
Diffstat (limited to 'core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala')
-rwxr-xr-x | core/src/main/scala/com/rockymadden/stringmetric/phonetic/SoundexAlgorithm.scala | 57 |
1 files changed, 57 insertions, 0 deletions
package com.rockymadden.stringmetric.phonetic

import com.rockymadden.stringmetric.Algorithm.StringAlgorithm

/**
 * Soundex phonetic encoding.
 *
 * The result is the lower-cased first character of the input followed by up to three
 * digits in the range '1' to '6', right-padded with '0' to a length of four.
 */
case object SoundexAlgorithm extends StringAlgorithm {
  import com.rockymadden.stringmetric.Alphabet.Alpha

  /** Sentinel returned by [[code]] for characters that carry no Soundex digit. */
  private final val Uncoded = '\u0000'

  /**
   * Encodes a character array.
   *
   * @param a the characters to encode
   * @return None when `a` is empty or its first character is rejected by `Alpha isSuperset`;
   *         otherwise the four character code
   */
  override def compute(a: Array[Char]): Option[Array[Char]] =
    if (a.length == 0 || !(Alpha isSuperset a.head)) None
    else {
      val fc = a.head.toLower

      // Start scanning at index 1: the first character is emitted verbatim and only
      // serves as the "previous character" for the second one.
      Some(transcode(a, 1, fc, Array(fc)).padTo(4, '0'))
    }

  /**
   * Encodes a string by delegating to the character array overload.
   *
   * @param string the text to encode
   * @return None under the same conditions as the array overload; otherwise the code as a String
   */
  override def compute(string: String): Option[String] = compute(string.toCharArray).map(_.mkString)

  /** Maps a lower-case letter to its Soundex digit, or to [[Uncoded]] when it has none. */
  private def code(c: Char): Char = (c: @annotation.switch) match {
    case 'b' | 'f' | 'p' | 'v' => '1'
    case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => '2'
    case 'd' | 't' => '3'
    case 'l' => '4'
    case 'm' | 'n' => '5'
    case 'r' => '6'
    case _ => Uncoded
  }

  /**
   * Digit that the most recently emitted output element stands for. The first output
   * element is a letter, so it is translated; later elements are already digits.
   */
  private def lastCode(out: Array[Char]): Char = {
    val last = out(out.length - 1)

    if (last >= '1' && last <= '6') last else code(last)
  }

  /**
   * Scans `in` from `idx`, appending digits to `out` until the input is exhausted or
   * `out` holds four elements.
   *
   * This is a genuine tail-recursive method that walks the array by index, so neither
   * stack depth nor per-step copying grows with the input length.
   *
   * @param in the full input characters
   * @param idx index of the next character to examine
   * @param pc the previously examined character, lower-cased
   * @param out the code built so far; never empty
   * @return the code, not yet padded
   */
  @annotation.tailrec
  private def transcode(in: Array[Char], idx: Int, pc: Char, out: Array[Char]): Array[Char] =
    if (idx >= in.length) out
    else {
      val c = in(idx).toLower
      val digit = pc match {
        // Directly after a vowel (or y) a letter is always coded, even when its digit
        // repeats the last one emitted.
        case 'a' | 'e' | 'i' | 'o' | 'u' | 'y' => code(c)
        // Otherwise a letter is coded only when its digit differs from the last one emitted.
        case _ =>
          val k = code(c)

          if (k == lastCode(out)) Uncoded else k
      }

      if (digit == Uncoded) transcode(in, idx + 1, c, out)
      else if (out.length == 3) out :+ digit
      else transcode(in, idx + 1, c, out :+ digit)
    }
}