From 523abd77000df6e7323a0bd92936184d567af19f Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Tue, 19 Mar 2013 10:08:26 -0600 Subject: Leveraged companion objects for better performance. --- .../rockymadden/stringmetric/phonetic/MetaphoneMetric.scala | 11 ++++------- .../com/rockymadden/stringmetric/phonetic/NysiisMetric.scala | 9 +++------ .../stringmetric/phonetic/RefinedNysiisMetric.scala | 9 +++------ .../stringmetric/phonetic/RefinedSoundexMetric.scala | 9 +++------ .../rockymadden/stringmetric/phonetic/SoundexMetric.scala | 9 +++------ .../stringmetric/similarity/DiceSorensenMetric.scala | 12 ++++-------- .../rockymadden/stringmetric/similarity/JaccardMetric.scala | 12 ++++-------- .../stringmetric/similarity/JaroWinklerMetric.scala | 2 +- .../rockymadden/stringmetric/similarity/NGramMetric.scala | 12 ++++-------- .../rockymadden/stringmetric/similarity/OverlapMetric.scala | 12 ++++-------- 10 files changed, 33 insertions(+), 64 deletions(-) (limited to 'core/source') diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala index 1677e20..622dcc6 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/MetaphoneMetric.scala @@ -3,7 +3,7 @@ package com.rockymadden.stringmetric.phonetic import com.rockymadden.stringmetric.{ StringFilter, StringMetric } import com.rockymadden.stringmetric.Alphabet.Alpha -/** A implementation of the Metaphone metric. */ +/** An implementation of the Metaphone metric. */ class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFilter => final override def compare(charArray1: Array[Char], charArray2: Array[Char]) (implicit di: DummyImplicit): Option[Boolean] = { @@ -12,12 +12,9 @@ class MetaphoneMetric extends StringMetric[DummyImplicit, Boolean] { this: Strin lazy val fca2 = filter(charArray2) if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None - else { - val metaphoneAlgorithm = MetaphoneAlgorithm() - - metaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 => - metaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_))) - } + else MetaphoneAlgorithm.compute(fca1).filter(_.length > 0).flatMap(mp1 => + MetaphoneAlgorithm.compute(fca2).filter(_.length > 0).map(mp1.sameElements(_)) + ) } final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala index ff3e251..cbae0a4 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/NysiisMetric.scala @@ -20,12 +20,9 @@ class NysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: StringFi if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None else if (unequal(fca1.head, fca2.head)) Some(false) - else { - val nysiisAlgorithm = NysiisAlgorithm() - - nysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 => - nysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_))) - } + else NysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(ny1 => + NysiisAlgorithm.compute(fca2).filter(_.length > 0).map(ny1.sameElements(_)) + ) } final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala index b554147..52e8294 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedNysiisMetric.scala @@ -20,12 +20,9 @@ class RefinedNysiisMetric extends StringMetric[DummyImplicit, Boolean] { this: S if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None else if (unequal(fca1.head, fca2.head)) Some(false) - else { - val refinedNysiisAlgorithm = RefinedNysiisAlgorithm() - - refinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 => - refinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_))) - } + else RefinedNysiisAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rny1 => + RefinedNysiisAlgorithm.compute(fca2).filter(_.length > 0).map(rny1.sameElements(_)) + ) } final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala index 777cb3e..b8e6125 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/RefinedSoundexMetric.scala @@ -13,12 +13,9 @@ class RefinedSoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None else if (fca1.head.toLower != fca2.head.toLower) Some(false) - else { - val refinedSoundexAlgorithm = RefinedSoundexAlgorithm() - - refinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 => - refinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_))) - } + else RefinedSoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(rse1 => + RefinedSoundexAlgorithm.compute(fca2).filter(_.length > 0).map(rse1.sameElements(_)) + ) } final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = diff --git a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala index 512fa4d..036d275 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/phonetic/SoundexMetric.scala @@ -13,12 +13,9 @@ class SoundexMetric extends StringMetric[DummyImplicit, Boolean] { this: StringF if (fca1.length == 0 || !(Alpha isSuperset fca1.head) || fca2.length == 0 || !(Alpha isSuperset fca2.head)) None else if (fca1.head.toLower != fca2.head.toLower) Some(false) - else { - val soundexAlgorithm = SoundexAlgorithm() - - soundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 => - soundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_))) - } + else SoundexAlgorithm.compute(fca1).filter(_.length > 0).flatMap(se1 => + SoundexAlgorithm.compute(fca2).filter(_.length > 0).map(se1.sameElements(_)) + ) } final override def compare(string1: String, string2: String)(implicit di: DummyImplicit): Option[Boolean] = diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala index 5ee0d1a..5a5b969 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/DiceSorensenMetric.scala @@ -16,15 +16,11 @@ class DiceSorensenMetric extends StringMetric[Int, Double] { this: StringFilter if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. else if (fca1.sameElements(fca2)) Some(1d) - else { - val nGramTokenizer = NGramTokenizer() + else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => + NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - nGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - nGramTokenizer.tokenize(fca2)(n).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - - (2d * ms) / (ca1bg.length + ca2bg.length) - } + (2d * ms) / (ca1bg.length + ca2bg.length) } } } diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala index f54a8c7..b544ab9 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaccardMetric.scala @@ -13,15 +13,11 @@ class JaccardMetric extends StringMetric[Int, Double] { this: StringFilter => if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. else if (fca1.sameElements(fca2)) Some(1d) - else { - val nGramTokenizer = NGramTokenizer() + else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => + NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - nGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - nGramTokenizer.tokenize(fca2)(n).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - - ms.toDouble / (ca1bg.length + ca2bg.length) - } + ms.toDouble / (ca1bg.length + ca2bg.length) } } } diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala index 92e5e9d..24fd316 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/JaroWinklerMetric.scala @@ -14,7 +14,7 @@ class JaroWinklerMetric extends StringMetric[DummyImplicit, Double] { this: Stri val fca1 = filter(charArray1) val fca2 = filter(charArray2) - JaroMetric().compare(fca1, fca2).map { + JaroMetric.compare(fca1, fca2).map { case 0d => 0d case 1d => 1d case jaro => { diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala index 5ab5ffa..ca6fa51 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/NGramMetric.scala @@ -14,15 +14,11 @@ class NGramMetric extends StringMetric[Int, Double] { this: StringFilter => if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. else if (fca1.sameElements(fca2)) Some(1d) - else { - val nGramTokenizer = NGramTokenizer() + else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => + NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) - nGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - nGramTokenizer.tokenize(fca2)(n).map { ca2bg => - val ms = scoreMatches((ca1bg.map(_.mkString), ca2bg.map(_.mkString))) - - ms.toDouble / math.max(ca1bg.length, ca2bg.length) - } + ms.toDouble / math.max(ca1bg.length, ca2bg.length) } } } diff --git a/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala index 24ced49..ee8bba0 100755 --- a/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala +++ b/core/source/core/scala/com/rockymadden/stringmetric/similarity/OverlapMetric.scala @@ -14,15 +14,11 @@ class OverlapMetric extends StringMetric[Int, Double] { this: StringFilter => if (fca1.length < n || fca2.length < n) None // Because length is less than n, it is not possible to compare. else if (fca1.sameElements(fca2)) Some(1d) - else { - val nGramTokenizer = NGramTokenizer() + else NGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => + NGramTokenizer.tokenize(fca2)(n).map { ca2bg => + val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - nGramTokenizer.tokenize(fca1)(n).flatMap { ca1bg => - nGramTokenizer.tokenize(fca2)(n).map { ca2bg => - val ms = scoreMatches(ca1bg.map(_.mkString), ca2bg.map(_.mkString)) - - ms.toDouble / (math.min(ca1bg.length, ca2bg.length)) - } + ms.toDouble / (math.min(ca1bg.length, ca2bg.length)) } } } -- cgit v1.2.3