diff options
author | Rocky Madden <git@rockymadden.com> | 2012-11-12 13:11:32 -0700 |
---|---|---|
committer | Rocky Madden <git@rockymadden.com> | 2012-11-12 13:11:32 -0700 |
commit | bebf09dbdd1f286abe7096a7005ff33940c230b6 (patch) | |
tree | 28c640d19731a041957a1c08e559af215c4cb3bf /core/source | |
parent | 1dac33e054214a29cfb2fcaf27e77f750c8cf5b1 (diff) | |
download | stringmetric-bebf09dbdd1f286abe7096a7005ff33940c230b6.tar.gz stringmetric-bebf09dbdd1f286abe7096a7005ff33940c230b6.tar.bz2 stringmetric-bebf09dbdd1f286abe7096a7005ff33940c230b6.zip |
Minor performance enhancements.
Diffstat (limited to 'core/source')
8 files changed, 17 insertions, 15 deletions
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
index ac5aa79..a226e41 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetric.scala
@@ -8,12 +8,12 @@ object MetaphoneMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else {
  val mp1 = MetaphoneAlgorithm.compute(ca1)
- val mp2 = MetaphoneAlgorithm.compute(ca2)
+ lazy val mp2 = MetaphoneAlgorithm.compute(ca2)
  if (!mp1.isDefined || !mp2.isDefined || (mp1.get.length == 0 && mp2.get.length == 0)) None
  else Some(mp1.get.sameElements(mp2.get))
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
index 3332f56..90b92e2 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/NysiisMetric.scala
@@ -8,12 +8,12 @@ object NysiisMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else {
  val ny1 = NysiisAlgorithm.compute(ca1)
- val ny2 = NysiisAlgorithm.compute(ca2)
+ lazy val ny2 = NysiisAlgorithm.compute(ca2)
  if (!ny1.isDefined || !ny2.isDefined || (ny1.get.length == 0 && ny2.get.length == 0)) None
  else Some(ny1.get.sameElements(ny2.get))
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
index b3203d1..ebe8358 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetric.scala
@@ -8,12 +8,12 @@ object RefinedSoundexMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else {
  val rse1 = RefinedSoundexAlgorithm.compute(ca1)
- val rse2 = RefinedSoundexAlgorithm.compute(ca2)
+ lazy val rse2 = RefinedSoundexAlgorithm.compute(ca2)
  if (!rse1.isDefined || !rse2.isDefined || (rse1.get.length == 0 && rse2.get.length == 0)) None
  else Some(rse1.get.sameElements(rse2.get))
diff --git a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
index c7e4d5d..d11d2c1 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/phonetic/SoundexMetric.scala
@@ -8,12 +8,12 @@ object SoundexMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else {
  val se1 = SoundexAlgorithm.compute(ca1)
- val se2 = SoundexAlgorithm.compute(ca2)
+ lazy val se2 = SoundexAlgorithm.compute(ca2)
  if (!se1.isDefined || !se2.isDefined || (se1.get.length == 0 && se2.get.length == 0)) None
  else Some(se1.get.sameElements(se2.get))
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
index 67768ae..4daaa19 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetric.scala
@@ -11,7 +11,7 @@ object DiceSorensenMetric extends StringMetric with FilterableConfigurableString
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int)(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else if (ca1.length < n || ca2.length < n) Some(0d) // Because length is less than n, it will always be 0.
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
index 0d383e0..b29bdac 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/HammingMetric.scala
@@ -8,7 +8,7 @@ object HammingMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0 || ca1.length != ca2.length) None
  else if (ca1.sameElements(ca2)) Some(0)
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
index e692641..f77ddb8 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/JaroMetric.scala
@@ -13,18 +13,20 @@ object JaroMetric extends StringMetric with FilterableStringMetric {
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else if (ca1.sameElements(ca2)) Some(1d)
  else {
  val mt = `match`((ca1, ca2))
  val ms = scoreMatches((mt._1, mt._2))
- val ts = scoreTranspositions((mt._1, mt._2))
  if (ms == 0) Some(0d)
- else
+ else {
+ val ts = scoreTranspositions((mt._1, mt._2))
+
  Some(((ms.toDouble / ca1.length) + (ms.toDouble / ca2.length) + ((ms.toDouble - ts) / ms)) / 3)
+ }
  }
  }
@@ -35,7 +37,7 @@ object JaroMetric extends StringMetric with FilterableStringMetric {
  )(new StringFilterDelegate)
  private[this] def `match`(ct: CompareTuple[Char]): MatchTuple[Char] = {
- val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
+ lazy val window = math.abs((math.max(ct._1.length, ct._2.length) / 2d).floor.toInt - 1)
  val one = ArrayBuffer.empty[Int]
  val two = HashSet.empty[Int]
  var i = 0
diff --git a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
index 29d7054..5619646 100755
--- a/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
+++ b/core/source/core/scala/org/hashtree/stringmetric/similarity/NGramMetric.scala
@@ -9,7 +9,7 @@ object NGramMetric extends StringMetric with FilterableConfigurableStringMetric[
  override def compare(charArray1: Array[Char], charArray2: Array[Char])(n: Int)(implicit stringFilter: StringFilter): Option[CompareReturn] = {
  val ca1 = stringFilter.filter(charArray1)
- val ca2 = stringFilter.filter(charArray2)
+ lazy val ca2 = stringFilter.filter(charArray2)
  if (ca1.length == 0 || ca2.length == 0) None
  else if (ca1.length < n || ca2.length < n) Some(0d) // Because length is less than n, it will always be 0.
|