From 690846dd22982d7a4a9b998686d6d3768163475c Mon Sep 17 00:00:00 2001 From: Rocky Madden Date: Thu, 15 Nov 2012 10:32:18 -0700 Subject: Added Google Caliper microbenchmark suite for every metric and algorithm. --- .../hashtree/stringmetric/CaliperBenchmark.scala | 7 +++ .../org/hashtree/stringmetric/CaliperRunner.scala | 7 +++ .../phonetic/MetaphoneAlgorithmBenchmark.scala | 28 +++++++++++ .../phonetic/MetaphoneMetricBenchmark.scala | 52 ++++++++++++++++++++ .../phonetic/NysiisAlgorithmBenchmark.scala | 28 +++++++++++ .../phonetic/NysiisMetricBenchmark.scala | 52 ++++++++++++++++++++ .../RefinedSoundexAlgorithmBenchmark.scala | 28 +++++++++++ .../phonetic/RefinedSoundexMetricBenchmark.scala | 52 ++++++++++++++++++++ .../phonetic/SoundexAlgorithmBenchmark.scala | 28 +++++++++++ .../phonetic/SoundexMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/DiceSorensenMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/HammingMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/JaroMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/JaroWinklerMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/LevenshteinMetricBenchmark.scala | 52 ++++++++++++++++++++ .../similarity/NGramAlgorithmBenchmark.scala | 31 ++++++++++++ .../similarity/NGramMetricBenchmark.scala | 55 ++++++++++++++++++++++ .../WeightedLevenshteinMetricBenchmark.scala | 52 ++++++++++++++++++++ 18 files changed, 732 insertions(+) create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/CaliperBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/CaliperRunner.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala create mode 
100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/HammingMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/LevenshteinMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramAlgorithmBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramMetricBenchmark.scala create mode 100755 core/source/benchmark/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala (limited to 'core/source') diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperBenchmark.scala new file mode 100755 index 0000000..47beaad --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperBenchmark.scala @@ -0,0 +1,7 @@ +package org.hashtree.stringmetric + +import com.google.caliper.SimpleBenchmark + +/** Shared base for the Caliper benchmarks added by this patch; adds a helper that repeats a timed snippet. */ trait CaliperBenchmark extends SimpleBenchmark { + /** Evaluates the by-name code argument once for each of reps iterations. NOTE(review): the loop goes through Range.foreach with a closure and discards the snippet result, which may add overhead to (or let the JIT elide) very small snippets - consider a plain while loop. */ def run(reps: Int)(code: 
=> Unit) = (0 until reps).foreach(i => code) +} \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperRunner.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperRunner.scala new file mode 100755 index 0000000..ffb69e2 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/CaliperRunner.scala @@ -0,0 +1,7 @@ +package org.hashtree.stringmetric + +import com.google.caliper.{ Benchmark, Runner } + +/** Base class for benchmark entry points; holds the benchmark class to run. */ abstract class CaliperRunner(private[this] val suite: java.lang.Class[_ <: Benchmark]) { + /** Passes suite and the command-line args to Runner.main. */ def main(args: Array[String]): Unit = Runner.main(suite, args) +} \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala new file mode 100755 index 0000000..27c93af --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneAlgorithmBenchmark.scala @@ -0,0 +1,28 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.util.Random + +/** Times MetaphoneAlgorithm.compute on one random input of length characters, passed as an Array[Char] and as a String. */ final class MetaphoneAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + /** Draws length characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and caches the result as String and Array[Char]. */ override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + MetaphoneAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + MetaphoneAlgorithm.compute(string) + } +} + +/** Entry point that runs MetaphoneAlgorithmBenchmark via the main method inherited from CaliperRunner. */ object MetaphoneAlgorithmBenchmark extends CaliperRunner(classOf[MetaphoneAlgorithmBenchmark]) \ No newline at end of file diff --git 
a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetricBenchmark.scala new file mode 100755 index 0000000..48b09fa --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/MetaphoneMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times MetaphoneMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class MetaphoneMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + MetaphoneMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + MetaphoneMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + MetaphoneMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + MetaphoneMetric.compare(string1, string1) + } +} + +/** Entry point that runs MetaphoneMetricBenchmark via the main method inherited from CaliperRunner. */ object MetaphoneMetricBenchmark extends CaliperRunner(classOf[MetaphoneMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala 
b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala new file mode 100755 index 0000000..d922b6d --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisAlgorithmBenchmark.scala @@ -0,0 +1,28 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.util.Random + +/** Times NysiisAlgorithm.compute on one random input of length characters, passed as an Array[Char] and as a String. */ final class NysiisAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + /** Draws length characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and caches the result as String and Array[Char]. */ override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + NysiisAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + NysiisAlgorithm.compute(string) + } +} + +/** Entry point that runs NysiisAlgorithmBenchmark via the main method inherited from CaliperRunner. */ object NysiisAlgorithmBenchmark extends CaliperRunner(classOf[NysiisAlgorithmBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisMetricBenchmark.scala new file mode 100755 index 0000000..f3ab31d --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/NysiisMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times NysiisMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class NysiisMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: 
Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + NysiisMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + NysiisMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + NysiisMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + NysiisMetric.compare(string1, string1) + } +} + +/** Entry point that runs NysiisMetricBenchmark via the main method inherited from CaliperRunner. */ object NysiisMetricBenchmark extends CaliperRunner(classOf[NysiisMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..5c13b0d --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexAlgorithmBenchmark.scala @@ -0,0 +1,28 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.util.Random + +/** Times RefinedSoundexAlgorithm.compute on one random input of length characters, passed as an Array[Char] and as a String. */ final class RefinedSoundexAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + /** Draws length characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and caches the result as String and Array[Char]. */ override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + RefinedSoundexAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + 
RefinedSoundexAlgorithm.compute(string) + } +} + +/** Entry point that runs RefinedSoundexAlgorithmBenchmark via the main method inherited from CaliperRunner. */ object RefinedSoundexAlgorithmBenchmark extends CaliperRunner(classOf[RefinedSoundexAlgorithmBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala new file mode 100755 index 0000000..90f8e72 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/RefinedSoundexMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times RefinedSoundexMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class RefinedSoundexMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + RefinedSoundexMetric.compare(string1, string1) + } +} + +/** Entry point that runs RefinedSoundexMetricBenchmark via the main method inherited from CaliperRunner. */ object RefinedSoundexMetricBenchmark extends 
CaliperRunner(classOf[RefinedSoundexMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala new file mode 100755 index 0000000..57a3925 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexAlgorithmBenchmark.scala @@ -0,0 +1,28 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.util.Random + +/** Times SoundexAlgorithm.compute on one random input of length characters, passed as an Array[Char] and as a String. */ final class SoundexAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + /** Draws length characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and caches the result as String and Array[Char]. */ override protected def setUp() { + string = Random.alphanumeric.filter(_ > '9').take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + SoundexAlgorithm.compute(charArray) + } + + def timeComputeWithString(reps: Int) = run(reps) { + SoundexAlgorithm.compute(string) + } +} + +/** Entry point that runs SoundexAlgorithmBenchmark via the main method inherited from CaliperRunner. */ object SoundexAlgorithmBenchmark extends CaliperRunner(classOf[SoundexAlgorithmBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexMetricBenchmark.scala new file mode 100755 index 0000000..78e1d25 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/phonetic/SoundexMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.phonetic + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times SoundexMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class SoundexMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var 
length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric, keeping only those that compare greater than the digit 9, and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.filter(_ > '9').take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + SoundexMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + SoundexMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + SoundexMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + SoundexMetric.compare(string1, string1) + } +} + +/** Entry point that runs SoundexMetricBenchmark via the main method inherited from CaliperRunner. */ object SoundexMetricBenchmark extends CaliperRunner(classOf[SoundexMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetricBenchmark.scala new file mode 100755 index 0000000..28d0688 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/DiceSorensenMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times DiceSorensenMetric.compare, with the second argument list fixed to (2), on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class DiceSorensenMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def 
setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + DiceSorensenMetric.compare(charArray1, charArray2)(2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + DiceSorensenMetric.compare(string1, string2)(2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + DiceSorensenMetric.compare(charArray1, charArray1)(2) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + DiceSorensenMetric.compare(string1, string1)(2) + } +} + +/** Entry point that runs DiceSorensenMetricBenchmark via the main method inherited from CaliperRunner. */ object DiceSorensenMetricBenchmark extends CaliperRunner(classOf[DiceSorensenMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/HammingMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/HammingMetricBenchmark.scala new file mode 100755 index 0000000..f06ae6d --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/HammingMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times HammingMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class HammingMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps 
== null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + HammingMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + HammingMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + HammingMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + HammingMetric.compare(string1, string1) + } +} + +/** Entry point that runs HammingMetricBenchmark via the main method inherited from CaliperRunner. */ object HammingMetricBenchmark extends CaliperRunner(classOf[HammingMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroMetricBenchmark.scala new file mode 100755 index 0000000..ce9d2f6 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times JaroMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class JaroMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def 
timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + JaroMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + JaroMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + JaroMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + JaroMetric.compare(string1, string1) + } +} + +/** Entry point that runs JaroMetricBenchmark via the main method inherited from CaliperRunner. */ object JaroMetricBenchmark extends CaliperRunner(classOf[JaroMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetricBenchmark.scala new file mode 100755 index 0000000..64f8b89 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/JaroWinklerMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times JaroWinklerMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class JaroWinklerMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + JaroWinklerMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + 
JaroWinklerMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + JaroWinklerMetric.compare(charArray1, charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + JaroWinklerMetric.compare(string1, string1) + } +} + +/** Entry point that runs JaroWinklerMetricBenchmark via the main method inherited from CaliperRunner. */ object JaroWinklerMetricBenchmark extends CaliperRunner(classOf[JaroWinklerMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/LevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/LevenshteinMetricBenchmark.scala new file mode 100755 index 0000000..598a44f --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/LevenshteinMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times LevenshteinMetric.compare on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class LevenshteinMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + LevenshteinMetric.compare(charArray1, charArray2) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + LevenshteinMetric.compare(string1, string2) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + LevenshteinMetric.compare(charArray1, 
charArray1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + LevenshteinMetric.compare(string1, string1) + } +} + +/** Entry point that runs LevenshteinMetricBenchmark via the main method inherited from CaliperRunner. */ object LevenshteinMetricBenchmark extends CaliperRunner(classOf[LevenshteinMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramAlgorithmBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramAlgorithmBenchmark.scala new file mode 100755 index 0000000..9bef2fa --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramAlgorithmBenchmark.scala @@ -0,0 +1,31 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.util.Random + +/** Times NGramAlgorithm.compute on one random input of length characters, passed as an Array[Char] and as a String, with the n parameter (2 or 3) supplied as the second argument list. */ final class NGramAlgorithmBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string: String = _ + var charArray: Array[Char] = _ + + /** Draws length characters from Random.alphanumeric and caches the result as String and Array[Char]. */ override protected def setUp() { + string = Random.alphanumeric.take(length).mkString + charArray = string.toCharArray + } + + def timeComputeWithCharArray(reps: Int) = run(reps) { + NGramAlgorithm.compute(charArray)(n) + } + + def timeComputeWithString(reps: Int) = run(reps) { + NGramAlgorithm.compute(string)(n) + } +} + +/** Entry point that runs NGramAlgorithmBenchmark via the main method inherited from CaliperRunner. */ object NGramAlgorithmBenchmark extends CaliperRunner(classOf[NGramAlgorithmBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramMetricBenchmark.scala new file mode 100755 index 0000000..c9c0717 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/NGramMetricBenchmark.scala @@ -0,0 +1,55 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ 
CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times NGramMetric.compare, with the n parameter (2 or 3) supplied as the second argument list, on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class NGramMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + @Param(Array("2", "3")) + var n: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + NGramMetric.compare(charArray1, charArray2)(n) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + NGramMetric.compare(string1, string2)(n) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + NGramMetric.compare(charArray1, charArray1)(n) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + NGramMetric.compare(string1, string1)(n) + } +} + +/** Entry point that runs NGramMetricBenchmark via the main method inherited from CaliperRunner. */ object NGramMetricBenchmark extends CaliperRunner(classOf[NGramMetricBenchmark]) \ No newline at end of file diff --git a/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala new file mode 100755 index 0000000..1f79c52 --- /dev/null +++ b/core/source/benchmark/scala/org/hashtree/stringmetric/similarity/WeightedLevenshteinMetricBenchmark.scala @@ -0,0 +1,52 @@ +package org.hashtree.stringmetric.similarity + +import com.google.caliper.Param +import org.hashtree.stringmetric.{ CaliperBenchmark, CaliperRunner } +import scala.annotation.tailrec +import scala.util.Random + +/** Times WeightedLevenshteinMetric.compare, with the second argument list fixed to (1, 1, 1), on random inputs of length characters: a differing pair and an identical pair, each as Strings and as Array[Char]s. */ final class 
WeightedLevenshteinMetricBenchmark extends CaliperBenchmark { + @Param(Array("0", "1", "2", "4", "8", "16")) + var length: Int = _ + + var string1: String = _ + var charArray1: Array[Char] = _ + var string2: String = _ + var charArray2: Array[Char] = _ + + /** Fills string1 and string2 and caches their char arrays. The nested random helper draws l characters from Random.alphanumeric and redraws until the result differs from ps. NOTE(review): for l == 0 it always returns the empty string, so at length 0 the Different benchmarks compare two empty inputs. */ override protected def setUp() { + @tailrec + def random(l: Int, ps: String = null): String = { + if (l == 0) "" + else { + val s = Random.alphanumeric.take(l).mkString + + if (ps == null || s != ps) s + else random(l, ps) + } + } + + string1 = random(length) + string2 = random(length, string1) + charArray1 = string1.toCharArray + charArray2 = string2.toCharArray + } + + def timeCompareWithDifferentCharArrays(reps: Int) = run(reps) { + WeightedLevenshteinMetric.compare(charArray1, charArray2)(1, 1, 1) + } + + def timeCompareWithDifferentStrings(reps: Int) = run(reps) { + WeightedLevenshteinMetric.compare(string1, string2)(1, 1, 1) + } + + def timeCompareWithIdenticalCharArrays(reps: Int) = run(reps) { + WeightedLevenshteinMetric.compare(charArray1, charArray1)(1, 1, 1) + } + + def timeCompareWithIdenticalStrings(reps: Int) = run(reps) { + WeightedLevenshteinMetric.compare(string1, string1)(1, 1, 1) + } +} + +/** Entry point that runs WeightedLevenshteinMetricBenchmark via the main method inherited from CaliperRunner. */ object WeightedLevenshteinMetricBenchmark extends CaliperRunner(classOf[WeightedLevenshteinMetricBenchmark]) \ No newline at end of file -- cgit v1.2.3