summaryrefslogtreecommitdiffstats
path: root/bin/subtle-contrib/ruby/levenshtein.rb
diff options
context:
space:
mode:
Diffstat (limited to 'bin/subtle-contrib/ruby/levenshtein.rb')
-rw-r--r--bin/subtle-contrib/ruby/levenshtein.rb74
1 files changed, 74 insertions, 0 deletions
diff --git a/bin/subtle-contrib/ruby/levenshtein.rb b/bin/subtle-contrib/ruby/levenshtein.rb
new file mode 100644
index 0000000..6122149
--- /dev/null
+++ b/bin/subtle-contrib/ruby/levenshtein.rb
@@ -0,0 +1,74 @@
+#
+# @file Calculate Levenshtein distance
+#
+# @copyright (c) 2010-2011, Christoph Kappel <unexist@dorfelite.net>
+# @version $Id$
+#
+# This program can be distributed under the terms of the GNU GPLv2.
+# See the file COPYING for details.
+#
+
+module Levenshtein
+ MAX_LENGTH = 255
+
+ ## distance {{{
+ # @brief Calculate the Levenshtein Distance
+ # @param [String] s1 First string
+ # @param [String] s2 Second string
+ # @param [Fixnum] cost_ins Cost for insertion
+ # @param [Fixnum] cost_rep Cost for replace
+ # @param [Fixnum] cost_del Cost for deletion
+ # @param [Array] a1 First array
+ # @param [Array] a2 Second array
+ # @raise [String] Error
+ # @return [Fixnum] Calculated distance
+ ##
+
+ def self.distance(s1, s2, cost_ins = 1, cost_rep = 1,
+ cost_del = 1, a1 = nil, a2 = nil)
+ # Step 1: Check string length
+ l1 = s1.length
+ l2 = s2.length
+
+ # Check length
+ return l2 * cost_ins if 0 == l1
+ return l1 * cost_del if 0 == l2
+
+ raise "Max length" if l1 > MAX_LENGTH || l2 > MAX_LENGTH
+
+ # Step 2: Create and init arrays
+ p1 = a1 || Array.new(l2 + 1, 0)
+ p2 = a2 || Array.new(l2 + 1, 0)
+
+ (0..l2).each { |i| p1[i] = i * cost_ins }
+
+ # Step 3: Iterate over string s1
+ (0..(l1 - 1)).each do |i|
+ p2[0] = p1[0] + cost_del
+
+ # Step 4: Iterate over string s2
+ (0..(l2 - 1)).each do |j|
+ # Step 5: Get cost
+ c0 = p1[j] + ((s1[i] == s2[j]) ? 0 : cost_rep)
+ c1 = p1[j + 1] + cost_del
+ c0 = c1 if c1 < c0
+
+ c2 = p2[j] + cost_ins
+ c0 = c2 if c2 < c0
+
+ # Step 6: Store min value in matrix
+ p2[j + 1] = c0
+ end
+
+ # Swap arrays
+ tmp = p1
+ p1 = p2
+ p2 = tmp
+ end
+
+ # Step 7: Return distance
+ c0 = p1[l2]
+
+ c0
+ end # }}}
+end # }}}