PERF: limit time spent diffing large blobs of text

PERF: limit time spent diffing large blobs of text

REFACTOR: s/.length/.size/g

diff --git a/lib/discourse_diff.rb b/lib/discourse_diff.rb
index 041a713..6cac776 100644
--- a/lib/discourse_diff.rb
+++ b/lib/discourse_diff.rb
@@ -21,7 +21,7 @@ class DiscourseDiff
   def inline_html
     i = 0
     inline = []
-    while i < @block_by_block_diff.length
+    while i < @block_by_block_diff.size
       op_code = @block_by_block_diff[i][1]
       if op_code == :common then inline << @block_by_block_diff[i][0]
       else
@@ -37,7 +37,7 @@ class DiscourseDiff
           second = i
         end
 
-        if i + 1 < @block_by_block_diff.length && @block_by_block_diff[i + 1][1] == opposite_op_code
+        if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
           diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
           inline << generate_inline_html(diff)
           i += 1
@@ -54,7 +54,7 @@ class DiscourseDiff
   def side_by_side_html
     i = 0
     left, right = [], []
-    while i < @block_by_block_diff.length
+    while i < @block_by_block_diff.size
       op_code = @block_by_block_diff[i][1]
       if op_code == :common
         left << @block_by_block_diff[i][0]
@@ -74,7 +74,7 @@ class DiscourseDiff
           second = i
         end
 
-        if i + 1 < @block_by_block_diff.length && @block_by_block_diff[i + 1][1] == opposite_op_code
+        if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
           diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
           deleted, inserted = generate_side_by_side_html(diff)
           left << deleted
@@ -93,7 +93,7 @@ class DiscourseDiff
   def side_by_side_markdown
     i = 0
     table = ["<table class=\"markdown\">"]
-    while i < @line_by_line_diff.length
+    while i < @line_by_line_diff.size
       table << "<tr>"
       op_code = @line_by_line_diff[i][1]
       if op_code == :common
@@ -110,9 +110,9 @@ class DiscourseDiff
           second = i
         end
 
-        if i + 1 < @line_by_line_diff.length && @line_by_line_diff[i + 1][1] == opposite_op_code
+        if i + 1 < @line_by_line_diff.size && @line_by_line_diff[i + 1][1] == opposite_op_code
           before_tokens, after_tokens = tokenize_markdown(@line_by_line_diff[first][0]), tokenize_markdown(@line_by_line_diff[second][0])
-          if (before_tokens.length - after_tokens.length).abs > MAX_DIFFERENCE
+          if (before_tokens.size - after_tokens.size).abs > MAX_DIFFERENCE
             before_tokens, after_tokens = tokenize_line(@line_by_line_diff[first][0]), tokenize_line(@line_by_line_diff[second][0])
           end
           diff = ONPDiff.new(before_tokens, after_tokens).short_diff
@@ -147,25 +147,25 @@ class DiscourseDiff
   def tokenize_markdown(text)
     t, tokens = [], []
     i = 0
-    while i < text.length
+    while i < text.size
       if text[i] =~ /\w/
         t << text[i]
       elsif text[i] =~ /[ \t]/ && t.join =~ /^\w+$/
         begin
           t << text[i]
           i += 1
-        end while i < text.length && text[i] =~ /[ \t]/
+        end while i < text.size && text[i] =~ /[ \t]/
         i -= 1
         tokens << t.join
         t = []
       else
-        tokens << t.join if t.length > 0
+        tokens << t.join if t.size > 0
         tokens << text[i]
         t = []
       end
       i += 1
     end
-    tokens << t.join if t.length > 0
+    tokens << t.join if t.size > 0
     tokens
   end
 
@@ -180,7 +180,7 @@ class DiscourseDiff
   def add_class_or_wrap_in_tags(html_or_text, klass)
     result = html_or_text.dup
     index_of_next_chevron = result.index(">")
-    if result.length > 0 && result[0] == '<' && index_of_next_chevron
+    if result.size > 0 && result[0] == '<' && index_of_next_chevron
       index_of_class = result.index("class=")
       if index_of_class.nil? || index_of_class > index_of_next_chevron
         # we do not have a class for the current tag
@@ -192,7 +192,7 @@ class DiscourseDiff
         if classes.include?("diff-#{klass}")
           result
         else
-          result.insert(index_of_class + "class=".length + 1, "diff-#{klass} ")
+          result.insert(index_of_class + "class=".size + 1, "diff-#{klass} ")
         end
       end
     else
diff --git a/lib/onpdiff.rb b/lib/onpdiff.rb
index bcca737..ef4799f 100644
--- a/lib/onpdiff.rb
+++ b/lib/onpdiff.rb
@@ -1,12 +1,13 @@
 # frozen_string_literal: true
 
 # Use "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber and Gene Myers
-# in http://www.itu.dk/stud/speciale/bepjea/xwebtex/litt/an-onp-sequence-comparison-algorithm.pdf
+# in https://publications.mpi-cbg.de/Wu_1990_6334.pdf
+
 class ONPDiff
 
   def initialize(a, b)
     @a, @b = a, b
-    @m, @n = a.length, b.length
+    @m, @n = a.size, b.size
     @backtrack = []
     if @reverse = @m > @n
       @a, @b = @b, @a
@@ -30,13 +31,15 @@ class ONPDiff
     return @shortest_path if @shortest_path
 
     size = @m + @n + 3
-    fp = Array.new(size) { |i| -1 }
-    @path = Array.new(size) { |i| -1 }
+    fp = Array.new(size, -1)
+    @path = Array.new(size, -1)
     p = -1
 
     begin
       p += 1
 
+      return (@shortest_path = []) if p >= 1000
+
       k = -p
       while k <= @delta - 1
         fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
@@ -56,6 +59,7 @@ class ONPDiff
     r = @path[@delta + @offset]
 
     @shortest_path = []
+
     while r != -1
       @shortest_path << [@backtrack[r][0], @backtrack[r][1]]
       r = @backtrack[r][2]
@@ -74,7 +78,7 @@ class ONPDiff
       y += 1
     end
 
-    @path[k + @offset] = @backtrack.length
+    @path[k + @offset] = @backtrack.size
     @backtrack << [x, y, r]
 
     y
@@ -84,7 +88,7 @@ class ONPDiff
     ses = []
     x, y = 1, 1
     px, py = 0, 0
-    i = shortest_path.length - 1
+    i = shortest_path.size - 1
     while i >= 0
       while px < shortest_path[i][0] || py < shortest_path[i][1]
         if shortest_path[i][1] - shortest_path[i][0] > py - px
@@ -114,12 +118,12 @@ class ONPDiff
     ses = []
     x, y = 1, 1
     px, py = 0, 0
-    i = shortest_path.length - 1
+    i = shortest_path.size - 1
     while i >= 0
       while px < shortest_path[i][0] || py < shortest_path[i][1]
         if shortest_path[i][1] - shortest_path[i][0] > py - px
           t = @reverse ? :delete : :add
-          if ses.length > 0 && ses[-1][1] == t
+          if ses.size > 0 && ses[-1][1] == t
             ses[-1][0] << @b[py]
           else
             ses << [@b[py], t]
@@ -128,7 +132,7 @@ class ONPDiff
           py += 1
         elsif shortest_path[i][1] - shortest_path[i][0] < py - px
           t = @reverse ? :add : :delete
-          if ses.length > 0 && ses[-1][1] == t
+          if ses.size > 0 && ses[-1][1] == t
             ses[-1][0] << @a[px]
           else
             ses << [@a[px], t]
@@ -136,7 +140,7 @@ class ONPDiff
           x += 1
           px += 1
         else
-          if ses.length > 0 && ses[-1][1] == :common
+          if ses.size > 0 && ses[-1][1] == :common
             ses[-1][0] << @a[px]
           else
             ses << [@a[px], :common]
diff --git a/lib/post_revisor.rb b/lib/post_revisor.rb
index 8ac7b29..527440f 100644
--- a/lib/post_revisor.rb
+++ b/lib/post_revisor.rb
@@ -299,7 +299,7 @@ class PostRevisor
         max_diff = SiteSetting.editing_grace_period_max_diff_high_trust.to_i
       end
 
-      if (original_raw.length - new_raw.length).abs > max_diff ||
+      if (original_raw.size - new_raw.size).abs > max_diff ||
         diff_size(original_raw, new_raw) > max_diff
         return false
       end
diff --git a/spec/components/onpdiff_spec.rb b/spec/components/onpdiff_spec.rb
index 1d0b708..67c5b0b 100644
--- a/spec/components/onpdiff_spec.rb
+++ b/spec/components/onpdiff_spec.rb
@@ -15,6 +15,12 @@ describe ONPDiff do

[... diff too long, it was truncated ...]

GitHub sha: 134a4c66

1 Like