FIX: Detect Wayback Machine using user agent (#9777)

FIX: Detect Wayback Machine using user agent (#9777)

diff --git a/lib/crawler_detection.rb b/lib/crawler_detection.rb
index 6cd1eba..4a90f84 100644
--- a/lib/crawler_detection.rb
+++ b/lib/crawler_detection.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module CrawlerDetection
-  WAYBACK_MACHINE_URL = "web.archive.org"
+  WAYBACK_MACHINE_URL = "archive.org"
 
   def self.to_matcher(string, type: nil)
     escaped = string.split('|').map { |agent| Regexp.escape(agent) }.join('|')
@@ -15,7 +15,7 @@ module CrawlerDetection
   end
 
   def self.crawler?(user_agent, via_header = nil)
-    return true if user_agent.nil? || via_header&.include?(WAYBACK_MACHINE_URL)
+    return true if user_agent.nil? || user_agent&.include?(WAYBACK_MACHINE_URL) || via_header&.include?(WAYBACK_MACHINE_URL)
 
     # this is done to avoid regenerating regexes
     @non_crawler_matchers ||= {}
diff --git a/spec/components/crawler_detection_spec.rb b/spec/components/crawler_detection_spec.rb
index db45d2c..1492d01 100644
--- a/spec/components/crawler_detection_spec.rb
+++ b/spec/components/crawler_detection_spec.rb
@@ -51,7 +51,9 @@ describe CrawlerDetection do
     end
 
     it "returns true when VIA header contains 'web.archive.org'" do
-      crawler!("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", "HTTP/1.0 web.archive.org (Wayback Save Page)")
+      crawler! "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot)"
+      crawler! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", "HTTP/1.0 web.archive.org (Wayback Save Page)"
+      crawler! "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", "Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; http://archive.org/details/archive.org_bot), 1.1 warcprox"
     end
 
     it "returns false for non-crawler user agents" do

GitHub sha: 3ed6a0e9

This commit appears in #9777, which was merged by SamSaffron.