Track Discourse user agent pageviews as crawler hits
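Since 5bfe051e, Discourse user agents are marked as non-crawlers (to avoid accidental blacklisting). This change makes sure pageviews from these agents are still tracked as crawler hits: when the regular crawler detection does not match, any user agent containing "discourse" (case-insensitive) is treated as a crawler for pageview tracking.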
diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
index b31d0b0..48a7648 100644
--- a/lib/middleware/anonymous_cache.rb
+++ b/lib/middleware/anonymous_cache.rb
@@ -62,7 +62,11 @@ module Middleware
@is_crawler ||=
begin
user_agent = @env[USER_AGENT]
- CrawlerDetection.crawler?(user_agent) ? :true : :false
+ if CrawlerDetection.crawler?(user_agent)
+ :true
+ else
+ user_agent.downcase.include?("discourse") ? :true : :false
+ end
end
@is_crawler == :true
end
diff --git a/spec/components/middleware/request_tracker_spec.rb b/spec/components/middleware/request_tracker_spec.rb
index 9029024..ca8e1b6 100644
--- a/spec/components/middleware/request_tracker_spec.rb
+++ b/spec/components/middleware/request_tracker_spec.rb
@@ -68,6 +68,16 @@ describe Middleware::RequestTracker do
expect(ApplicationRequest.page_view_anon.first.count).to eq(2)
expect(ApplicationRequest.page_view_crawler.first.count).to eq(1)
expect(ApplicationRequest.page_view_anon_mobile.first.count).to eq(1)
+
+ # log discourse User Agent requests as crawler for page views
+ data = Middleware::RequestTracker.get_data(env(
+ "HTTP_USER_AGENT" => "DiscourseAPI Ruby Gem 0.19.0"
+ ), ["200", { "Content-Type" => 'text/html' }], 0.1)
+
+ Middleware::RequestTracker.log_request(data)
+ ApplicationRequest.write_cache!
+
+ expect(ApplicationRequest.page_view_crawler.first.count).to eq(2)
end
end
GitHub sha: a4eb523a