PERF: Skip post validation by default when fabricating posts (#7508)

PERF: Skip post validation by default when fabricating posts (#7508)

This speeds up tests by 10%

diff --git a/spec/components/post_revisor_spec.rb b/spec/components/post_revisor_spec.rb
index cdc2395..ebfd343 100644
--- a/spec/components/post_revisor_spec.rb
+++ b/spec/components/post_revisor_spec.rb
@@ -102,7 +102,7 @@ describe PostRevisor do
   end
 
   context 'revise' do
-    let(:post) { Fabricate(:post, post_args) }
+    let(:post) { Fabricate(:post_with_validation, post_args) }
     let(:first_version_at) { post.last_version_at }
 
     subject { PostRevisor.new(post) }
diff --git a/spec/fabricators/post_fabricator.rb b/spec/fabricators/post_fabricator.rb
index 219a165..dfa1b1c 100644
--- a/spec/fabricators/post_fabricator.rb
+++ b/spec/fabricators/post_fabricator.rb
@@ -4,9 +4,14 @@ Fabricator(:post) do
   user
   topic { |attrs| Fabricate(:topic, user: attrs[:user]) }
   raw "Hello world"
+  skip_validation true
   post_type Post.types[:regular]
 end
 
+Fabricator(:post_with_validation, from: :post) do
+  skip_validation false
+end
+
 Fabricator(:post_with_long_raw_content, from: :post) do
   raw 'This is a sample post with semi-long raw content. The raw content is also more than
       two hundred characters to satisfy any test conditions that require content longer
diff --git a/spec/integration/watched_words_spec.rb b/spec/integration/watched_words_spec.rb
index d8d127e..126fff6 100644
--- a/spec/integration/watched_words_spec.rb
+++ b/spec/integration/watched_words_spec.rb
@@ -63,7 +63,7 @@ describe WatchedWord do
     end
 
     it "blocks on revisions" do
-      post = Fabricate(:post, topic: Fabricate(:topic, user: tl2_user), user: tl2_user)
+      post = Fabricate(:post_with_validation, topic: Fabricate(:topic, user: tl2_user), user: tl2_user)
       expect {
         PostRevisor.new(post).revise!(post.user, { raw: "Want some #{block_word.word} for cheap?" }, revised_at: post.updated_at + 10.seconds)
         expect(post.errors).to be_present
diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb
index b6edeb6..261cc51 100644
--- a/spec/models/post_spec.rb
+++ b/spec/models/post_spec.rb
@@ -57,7 +57,7 @@ describe Post do
   def post_with_body(body, user = nil)
     args = post_args.merge(raw: body)
     args[:user] = user if user.present?
-    Fabricate.build(:post, args)
+    Fabricate.build(:post_with_validation, args)
   end
 
   it { is_expected.to validate_presence_of :raw }
@@ -79,7 +79,7 @@ describe Post do
     describe '#by_newest' do
       it 'returns posts ordered by created_at desc' do
         2.times do |t|
-          Fabricate(:post, created_at: t.seconds.from_now)
+          Fabricate(:post_with_validation, created_at: t.seconds.from_now)
         end
         expect(Post.by_newest.first.created_at).to be > Post.by_newest.last.created_at
       end
@@ -87,7 +87,7 @@ describe Post do
 
     describe '#with_user' do
       it 'gives you a user' do
-        Fabricate(:post, user: Fabricate.build(:user))
+        Fabricate(:post_with_validation, user: Fabricate.build(:user))
         expect(Post.with_user.first.user).to be_a User
       end
     end
@@ -97,7 +97,7 @@ describe Post do
   describe "revisions and deleting/recovery" do
 
     context 'a post without links' do
-      let(:post) { Fabricate(:post, post_args) }
+      let(:post) { Fabricate(:post_with_validation, post_args) }
 
       before do
         post.trash!
@@ -137,7 +137,7 @@ describe Post do
 
     context 'a post with notices' do
       let(:post) {
-        post = Fabricate(:post, post_args)
+        post = Fabricate(:post_with_validation, post_args)
         post.custom_fields["notice_type"] = Post.notices[:returning_user]
         post.custom_fields["notice_args"] = 1.day.ago
         post.save_custom_fields
@@ -155,7 +155,7 @@ describe Post do
   end
 
   describe 'flagging helpers' do
-    fab!(:post) { Fabricate(:post) }
+    fab!(:post) { Fabricate(:post_with_validation) }
     fab!(:user) { Fabricate(:coding_horror) }
     fab!(:admin) { Fabricate(:admin) }
 
@@ -198,7 +198,7 @@ describe Post do
 
   describe "maximum images" do
     fab!(:newuser) { Fabricate(:user, trust_level: TrustLevel[0]) }
-    let(:post_no_images) { Fabricate.build(:post, post_args.merge(user: newuser)) }
+    let(:post_no_images) { Fabricate.build(:post_with_validation, post_args.merge(user: newuser)) }
     let(:post_one_image) { post_with_body("![sherlock](http://bbc.co.uk/sherlock.jpg)", newuser) }
     let(:post_two_images) { post_with_body("<img src='http://discourse.org/logo.png'> <img src='http://bbc.co.uk/sherlock.jpg'>", newuser) }
     let(:post_with_avatars) { post_with_body('<img alt="smiley" title=":smiley:" src="/assets/emoji/smiley.png" class="avatar"> <img alt="wink" title=":wink:" src="/assets/emoji/wink.png" class="avatar">', newuser) }
@@ -210,7 +210,7 @@ describe Post do
     let(:post_with_two_classy_images) { post_with_body("<img src='http://discourse.org/logo.png' class='classy'> <img src='http://bbc.co.uk/sherlock.jpg' class='classy'>", newuser) }
 
     it "returns 0 images for an empty post" do
-      expect(Fabricate.build(:post).image_count).to eq(0)
+      expect(Fabricate.build(:post_with_validation).image_count).to eq(0)
     end
 
     it "finds images from markdown" do
@@ -312,12 +312,12 @@ describe Post do
 
   describe "maximum attachments" do
     fab!(:newuser) { Fabricate(:user, trust_level: TrustLevel[0]) }
-    let(:post_no_attachments) { Fabricate.build(:post, post_args.merge(user: newuser)) }
+    let(:post_no_attachments) { Fabricate.build(:post_with_validation, post_args.merge(user: newuser)) }
     let(:post_one_attachment) { post_with_body('<a class="attachment" href="/uploads/default/1/2082985.txt">file.txt</a>', newuser) }
     let(:post_two_attachments) { post_with_body('<a class="attachment" href="/uploads/default/2/20947092.log">errors.log</a> <a class="attachment" href="/uploads/default/3/283572385.3ds">model.3ds</a>', newuser) }
 
     it "returns 0 attachments for an empty post" do
-      expect(Fabricate.build(:post).attachment_count).to eq(0)
+      expect(Fabricate.build(:post_with_validation).attachment_count).to eq(0)
     end
 
     it "finds attachments from HTML" do
@@ -431,7 +431,7 @@ describe Post do
     let(:post_with_mentions) { post_with_body("hello @#{newuser.username} how are you doing?", newuser) }
 
     it "returns 0 links for an empty post" do
-      expect(Fabricate.build(:post).link_count).to eq(0)
+      expect(Fabricate.build(:post_with_validation).link_count).to eq(0)
     end
 
     it "returns 0 links for a post with mentions" do
@@ -503,44 +503,44 @@ describe Post do
     context 'raw_mentions' do
 
       it "returns an empty array with no matches" do
-        post = Fabricate.build(:post, post_args.merge(raw: "Hello Jake and Finn!"))
+        post = Fabricate.build(:post_with_validation, post_args.merge(raw: "Hello Jake and Finn!"))
         expect(post.raw_mentions).to eq([])
       end
 
       it "returns lowercase unique versions of the mentions" do
-        post = Fabricate.build(:post, post_args.merge(raw: "@Jake @Finn @Jake"))
+        post = Fabricate.build(:post_with_validation, post_args.merge(raw: "@Jake @Finn @Jake"))
         expect(post.raw_mentions).to eq(['jake', 'finn'])
       end
 
       it "ignores pre" do
         # we need to force an inline
-        post = Fabricate.build(:post, post_args.merge(raw: "p <pre>@Jake</pre> @Finn"))
+        post = Fabricate.build(:post_with_validation, post_args.merge(raw: "p <pre>@Jake</pre> @Finn"))
         expect(post.raw_mentions).to eq(['finn'])
       end
 
       it "catches content between pre tags" do
         # per common mark we need to force an inline
-        post = Fabricate.build(:post, post_args.merge(raw: "a <pre>hello</pre> @Finn <pre></pre>"))
+        post = Fabricate.build(:post_with_validation, post_args.merge(raw: "a <pre>hello</pre> @Finn <pre></pre>"))
         expect(post.raw_mentions).to eq(['finn'])
       end
 
       it "ignores code" do
-        post = Fabricate.build(:post, post_args.merge(raw: "@Jake `@Finn`"))

[... diff too long, it was truncated ...]

GitHub sha: 4da6ca4d

@danielwaterworth Can you verify again if there is indeed a 10% speed gain? I ran the tests locally and I’m not seeing the improvements here. The build times on our servers are not showing the same gains as well.

@SamSaffron I’m not really a fan of this change because we’re trading the validity of post records for speed here. We may end up testing against invalid post records which does not reflect the actual situation is production.

Yeah I think this 10% number is around the very specific case of fabricating a post.

I am though a fan of this, cause if you want a proper post don’t use a fabricator, use create_post which runs through PostCreator and creates a post properly.

Not a huge fan of post_with_validations cause its only sort of real.

Not following, right now our posts doesn’t run through the basic validations which means we cant be sure that they are valid post records. PostCreator and Fabricate(:post) both creates a post record properly but PostCreator does alot more special stuff that we may not need when we’re only interested in a post record. Plus if there isn’t a significant speed gain, I don’t think we should make this change.

That is my point, if we need a real post, use the creator, it does proper validation per: discourse/post_creator.rb at 4da6ca4d9fcaa3c91fc002feb71d93e40931ac5a · discourse/discourse · GitHub

I am uneasy with Fabricate(:sort_of_valid_sometime_post) it just feels incorrect to me.

I am OK with Fabricate(:fast_but_not_valid_post) for cases that we need speed.

How is the post not valid when it passes validation?

I’ll wait for @danielwaterworth to confirm but the data doesn’t show any significant speed gain here.

cause it is not running all the validation, some validation happens in PostCreator.

Those validations are for special cases where we don’t need them most of the time. The fact that those validations are not being run at the AR level shows the optionality of those validations.

The sacrificing of basic validations is odd to me. We can easily be asserting against a post with an invalid length.

The thing we first need to confirm though is whether there is any significant speed gain at all. If there isn’t, I don’t think we should make this tradeoff.

I kind of disagree… we are already asserting on a post:

  • without links extracted
  • without tracking state for the OP
  • without post links extracted

and the list goes on.

If anything my move here would be to move all these validations out of AR and into PostCreator, the current half/half design is a problem imo.

The current change here makes this worst so I don’t follow why we’re making this change. We went from some but incomplete validation to completely no validation which doesn’t make the test suite more reliable.

I am clocking ~2-3% improvement.

Why does search spec for example need any kind of validation here?

I can not see any examples where we are fabricating :post where we want to run this code code really:

And if we don’t care about this 99% of the time why can’t people use a different method when they actually do?

Our test suite should be fabricating objects in a way that is as close to production as possible. We’ve learnt this the hard way with uploads where we were using a totally random url format in our test cases and that bit us hard because we were asserting against an incorrect format when our code expects a certain format. There are so many possibilities in our test suite that I can’t say with confidence that we don’t need validations. I personally wouldn’t take a 7 seconds gain that brings our test suite further away than what is happening on production.

I am just uneasy with relying on luck here so much.

The idea here is that we are increasing safety and robustness of our test suite by running partial validations all the time.

The validations are super partial in the RateLimiters are disabled, Post processing never runs on the posts and the list goes on.

All sorts of edge cases exist that already bypass some validator, like this complex one:

That a side effect bypasses unique_post_validator

I guess we could just add to the soup here and put:

set_regardless_of_locale(:max_consecutive_replies, 0) which would give us similar savings.

I will do this for now I guess cause you feel so awesomely strong about this, but I think you have a strong illusion of safety here.

I don’t see the giant deal of Fabricate(:post, raw: 'X') working by default.

I think you’re missing my point. Our test suite may not be perfect with its partial validations but I don’t see how a 7 seconds gain by not running any validations is a worth while trade-off over what we have now.

I feel like what you’re saying here is as good as saying that we should just turn off validations for all fabricated objects by default. If we trust the developer to pass in any attribute when fabricating a post object without ensuring that it conforms to the rules of our validations, the same can be said for all our fabricated objects as well.

PERF: avoid checking for consecutive replies in test

I am basically saying I want cheap fixtures, getting there is a hard slog. Traditional Rails fixtures don’t work for us. What we want is magic validated fixtures that build object graphs.

I do think we have a pretty big problem in that devs need to think a lot about

  • Should I use create_post ?

or

  • Should I use Fabricate(:post) ?

In my ideal world this decision point would not exist. My thoughts are that if we already have this weird distinction, might as well make Fabricate(:post) a bit weaker.

I do think there is lots of merit in saying that remembering about post_with_validation is yet another decision point now and we are adding more pain to our decision salad. On that merit alone I am amending this.

So I get to keep my 5:58s test suite and status quo is more or less retained.

I get a consistent 8-9% difference locally. It brings the tests down to 11:30 for me. It could definitely be a processor cache size or memory bandwidth thing that causes me to get a more significant speed up than either of you do. The validation is quite expensive. To check the number of mentions, it parses markdown, generates html, generates javascript, parses the generated html, generates the html again, parses the same html again and finally runs a selector.

1 Like

@tgxworld, Your concern here is completely valid. It’s extremely important that Fabricated posts pass any validations that we could run on them. Otherwise the tests are working in a fantasy land like your upload scenario.

However, we generate >2000 posts in the test suite and most of them are almost entirely identical. So, it seems wasteful to me to run the validations for each post when we could do it once and never again. It’s also worth noting that many of the validations can effectively be disabled by choosing site settings that accommodate.

There are different schools of thought in testing, but my preference is for focussed tests that each make one assertion rather than long narratives. In my experience, it has been important for tests to be confident about exactly what they are testing, otherwise its impossible for you to know where the test suite’s blind spots are.

1 Like

The identical fabrications can be replaced with fixtures if we really must. However, skipping validations when custom attributes are being passed doesn’t ensure the validity of the post record.

That is fine though because it mirrors production where site settings can be tweaked and certain validations might not run as a result.

Doesn’t removing all validations reduces confidence and increases the number of blind spots as compared to just running the validations even if the validations are partial?