diff --git a/CHANGELOG.md b/CHANGELOG.md index c000d7124..208dd60c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Unreleased +# 0.45.3 +* Retry CreateOnGithubJob on transient GitHub authentication failures. +* Stabilize PerformTaskJob tests by stubbing the task execution strategy instead of Command#stream!. + # 0.45.2 * (bugfix) Fix 404 error when removing all permissions from an API client diff --git a/Gemfile.lock b/Gemfile.lock index bfa4428da..58676efb7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - shipit-engine (0.45.2) + shipit-engine (0.45.3) active_model_serializers (~> 0.9.3) ansi_stream (~> 0.0.6) autoprefixer-rails (~> 6.4.1) diff --git a/app/jobs/shipit/create_on_github_job.rb b/app/jobs/shipit/create_on_github_job.rb index 2d75616e6..e11ba4d22 100644 --- a/app/jobs/shipit/create_on_github_job.rb +++ b/app/jobs/shipit/create_on_github_job.rb @@ -7,6 +7,17 @@ class CreateOnGithubJob < BackgroundJob queue_as :default on_duplicate :drop + # Transient Octokit::Unauthorized = GitHub installation-token propagation lag. + # attempts: 14 (~24h) outlasts the 50m token cache (GITHUB_TOKEN_RAILS_CACHE_LIFETIME). + # No token eviction here to avoid a remint storm across workers. + retry_on Octokit::Unauthorized, wait: :polynomially_longer, attempts: 14 do |job, exception| + record = job.arguments.first + Rails.logger.warn( + "[CreateOnGithubJob] Giving up on #{record.class.name} #{record.id} " \ + "after GitHub authentication failures: #{exception.class} #{exception.message}" + ) + end + # We observe that some objects regularly take longer than the default 10 seconds to create, e.g. deployments self.timeout = 40 self.lock_timeout = 20 diff --git a/lib/shipit/version.rb b/lib/shipit/version.rb index b67b2d43b..fd6a7eb91 100644 --- a/lib/shipit/version.rb +++ b/lib/shipit/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Shipit - VERSION = '0.45.2' + VERSION = '0.45.3' end diff --git a/test/jobs/perform_task_job_test.rb b/test/jobs/perform_task_job_test.rb index 45f418f07..3dd81ef94 100644 --- a/test/jobs/perform_task_job_test.rb +++ b/test/jobs/perform_task_job_test.rb @@ -107,7 +107,7 @@ def success? end test "mark deploy as error an unexpected exception is raised" do - Command.any_instance.expects(:stream!).at_least_once.raises(Command::Denied) + Shipit::TaskExecutionStrategy::Default.any_instance.expects(:capture!).at_least_once.raises(Command::Denied) @job.perform(@deploy) @@ -116,7 +116,7 @@ def success? end test "mark deploy as timedout if a command timeout" do - Command.any_instance.expects(:stream!).at_least_once.raises(Command::TimedOut) + Shipit::TaskExecutionStrategy::Default.any_instance.expects(:capture!).at_least_once.raises(Command::TimedOut) @job.perform(@deploy) @@ -129,7 +129,7 @@ def success? begin Shipit.timeout_exit_codes = [70].freeze - Command.any_instance.expects(:stream!).at_least_once.raises(Command::Failed.new('Blah', 70)) + Shipit::TaskExecutionStrategy::Default.any_instance.expects(:capture!).at_least_once.raises(Command::Failed.new('Blah', 70)) @job.perform(@deploy) diff --git a/test/jobs/shipit/create_on_github_job_test.rb b/test/jobs/shipit/create_on_github_job_test.rb new file mode 100644 index 000000000..907b409b8 --- /dev/null +++ b/test/jobs/shipit/create_on_github_job_test.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'test_helper' + +module Shipit + class CreateOnGithubJobTest < ActiveSupport::TestCase + setup do + @deployment = shipit_commit_deployments(:shipit_pending_fourth) + end + + test "#perform retries on GitHub authentication errors" do + CommitDeployment.any_instance.stubs(:create_on_github!).raises(Octokit::Unauthorized) + + assert_enqueued_with(job: CreateOnGithubJob) do + CreateOnGithubJob.perform_now(@deployment) + end + end + + test "#perform gives up without re-raising after exhausting authentication retries" do + CommitDeployment.any_instance.stubs(:create_on_github!).raises(Octokit::Unauthorized) + Rails.logger.stubs(:warn) + + job = CreateOnGithubJob.new(@deployment) + job.exception_executions = { "[Octokit::Unauthorized]" => 13 } + + assert_nothing_raised { job.perform_now } + end + end +end