debian-mirror-gitlab/spec/lib/gitlab/memory/watchdog_spec.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

288 lines
8.6 KiB
Ruby
Raw Normal View History

2022-08-13 15:12:31 +05:30
# frozen_string_literal: true
require 'spec_helper'
2022-11-25 23:54:43 +05:30
RSpec.describe Gitlab::Memory::Watchdog, :aggregate_failures do
2022-08-13 15:12:31 +05:30
context 'watchdog' do
2022-11-25 23:54:43 +05:30
# Verified doubles standing in for objects the examples interact with.
let(:configuration) { instance_double(described_class::Configuration) }
let(:handler) { instance_double(described_class::NullHandler) }
let(:logger) { instance_double(::Logger) }

# Counter doubles that `stub_prometheus_metrics` returns for the memwd metrics.
let(:violations_counter) { instance_double(::Prometheus::Client::Counter) }
let(:violations_handled_counter) { instance_double(::Prometheus::Client::Counter) }

# Values fed into the watchdog configuration and the loop-stopping stub.
let(:sleep_time_seconds) { 60 }
let(:watchdog_iterations) { 1 }
let(:max_strikes) { 2 }

# Constructor arguments for the fake monitor, plus the `reason:` label the
# counter expectations look for.
let(:threshold_violated) { false }
let(:payload) { { message: 'dummy_text' } }
let(:name) { :monitor_name }
# Fake monitor: a two-member struct whose `call` echoes the members back as
# a result hash.
let(:monitor_class) do
  Struct.new(:threshold_violated, :payload) do
    class << self
      # Gives the otherwise anonymous struct class a fixed name.
      def name
        'MonitorName'
      end
    end

    # Returns `{ threshold_violated: ..., payload: ... }` built from the members.
    def call
      to_h
    end
  end
end
2022-08-27 11:52:29 +05:30
2022-08-13 15:12:31 +05:30
# Watchdog under test with `sleep` defused so the internal loop cannot block
# or run forever.
subject(:watchdog) do
  instance = described_class.new
  sleep_calls = 0

  # Count every stubbed `sleep`; once the count exceeds `watchdog_iterations`
  # the watchdog is told to stop.
  allow(instance).to receive(:sleep) do
    sleep_calls += 1
    instance.stop if sleep_calls > watchdog_iterations
  end

  instance
end
# Makes `Gitlab::Metrics.counter` hand out the counter doubles for the two
# memwd metrics and lets both doubles accept `increment`.
def stub_prometheus_metrics
  counters_by_metric = {
    gitlab_memwd_violations_total: violations_counter,
    gitlab_memwd_violations_handled_total: violations_handled_counter
  }

  counters_by_metric.each do |metric_name, counter|
    allow(Gitlab::Metrics).to receive(:counter)
      .with(metric_name, anything, anything)
      .and_return(counter)

    allow(counter).to receive(:increment)
  end
end
2022-11-25 23:54:43 +05:30
describe '#initialize' do
  it 'initialize new configuration' do
    # Verify the constructor call but still run the real `new`: a bare
    # `receive(:new)` would replace it with a stub that returns nil.
    expect(described_class::Configuration).to receive(:new).and_call_original

    # Referencing the subject is what instantiates the watchdog.
    watchdog
  end
end
2022-11-25 23:54:43 +05:30
describe '#call' do
# Shared setup for every `#call` example: stub metrics and process info,
# configure the watchdog with one fake monitor, and let the handler and
# logger doubles accept the messages the watchdog may send.
before do
  stub_prometheus_metrics

  # Plain allowances. A call-count constraint on `allow` is never verified,
  # so none is attached here.
  allow(Gitlab::Metrics::System).to receive(:memory_usage_rss).and_return(1024)
  allow(::Prometheus::PidProvider).to receive(:worker_id).and_return('worker_1')

  watchdog.configure do |config|
    config.handler = handler
    config.logger = logger
    config.sleep_time_seconds = sleep_time_seconds
    config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
  end

  allow(handler).to receive(:call).and_return(true)
  allow(logger).to receive(:info)
  allow(logger).to receive(:warn)
end
2022-10-11 01:57:18 +05:30
2022-11-25 23:54:43 +05:30
# The 'started' event must be logged exactly once with the full label set.
it 'logs start message once' do
  start_event = {
    pid: Process.pid,
    worker_id: 'worker_1',
    memwd_handler_class: handler.class.name,
    memwd_sleep_time_s: sleep_time_seconds,
    memwd_rss_bytes: 1024,
    message: 'started'
  }

  # Double-splat so the matcher still sees keyword-style arguments.
  expect(logger).to receive(:info).with(**start_event).once

  watchdog.call
end

# The pause between checks must use the configured sleep time.
it 'waits for check interval seconds' do
  expect(watchdog).to receive(:sleep).with(sleep_time_seconds)

  watchdog.call
end
2022-08-13 15:12:31 +05:30
2022-11-25 23:54:43 +05:30
context 'when gitlab_memory_watchdog ops toggle is off' do
  # Force a violation that is counted, and handled on the first strike,
  # whenever the monitor actually runs. Seeing none of those effects then
  # shows the monitor was skipped.
  let(:threshold_violated) { true }
  let(:max_strikes) { 0 }

  before do
    stub_feature_flags(gitlab_memory_watchdog: false)
  end

  it 'does not trigger any monitor' do
    expect(violations_counter).not_to receive(:increment)
    expect(violations_handled_counter).not_to receive(:increment)
    expect(handler).not_to receive(:call)

    # The watchdog has to run, otherwise the expectations above are vacuous.
    watchdog.call
  end
end
2022-08-13 15:12:31 +05:30
2022-11-25 23:54:43 +05:30
# With the default fixtures the fake monitor reports no violation, so none
# of the violation side effects may be observed after a run.
context 'when process does not exceed threshold' do
  it 'does not increment violations counters' do
    watchdog.call

    expect(violations_counter).not_to have_received(:increment)
    expect(violations_handled_counter).not_to have_received(:increment)
  end

  it 'does not log violation' do
    watchdog.call

    expect(logger).not_to have_received(:warn)
  end

  it 'does not execute handler' do
    watchdog.call

    expect(handler).not_to have_received(:call)
  end
end
2022-08-13 15:12:31 +05:30
2022-11-25 23:54:43 +05:30
context 'when process exceeds threshold' do
# From here on the fake monitor reports a violation on every check.
let(:threshold_violated) { true }

it 'increments violations counter' do
  expected_labels = { reason: name }

  # Double-splat keeps the labels as keyword-style arguments for the matcher.
  expect(violations_counter).to receive(:increment).with(**expected_labels)

  watchdog.call
end
2022-10-11 01:57:18 +05:30
2022-11-25 23:54:43 +05:30
# Violations occur but stay within the default strike budget, so nothing is
# escalated: no handled-violation metric, no warning, no handler call.
context 'when process does not exceed the allowed number of strikes' do
  it 'does not increment handled violations counter' do
    watchdog.call

    expect(violations_handled_counter).not_to have_received(:increment)
  end

  it 'does not log violation' do
    watchdog.call

    expect(logger).not_to have_received(:warn)
  end

  it 'does not execute handler' do
    watchdog.call

    expect(handler).not_to have_received(:call)
  end
end
2022-11-25 23:54:43 +05:30
context 'when monitor exceeds the allowed number of strikes' do
# A strike budget of zero: the very first violation is already over budget.
let(:max_strikes) { 0 }

it 'increments handled violations counter' do
  expect(violations_handled_counter).to receive(:increment).with(reason: name)

  watchdog.call
end

it 'logs violation' do
  violation_event = {
    pid: Process.pid,
    worker_id: 'worker_1',
    memwd_handler_class: handler.class.name,
    memwd_sleep_time_s: sleep_time_seconds,
    memwd_rss_bytes: 1024,
    memwd_cur_strikes: 1,
    memwd_max_strikes: max_strikes,
    message: 'dummy_text'
  }

  # Double-splat so the matcher still sees keyword-style arguments.
  expect(logger).to receive(:warn).with(**violation_event)

  watchdog.call
end

it 'executes handler' do
  expect(handler).to receive(:call)

  watchdog.call
end
2022-08-13 15:12:31 +05:30
2022-11-25 23:54:43 +05:30
# With enforcement switched off the configured handler must be bypassed in
# favour of the NullHandler singleton.
context 'when enforce_memory_watchdog ops toggle is off' do
  before { stub_feature_flags(enforce_memory_watchdog: false) }

  it 'always uses the NullHandler' do
    null_handler = described_class::NullHandler.instance

    expect(null_handler).to receive(:call).and_return(true)
    expect(handler).not_to receive(:call)

    watchdog.call
  end
end
2022-10-11 01:57:18 +05:30
2022-11-25 23:54:43 +05:30
# Registers two more copies of the violating monitor on the same watchdog
# and checks the handler is still invoked only a single time.
context 'when multiple monitors exceeds allowed number of strikes' do
  before do
    watchdog.configure do |config|
      config.handler = handler
      config.logger = logger
      config.sleep_time_seconds = sleep_time_seconds

      2.times do
        config.monitors.use monitor_class, threshold_violated, payload, max_strikes: max_strikes
      end
    end
  end

  it 'only calls the handler once' do
    expect(handler).to receive(:call).and_return(true).once

    watchdog.call
  end
end
2022-10-11 01:57:18 +05:30
end
2022-08-13 15:12:31 +05:30
end
2022-10-11 01:57:18 +05:30
2022-11-25 23:54:43 +05:30
# The 'stopped' event must be logged exactly once with the full label set.
it 'logs stop message once' do
  stop_event = {
    pid: Process.pid,
    worker_id: 'worker_1',
    memwd_handler_class: handler.class.name,
    memwd_sleep_time_s: sleep_time_seconds,
    memwd_rss_bytes: 1024,
    message: 'stopped'
  }

  # Double-splat so the matcher still sees keyword-style arguments.
  expect(logger).to receive(:info).with(**stop_event).once

  watchdog.call
end
end
2022-11-25 23:54:43 +05:30
describe '#configure' do
  # `configure` must invoke the block it is given.
  it 'yields block' do
    expect { |probe| watchdog.configure(&probe) }.to yield_control
  end
end
end
context 'handlers' do
context 'NullHandler' do
  subject(:handler) { described_class::NullHandler.instance }

  describe '#call' do
    # The null handler's `call` returns false.
    it 'does nothing' do
      outcome = handler.call

      expect(outcome).to be(false)
    end
  end
end
context 'TermProcessHandler' do
  subject(:handler) { described_class::TermProcessHandler.new(pid) }

  # Pid handed to the handler; `Process.kill` is mocked, so no signal is sent.
  let(:pid) { 42 }

  describe '#call' do
    it 'sends SIGTERM to the current process' do
      expect(Process).to receive(:kill).with(:TERM, pid)

      outcome = handler.call

      expect(outcome).to be(true)
    end
  end
end
context 'PumaHandler' do
  subject(:handler) { described_class::PumaHandler.new({}) }

  # rubocop: disable RSpec/VerifiedDoubles
  # The Puma constant is not loaded in tests, so plain doubles are used here
  # instead of instance_double.
  let(:worker_handle_class) { double('Puma::Cluster::WorkerHandle') }
  let(:worker_handle) { double('worker') }
  # rubocop: enable RSpec/VerifiedDoubles

  before do
    stub_const('::Puma::Cluster::WorkerHandle', worker_handle_class)
  end

  describe '#call' do
    it 'invokes orderly termination via Puma API' do
      # The handler is expected to build a worker handle and call `term` on it.
      expect(worker_handle_class).to receive(:new).and_return(worker_handle)
      expect(worker_handle).to receive(:term)

      outcome = handler.call

      expect(outcome).to be(true)
    end
  end
end
end
end