debian-mirror-gitlab/lib/gitlab/memory/watchdog.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

135 lines
3.5 KiB
Ruby
Raw Normal View History

2022-08-13 15:12:31 +05:30
# frozen_string_literal: true
module Gitlab
module Memory
2022-11-25 23:54:43 +05:30
# A background thread that monitors Ruby memory and calls
# into a handler when the Ruby process violates defined limits
# for an extended period of time.
2022-08-27 11:52:29 +05:30
class Watchdog
2022-08-13 15:12:31 +05:30
# This handler does nothing. It returns `false` to indicate to the
# caller that the situation has not been dealt with so it will
# receive calls repeatedly if fragmentation remains high.
#
# This is useful for "dress rehearsals" in production since it allows
# us to observe how frequently the handler is invoked before taking action.
class NullHandler
  include Singleton

  # @return [false] always, signalling that nothing was handled
  def call
    # NOP
    false
  end
end
# This handler sends SIGTERM and considers the situation handled.
class TermProcessHandler
  # @param pid [Integer] process to signal; defaults to the current process
  def initialize(pid = $$)
    @pid = pid
  end

  # Sends TERM to the PID given at construction time.
  #
  # @return [true] always, signalling that the situation was handled
  def call
    Process.kill(:TERM, @pid)
    true
  end
end
# This handler invokes Puma's graceful termination handler, which takes
# into account a configurable grace period during which a process may
# remain unresponsive to a SIGTERM.
class PumaHandler
  # Wraps the current process ($$) in a Puma worker handle.
  #
  # @param puma_options options passed through to the worker handle;
  #   defaults to the options of the running Puma CLI configuration
  def initialize(puma_options = ::Puma.cli_config.options)
    @worker = ::Puma::Cluster::WorkerHandle.new(0, $$, 0, puma_options)
  end

  # Asks the worker handle to terminate.
  #
  # @return [true] always, signalling that the situation was handled
  def call
    @worker.term
    true
  end
end
2022-11-25 23:54:43 +05:30
# Builds a watchdog with a fresh Configuration and marks it as alive,
# which is the condition `call` loops on.
def initialize
  @configuration = Configuration.new
  @alive = true
end
2023-01-13 00:05:48 +05:30
##
# Configuration for Watchdog, see Gitlab::Memory::Watchdog::Configurator
# for examples.
#
# Yields the watchdog's configuration object to the given block and
# returns whatever the block returns.
def configure
  yield configuration
end
2022-08-13 15:12:31 +05:30
2022-08-27 11:52:29 +05:30
# Runs the watchdog loop: reports the start, then alternates between
# sleeping for the configured interval and running the monitors until
# the watchdog is no longer alive, and finally reports the stop.
def call
  event_reporter.started(log_labels)

  while @alive
    sleep(sleep_time_seconds)

    monitor
  end

  # `compact` drops the reason label when no stop reason was recorded.
  event_reporter.stopped(log_labels(memwd_reason: @stop_reason).compact)
end
2023-03-17 16:20:25 +05:30
# Public entry point for shutting the watchdog down; records
# 'background task stopped' as the stop reason.
def stop
  stop_working(reason: 'background task stopped')
end
2022-08-13 15:12:31 +05:30
private
2023-03-04 22:38:38 +05:30
# Reader for the configuration object; declared after the `private` marker above.
attr_reader :configuration
# Forward these three readers to the configuration object.
delegate :event_reporter, :monitors, :sleep_time_seconds, to: :configuration
2022-11-25 23:54:43 +05:30
# Runs every configured monitor once and reacts to the results.
#
# With no monitors configured the watchdog stops itself. Otherwise each
# result is inspected: a threshold violation is reported, and once a
# monitor's strikes are exceeded the strike callback is invoked.
def monitor
  if monitors.empty?
    stop_working(reason: 'monitors are not configured')
    return
  end

  monitors.call_each do |result|
    # Abandon the remaining results once the watchdog has been stopped.
    break unless @alive

    next unless result.threshold_violated?

    event_reporter.threshold_violated(result.monitor_name)

    next unless result.strikes_exceeded?

    strike_exceeded_callback(result.monitor_name, result.payload)
  end
end
2023-03-04 22:38:38 +05:30
# Reacts to a monitor that exceeded its allowed strikes: reports the
# event, enqueues a heap dump and invokes the handler. The watchdog only
# stops when the handler returns a truthy value.
#
# @param monitor_name name of the monitor that exceeded its strikes
# @param monitor_payload extra labels from the monitor, merged into the
#   reported log labels
def strike_exceeded_callback(monitor_name, monitor_payload)
  event_reporter.strikes_exceeded(monitor_name, log_labels(monitor_payload))

  Gitlab::Memory::Reports::HeapDump.enqueue!

  stop_working(reason: 'successfully handled') if handler.call
end
# Picks the handler to invoke when strikes are exceeded.
#
# @return the NullHandler singleton while the :enforce_memory_watchdog ops
#   flag is disabled, otherwise the handler from the configuration
def handler
  # This allows us to keep the watchdog running but turn it into "friendly mode" where
  # all that happens is we collect logs and Prometheus events for fragmentation violations.
  return NullHandler.instance unless Feature.enabled?(:enforce_memory_watchdog, type: :ops)

  configuration.handler
end
2023-03-04 22:38:38 +05:30
# Builds the label hash attached to reported events.
#
# @param extra [Hash] additional labels; left unmodified. The handler
#   class and sleep time labels win over same-named keys in `extra`.
# @return [Hash] a new hash with `extra` plus the two standard labels
def log_labels(extra = {})
  extra.merge(
    memwd_handler_class: handler.class.name,
    memwd_sleep_time_s: sleep_time_seconds
  )
end
2023-03-17 16:20:25 +05:30
# Records why the watchdog is shutting down and clears the alive flag.
# Only the first call has an effect: once the watchdog is no longer
# alive, later calls leave the recorded reason untouched.
def stop_working(reason:)
  if @alive
    @stop_reason = reason
    @alive = false
  end
end
2022-08-13 15:12:31 +05:30
end
end
end