68 lines
2.2 KiB
Ruby
68 lines
2.2 KiB
Ruby
|
# frozen_string_literal: true
|
||
|
|
||
|
module Gitlab
  module Memory
    module Reports
      # Dumps a Jemalloc stats report into `reports_path` and then prunes the
      # oldest reports so that no more than `max_reports_stored` files remain.
      class JemallocStats
        # On prod, Jemalloc reports sizes were ~2.5 MB:
        # https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/15993#note_1014767214
        # We configured 1GB emptyDir per pod:
        # https://gitlab.com/gitlab-com/gl-infra/k8s-workloads/gitlab-com/-/merge_requests/1949
        # The pod will be evicted when the size limit is exceeded. We never want this to happen, for availability.
        #
        # With the default, we have a headroom (250*2.5MB=625<1000 MB) to fit into configured emptyDir.
        # It would allow us to keep 3+ days worth of reports for 6 workers running every 2 hours: 3*6*12=216<250
        #
        # The cleanup logic will become redundant once uploads are implemented, as they would perform the cleanup.
        DEFAULT_MAX_REPORTS_STORED = 250

        # @param reports_path [String] directory the reports are written to and pruned from
        def initialize(reports_path:)
          @reports_path = reports_path
        end

        # Dumps the current Jemalloc stats and prunes old reports afterwards.
        #
        # @return [Object, nil] whatever `Gitlab::Memory::Jemalloc.dump_stats` returns,
        #   or nil when the report is not active
        def run
          return unless active?

          Gitlab::Memory::Jemalloc.dump_stats(path: reports_path, filename_label: worker_id).tap { cleanup }
        end

        # @return [Boolean] result of the `report_jemalloc_stats` ops feature flag check
        def active?
          Feature.enabled?(:report_jemalloc_stats, type: :ops)
        end

        private

        attr_reader :reports_path

        # Removes the oldest reports so that at most `max_reports_stored` remain.
        def cleanup
          files = reports_files_modified_order
          excess = files.size - max_reports_stored
          return unless excess > 0

          # Files are sorted oldest first, so the first `excess` entries are the ones to drop.
          # Counting explicitly (instead of slicing with `0...-limit`) also handles a limit of 0.
          files.first(excess).each do |f|
            File.unlink(f)
          rescue Errno::ENOENT
            # Path does not exist: Ignore.
            # Each worker could perform a cleanup, so the file may already be gone.
          end
        end

        # @return [Array<String>] report paths under `reports_path`, oldest modification time first
        def reports_files_modified_order
          pattern = File.join(reports_path, "#{Gitlab::Memory::Jemalloc::FILENAME_PREFIX}*")

          Dir.glob(pattern).sort_by do |f|
            File.mtime(f)
          rescue Errno::ENOENT
            # Path does not exist: Return any timestamp to proceed with the sort
            Time.current
          end
        end

        def worker_id
          ::Prometheus::PidProvider.worker_id
        end

        # Maximum number of reports to keep on disk.
        #
        # ENV values are always Strings, so the override is parsed as a base-10
        # integer. An unset, non-numeric or negative value falls back to
        # DEFAULT_MAX_REPORTS_STORED.
        #
        # @return [Integer]
        def max_reports_stored
          raw = ENV["GITLAB_DIAGNOSTIC_REPORTS_JEMALLOC_MAX_REPORTS_STORED"]
          limit = Integer(raw, 10, exception: false) if raw

          if limit && limit >= 0
            limit
          else
            DEFAULT_MAX_REPORTS_STORED
          end
        end
      end
    end
  end
end
|