debian-mirror-gitlab/lib/gitlab/diff/highlight_cache.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

236 lines
7.4 KiB
Ruby
Raw Normal View History

2018-11-20 20:47:30 +05:30
# frozen_string_literal: true
#
module Gitlab
module Diff
class HighlightCache
2020-11-24 15:15:51 +05:30
# Mixins used further down in this class: gzip_compress / gzip_decompress
# (presumably supplied by Gzip) and strong_memoize / clear_memoization
# (presumably supplied by StrongMemoize).
include Gitlab::Utils::Gzip
include Gitlab::Utils::StrongMemoize

# TTL applied to the Redis key every time the cache is read or written.
EXPIRATION = 1.hour

# Version segment embedded in the Redis key (see #key).
VERSION = 2

# Both readers are forwarded to the wrapped diff collection.
delegate :diffable, :diff_options, to: :@diff_collection
2020-01-01 13:55:28 +05:30
# Wraps a diff collection whose files should be served from / written to
# the highlight cache.
#
# diff_collection - object the class delegates #diffable and #diff_options
#                   to, and whose #raw_diff_files are cached.
def initialize(diff_collection)
  @diff_collection = diff_collection
end
# - Reads from cache
# - Assigns DiffFile#highlighted_diff_lines for cached files
2020-01-01 13:55:28 +05:30
#
2018-11-20 20:47:30 +05:30
# Assigns cached highlighted lines to +diff_file+.
#
# diff_file - object responding to #file_path, #highlighted_diff_lines and
#             #highlighted_diff_lines=.
#
# Returns the result of #set_highlighted_diff_lines, or [] when there is
# nothing cached for the file (in which case +diff_file+ is not assigned to).
def decorate(diff_file)
  content = read_file(diff_file)

  return [] unless content

  # TODO: We could add some kind of flag to #initialize that would allow
  #   us to force re-caching
  #   https://gitlab.com/gitlab-org/gitlab/-/issues/263508
  #
  if content.empty? && recache_due_to_size?(diff_file)
    # If the file is missing from the cache and there's reason to believe
    #   it is uncached due to a size issue around changing the values for
    #   max patch size, manually populate the hash and then set the value.
    #
    new_cache_content = {
      diff_file.file_path => diff_file.highlighted_diff_lines.map(&:to_hash)
    }

    write_to_redis_hash(new_cache_content)

    # The write is best-effort: #write_to_redis_hash skips entries it cannot
    #   serialise, so the entry may still be absent when read back. Bail out
    #   instead of handing nil to #set_highlighted_diff_lines.
    #
    content = read_file(diff_file)

    return [] unless content
  end

  set_highlighted_diff_lines(diff_file, content)
end
2020-01-01 13:55:28 +05:30
# For every file that isn't already contained in the redis hash, store the
# result of #highlighted_diff_lines, then submit the uncached content
# to #write_to_redis_hash to submit a single write. This avoids excessive
# IO generated by N+1's (1 writing for each highlighted line or file).
#
2018-11-20 20:47:30 +05:30
# Collects the highlighted lines of every file returned by #cacheable_files
# and hands them to #write_to_redis_hash in a single call. Does nothing when
# there are no such files.
def write_if_empty
  return if cacheable_files.empty?

  uncached_content = cacheable_files.each_with_object({}) do |uncached_file, memo|
    memo[uncached_file.file_path] = uncached_file.highlighted_diff_lines.map(&:to_hash)
  end

  write_to_redis_hash(uncached_content)
end
# Deletes the Redis entry stored under #key.
def clear
  with_redis { |redis| redis.del(key) }
end
# Redis key for this diff collection (memoized as :redis_key).
#
# The key is made of a fixed prefix, the diffable's cache key, VERSION and a
# SHA256 digest of the diff options joined with the state of the
# :diff_line_syntax_highlighting feature flag for the diffable's project.
def key
  strong_memoize(:redis_key) do
    digest_input = [
      diff_options,
      Feature.enabled?(:diff_line_syntax_highlighting, diffable.project)
    ].join

    options_for_key = OpenSSL::Digest::SHA256.hexdigest(digest_input)
    key_segments = ['highlighted-diff-files', diffable.cache_key, VERSION, options_for_key]

    key_segments.join(":")
  end
end
private
2021-01-03 14:25:43 +05:30
# Rebuilds line objects from their cached hash form and assigns them to
# +diff_file+.
#
# diff_file - object responding to #highlighted_diff_lines=.
# content   - enumerable of hashes, each passed to
#             Gitlab::Diff::Line.safe_init_from_hash.
#
# Returns the assigned array.
def set_highlighted_diff_lines(diff_file, content)
  rebuilt_lines = content.map { |line_hash| Gitlab::Diff::Line.safe_init_from_hash(line_hash) }

  diff_file.highlighted_diff_lines = rebuilt_lines
end
# Whether the diff's byte size lies between the safe limit derived from the
# default max patch size and the currently configured safe limit (both
# bounds inclusive).
#
# A diff in that window is likely uncached because it hit the default
# limit, which makes it eligible for recalculating.
def recache_due_to_size?(diff_file)
  diff_class = diff_file.diff.class

  upper_bound = diff_class.patch_safe_limit_bytes
  lower_bound = diff_class.patch_safe_limit_bytes(diff_class::DEFAULT_MAX_PATCH_BYTES)

  diff_file.diff.diff_bytesize.between?(lower_bound, upper_bound)
end
2020-01-01 13:55:28 +05:30
# Files that pass #cacheable? and have no entry in the cached content
# (memoized as :cacheable_files).
def cacheable_files
  strong_memoize(:cacheable_files) do
    diff_files.select do |candidate|
      next false unless cacheable?(candidate)

      read_file(candidate).nil?
    end
  end
end
2020-01-01 13:55:28 +05:30
# Given a hash of:
# { "file/to/cache" =>
# [ { line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_19_19",
# rich_text: " <span id=\"LC19\" class=\"line\" lang=\"plaintext\">config/initializers/secret_token.rb</span>\n",
# text: " config/initializers/secret_token.rb",
# type: nil,
# index: 3,
# old_pos: 19,
# new_pos: 19 }
# ] }
#
# ...it will write/update a Gitlab::Redis hash (HSET)
#
# Writes each entry of +hash+ into the Redis hash stored under #key,
# refreshes the key's expiry, records the key's memory usage and resets the
# memoized reads so later #read_file calls see the new content.
#
# hash - Hash of file identifier => Array of line hashes. Each value is
#        serialised with #to_json and gzip-compressed before being stored.
#
# Entries that cannot be serialised are skipped (best effort).
def write_to_redis_hash(hash)
  # Resolve the key before entering the pipeline, mirroring #read_cache:
  # computing it may trigger feature-flag lookups, which should not happen
  # inside redis.pipelined.
  cache_key = key

  with_redis do |redis|
    redis.pipelined do |pipeline|
      hash.each do |diff_file_id, highlighted_diff_lines_hash|
        pipeline.hset(
          cache_key,
          diff_file_id,
          gzip_compress(highlighted_diff_lines_hash.to_json)
        )
      rescue Encoding::UndefinedConversionError, EncodingError, JSON::GeneratorError
        # Deliberately ignored: a file that cannot be encoded stays uncached.
        nil
      end

      # HSETs have to have their expiration date manually updated
      #
      pipeline.expire(cache_key, EXPIRATION)
    end

    record_memory_usage(fetch_memory_usage(redis, cache_key))
  end

  # Subsequent read_file calls would need the latest cache.
  #
  clear_memoization(:cached_content)
  clear_memoization(:cacheable_files)
end
2020-04-08 14:13:33 +05:30
# Reports +memory_usage+ to the current transaction (when there is one) as
# the :gitlab_redis_diff_caching_memory_usage_bytes observation. No-op when
# +memory_usage+ is nil or false.
def record_memory_usage(memory_usage)
  return unless memory_usage

  current_transaction&.observe(:gitlab_redis_diff_caching_memory_usage_bytes, memory_usage) do
    docstring 'Redis diff caching memory usage by key'
    buckets [100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000]
  end
end
# Asks Redis how much memory +key+ occupies.
#
# Redis versions prior to 4.0.0 do not support memory usage reporting for a
# specific key. As of 11-March-2020 we support Redis 3.x, so the server
# version is checked first. The check can go once versions <4.0.0 are no
# longer supported.
#
# Returns nil on servers older than 4, otherwise the MEMORY USAGE reply.
def fetch_memory_usage(redis, key)
  server_version = Gem::Version.new(redis.info["redis_version"])
  minimum_version = Gem::Version.new("4")

  redis.memory("USAGE", key) if server_version >= minimum_version
end
2020-01-01 13:55:28 +05:30
# File path of every entry in #diff_files (memoized as :file_paths).
def file_paths
  strong_memoize(:file_paths) { diff_files.map(&:file_path) }
end
# Cached entry for +diff_file+, looked up by its file path in
# #cached_content. nil when the path has no entry.
def read_file(diff_file)
  path = diff_file.file_path

  cached_content[path]
end
# Result of #read_cache, memoized as :cached_content until
# #write_to_redis_hash clears it.
def cached_content
  strong_memoize(:cached_content) do
    read_cache
  end
end
# Fetches the cached entries for all #file_paths in one pipelined round trip
# and refreshes the key's expiry.
#
# Returns a Hash of file path => parsed entry (keys symbolized), with nil for
# paths that have no cached value. Returns {} when there are no file paths.
def read_cache
  return {} unless file_paths.any?

  results = []
  cache_key = key # Moving out redis calls for feature flags out of redis.pipelined

  with_redis do |redis|
    redis.pipelined do |pipeline|
      results = pipeline.hmget(cache_key, file_paths)
      # Use the already-resolved key here as well, so nothing inside the
      # pipeline goes through #key.
      pipeline.expire(cache_key, EXPIRATION)
    end
  end

  # hmget inside a pipeline hands back a deferred result; unwrap it once the
  # pipeline has run.
  results = results.value

  record_hit_ratio(results)

  results.map! do |result|
    Gitlab::Json.parse(gzip_decompress(result), symbolize_names: true) unless result.nil?
  end

  file_paths.zip(results).to_h
end
# A file is cacheable when a diffable is present and the file reports itself
# as both text and diffable.
def cacheable?(diff_file)
  return false unless diffable.present?

  diff_file.text? && diff_file.diffable?
end
2020-01-01 13:55:28 +05:30
# Raw diff files of the wrapped collection.
#
# raw_diff_files is used on purpose: the collection's diff_files would
# attempt to apply the highlighting code found in this class, leading to a
# circular reference.
def diff_files
  collection = @diff_collection

  collection.raw_diff_files
end
2020-10-24 23:57:45 +05:30
# Transaction used for metrics in this class; callers treat the result as
# possibly nil (they use safe navigation on it).
def current_transaction
  ::Gitlab::Metrics::WebTransaction.current
end
2022-08-13 15:12:31 +05:30
2023-01-13 00:05:48 +05:30
# Single entry point for Redis access in this class: forwards the given
# block to Gitlab::Redis::Cache.with and returns whatever that returns.
def with_redis(&block)
  Gitlab::Redis::Cache.with(&block) # rubocop:disable CodeReuse/ActiveRecord
end
2022-08-13 15:12:31 +05:30
# Counts one cache request on the current transaction and, when at least
# one of +results+ is present, one cache hit. Does nothing when there is no
# current transaction.
def record_hit_ratio(results)
  current_transaction&.increment(:gitlab_redis_diff_caching_requests_total)

  return unless results.any?(&:present?)

  current_transaction&.increment(:gitlab_redis_diff_caching_hits_total)
end
2018-11-20 20:47:30 +05:30
end
end
end