debian-mirror-gitlab/lib/gitlab/git/blob.rb

235 lines
7.4 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2017-09-10 17:25:29 +05:30
2017-08-17 22:00:37 +05:30
module Gitlab
module Git
class Blob
2018-12-13 13:39:08 +05:30
include Gitlab::BlobHelper
2017-09-10 17:25:29 +05:30
include Gitlab::EncodingHelper
2018-12-13 13:39:08 +05:30
extend Gitlab::Git::WrapsGitalyErrors
2017-08-17 22:00:37 +05:30
# This number is the maximum amount of data that we want to display to
# the user. We load as much as we can for encoding detection and LFS
# pointer parsing. All other cases where we need full blob data should
# use load_all_data!.
MAX_DATA_DISPLAY_SIZE = 10.megabytes

# The number of blobs loaded in a single Gitaly call.
# When a large number of blobs is requested, we fetch them in
# multiple Gitaly calls.
BATCH_SIZE = 250

# These limits are used as a heuristic to ignore files which can't be LFS
# pointers. The format of these is described in
# https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
LFS_POINTER_MIN_SIZE = 120.bytes
LFS_POINTER_MAX_SIZE = 200.bytes

attr_accessor :size, :mode, :id, :commit_id, :loaded_size, :binary

# Only writers are generated here: the readers for these three attributes
# are hand-written because they pass the stored value through encode!.
attr_writer :name, :path, :data
2017-08-17 22:00:37 +05:30
2020-10-24 23:57:45 +05:30
# Counter incremented each time a blob is initialized in a truncated state.
# It is built on first use and memoized in a class-level instance variable.
def self.gitlab_blob_truncated_true
  @gitlab_blob_truncated_true ||= ::Gitlab::Metrics.counter(
    :gitlab_blob_truncated_true,
    'blob.truncated? == true'
  )
end
2020-10-24 23:57:45 +05:30
# Counter incremented each time a blob is initialized without being truncated.
# It is built on first use and memoized in a class-level instance variable.
def self.gitlab_blob_truncated_false
  @gitlab_blob_truncated_false ||= ::Gitlab::Metrics.counter(
    :gitlab_blob_truncated_false,
    'blob.truncated? == false'
  )
end
2020-10-24 23:57:45 +05:30
# Histogram of blob sizes, observed once per initialized blob that has a
# size. Built on first use and memoized in a class-level instance variable.
def self.gitlab_blob_size
  @gitlab_blob_size ||= ::Gitlab::Metrics.histogram(
    :gitlab_blob_size,
    'Gitlab::Git::Blob size',
    {},
    [1_000, 5_000, 10_000, 50_000, 100_000, 500_000, 1_000_000]
  )
end
2017-08-17 22:00:37 +05:30
class << self
2018-11-08 19:23:39 +05:30
# Looks up the entry stored at +path+ in the tree of +sha+.
#
# repository - repository the lookup is performed against
# sha        - revision whose tree is searched
# path       - path of the entry; a nil path yields nil
# limit      - maximum number of data bytes to load
#              (defaults to MAX_DATA_DISPLAY_SIZE)
#
# Returns whatever tree_entry returns for the same arguments.
def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
  tree_entry(repository, sha, path, limit)
end
# Fetches the tree entry at +path+ for revision +sha+ through the Gitaly
# commit service and wraps it in a new instance.
#
# repository - repository handed to Gitlab::GitalyClient::CommitService
# sha        - revision whose tree is searched
# path       - entry path; leading slashes are stripped, and an empty
#              result is replaced by '/'
# limit      - maximum number of data bytes wanted; 0 means metadata only
#
# Returns a new instance for :COMMIT and :BLOB entries, and nil when the
# path is nil, the entry is missing, or the entry has any other type.
def tree_entry(repository, sha, path, limit)
  return unless path

  path = path.sub(%r{\A/*}, '')
  path = '/' if path.empty?
  name = File.basename(path)

  # Gitaly will think that setting the limit to 0 means unlimited, while
  # the client might only need the metadata and thus set the limit to 0.
  # In this method we'll then set the limit to 1, but clear the byte of data
  # that we got back so for the outside world it looks like the limit was
  # actually 0.
  metadata_only = limit == 0
  req_limit = metadata_only ? 1 : limit

  entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit)
  return unless entry

  entry.data = "" if metadata_only

  case entry.type
  when :COMMIT
    # A :COMMIT entry is built with no content and a size of zero.
    new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha)
  when :BLOB
    new(
      id: entry.oid,
      name: name,
      size: entry.size,
      data: entry.data.dup,
      mode: entry.mode.to_s(8),
      path: path,
      commit_id: sha,
      binary: binary?(entry.data)
    )
  end
end
2017-08-17 22:00:37 +05:30
2021-10-27 15:23:28 +05:30
# Fetches a blob by its object id through the repository's Gitaly blob client.
#
# repository - object exposing gitaly_blob_client
# sha        - object id of the blob, passed to the client as +oid+
# limit      - byte limit passed to the client
#              (defaults to MAX_DATA_DISPLAY_SIZE)
#
# Returns whatever the client's get_blob returns.
def raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE)
  blob_client = repository.gitaly_blob_client
  blob_client.get_blob(oid: sha, limit: limit)
end
2017-09-10 17:25:29 +05:30
# Returns an array of Blob instances, specified in blob_references as
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
# contain no more than limit bytes in its data attribute.
#
# The references are fetched in slices of BATCH_SIZE, one Gitaly call per
# slice, and the per-slice results are concatenated in order.
#
# Keep in mind that this method may allocate a lot of memory. It is up
# to the caller to limit the number of blobs and blob_size_limit.
#
def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
  blob_references.each_slice(BATCH_SIZE).flat_map do |refs|
    repository.gitaly_blob_client.get_blobs(refs, blob_size_limit).to_a
  end
end
2018-10-15 14:42:47 +05:30
# Metadata-only variant of batch: the blobs are requested with a size limit
# of 0, so the returned Blob instances carry no content in their data
# attribute.
def batch_metadata(repository, blob_references)
  metadata_only_limit = 0

  batch(repository, blob_references, blob_size_limit: metadata_only_limit)
end
2018-03-17 18:26:18 +05:30
# Find LFS blobs given an array of sha ids
# Returns array of Gitlab::Git::Blob
# Does not guarantee blob data will be set
#
# blob_ids is converted with to_a before being handed to the Gitaly blob
# client, and the call runs inside wrapped_gitaly_errors.
def batch_lfs_pointers(repository, blob_ids)
  wrapped_gitaly_errors do
    repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
  end
end
2021-06-08 01:23:25 +05:30
# Decides whether +data+ is binary by delegating to
# EncodingHelper.detect_libgit2_binary?.
#
# data      - content to inspect
# cache_key - forwarded unchanged to the helper (defaults to nil)
def binary?(data, cache_key: nil)
  EncodingHelper.detect_libgit2_binary?(data, cache_key: cache_key)
end
# Cheap pre-check for LFS pointers: true when +size+ lies within the
# inclusive range LFS_POINTER_MIN_SIZE..LFS_POINTER_MAX_SIZE.
def size_could_be_lfs?(size)
  smallest_pointer = LFS_POINTER_MIN_SIZE
  largest_pointer = LFS_POINTER_MAX_SIZE

  size.between?(smallest_pointer, largest_pointer)
end
2017-08-17 22:00:37 +05:30
end
# Builds a blob from a hash of attributes.
#
# options - Hash that may contain :id, :name, :path, :size, :data, :mode,
#           :commit_id and :binary. Each attribute is assigned through its
#           writer; a missing key assigns nil.
#
# Also records the number of bytes actually loaded and emits the blob size
# and truncation metrics.
def initialize(options)
  %i[id name path size data mode commit_id binary].each do |attribute|
    self.__send__(:"#{attribute}=", options[attribute]) # rubocop:disable GitlabSecurity/PublicSend
  end

  # Retain the actual size before it is encoded
  @loaded_size = @data.bytesize if @data
  @loaded_all_data = @loaded_size == size

  # Recalculate binary status if we loaded all data
  @binary = nil if @loaded_all_data

  record_metric_blob_size
  record_metric_truncated(truncated?)
end
2019-02-15 15:39:39 +05:30
# Reports whether the blob is binary. An explicitly recorded @binary flag
# takes precedence; when it is nil the decision is deferred to the
# inherited implementation via super.
def binary_in_repo?
  return super if @binary.nil?

  @binary == true
end
# Reader for the blob contents: returns the result of passing the stored
# @data through encode!.
def data
  encode!(@data)
end
# Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
# memory as a Ruby string.
#
# repository - object whose gitaly_blob_client is used to fetch the blob
#              by id with a limit of -1
#
# Does nothing when the data is an empty string. When everything was
# already loaded, only the cached binary flag is reset.
def load_all_data!(repository)
  return if @data == '' # don't mess with submodule blobs

  # Even if we return early, recalculate whether this blob is binary in
  # case a blob was initialized as text but the full data isn't
  @binary = nil

  return if @loaded_all_data

  full_blob = repository.gitaly_blob_client.get_blob(oid: id, limit: -1)
  @data = full_blob.data
  @loaded_all_data = true
  @loaded_size = @data.bytesize
end
# Reader for the blob name: returns the result of passing the stored
# @name through encode!.
def name
  encode!(@name)
end
2017-09-10 17:25:29 +05:30
# Reader for the blob path: returns the result of passing the stored
# @path through encode!.
def path
  encode!(@path)
end
2017-08-17 22:00:37 +05:30
# True when the blob's size is larger than the number of bytes loaded.
# Returns false when either size or loaded_size is not set.
def truncated?
  return false unless size && loaded_size

  size > loaded_size
end
# Valid LFS object pointer is a text file consisting of
#     version
#     oid
#     size
# see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
#
# The size heuristic is checked first, so blobs outside the LFS pointer
# size range are rejected before their content is inspected.
def lfs_pointer?
  self.class.size_could_be_lfs?(size) &&
    has_lfs_version_key? &&
    lfs_oid.present? &&
    lfs_size.present?
end
# Extracts the object id from LFS pointer content: the 64 lowercase hex
# characters that follow "sha256:". Returns nil when the blob lacks the LFS
# version key or no such digest is found.
def lfs_oid
  return unless has_lfs_version_key?

  data[/(?<=sha256:)([0-9a-f]{64})/, 1]
end
# Extracts the object size from LFS pointer content: the run of digits
# that follows "size ", converted to an Integer. Returns nil when the blob
# lacks the LFS version key or no such number is found.
def lfs_size
  return unless has_lfs_version_key?

  digits = data[/(?<=size )([0-9]+)/, 1]
  digits.to_i if digits
end
# Names the external storage backing this blob: :lfs when the blob is an
# LFS pointer, nil otherwise.
def external_storage
  :lfs if lfs_pointer?
end
alias_method :external_size, :lfs_size
private
2020-04-08 14:13:33 +05:30
# Feeds the blob's size into the class-level gitlab_blob_size histogram.
# Blobs without a size are not recorded.
def record_metric_blob_size
  self.class.gitlab_blob_size.observe({}, size) if size
end
# Increments exactly one of the two class-level truncation counters:
# gitlab_blob_truncated_true when +bool+ is truthy, otherwise
# gitlab_blob_truncated_false.
def record_metric_truncated(bool)
  counter = bool ? self.class.gitlab_blob_truncated_true : self.class.gitlab_blob_truncated_false
  counter.increment
end
2017-08-17 22:00:37 +05:30
# Checks whether the blob content starts with the Git LFS version line.
# Empty blobs are rejected first, then blobs that are not text in the repo,
# before the loaded data is inspected.
def has_lfs_version_key?
  return false if empty?

  text_in_repo? && data.start_with?("version https://git-lfs.github.com/spec")
end
end
end
end
2019-05-03 19:53:19 +05:30
Gitlab::Git::Blob.singleton_class.prepend Gitlab::Git::RuggedImpl::Blob::ClassMethods