# frozen_string_literal: true

module Gitlab
  module Search
    # A single blob (file) hit produced by a code search.
    #
    # Attributes that are not passed to the constructor are resolved lazily
    # from one of two sources (see #parsed_content):
    #
    # * `content_match` - raw result text in which each line starts with
    #   "<ref>:<path>\0<line number>\0" (see CONTENT_REGEXP);
    # * `blob_path` - a path whose blob is batch-loaded from the repository
    #   (see #fetch_blob).
    class FoundBlob
      include EncodingHelper
      include Presentable
      include BlobLanguageFromGitAttributes
      include Gitlab::Utils::StrongMemoize
      include BlobActiveModel

      attr_reader :project, :content_match, :blob_path

      # Extracts ref and path from the very beginning of a raw search result.
      PATH_REGEXP = /\A(?<ref>[^:]*):(?<path>[^\x00]*)\x00/.freeze
      # Extracts ref, path and line number from the start of a result line.
      CONTENT_REGEXP = /^(?<ref>[^:]*):(?<path>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze

      # Queues the blob lookup for every FoundBlob in `blobs` that has a
      # blob_path, so that the lookups can be resolved together.
      #
      # @param blobs [Enumerable] search results; non-FoundBlob items are ignored
      # @return [Array<FoundBlob>] the blobs for which a lookup was queued
      def self.preload_blobs(blobs)
        to_fetch = blobs.select { |blob| blob.is_a?(self) && blob.blob_path }

        to_fetch.each(&:fetch_blob)
      end

      # @param opts [Hash] recognised keys: :id, :path, :basename, :ref,
      #   :startline, :data, :per_page (defaults to 20), :project,
      #   :project_id, :content_match, :blob_path, :repository.
      #   Every key except :per_page defaults to nil.
      def initialize(opts = {})
        @id = opts.fetch(:id, nil)
        @binary_path = opts.fetch(:path, nil)
        @binary_basename = opts.fetch(:basename, nil)
        @ref = opts.fetch(:ref, nil)
        @startline = opts.fetch(:startline, nil)
        @binary_data = opts.fetch(:data, nil)
        @per_page = opts.fetch(:per_page, 20)
        @project = opts.fetch(:project, nil)
        # Some callers (e.g. Elasticsearch) do not have the Project object,
        # yet they can trigger many calls in one go,
        # causing duplicated queries.
        # Allow those to just pass project_id instead.
        @project_id = opts.fetch(:project_id, nil)
        @content_match = opts.fetch(:content_match, nil)
        @blob_path = opts.fetch(:blob_path, nil)
        @repository = opts.fetch(:repository, nil)
      end

      # Blob id given to the constructor, otherwise the parsed/fetched one.
      def id
        @id ||= parsed_content[:id]
      end

      # Ref given to the constructor, otherwise the parsed/fetched one.
      def ref
        @ref ||= parsed_content[:ref]
      end

      # Start line given to the constructor, otherwise the parsed/fetched one.
      def startline
        @startline ||= parsed_content[:startline]
      end

      # binary_path is used for running filters on all matches.
      # For grepped results (which use content_match), we get
      # the path from the beginning of the grepped result which is faster
      # than parsing the whole snippet
      def binary_path
        @binary_path ||= content_match ? search_result_path : parsed_content[:binary_path]
      end

      # Path passed through encode_utf8.
      def path
        @path ||= encode_utf8(@binary_path || parsed_content[:binary_path])
      end

      # Basename passed through encode_utf8.
      def basename
        @basename ||= encode_utf8(@binary_basename || parsed_content[:binary_basename])
      end

      # Blob content passed through encode_utf8.
      def data
        @data ||= encode_utf8(@binary_data || parsed_content[:binary_data])
      end

      # Explicit :project_id if one was given, otherwise the id of :project
      # (nil when neither is available).
      def project_id
        @project_id || @project&.id
      end

      # Presents this object with BlobPresenter.
      def present
        super(presenter_class: BlobPresenter)
      end

      # Registers a BatchLoader lookup for [ref, blob_path] and returns the
      # value produced by BatchLoader for it.
      #
      # The batch is keyed by repository, so lookups queued by instances that
      # belong to different repositories are never merged into a single
      # Gitlab::Git::Blob.batch call against the wrong repository.
      #
      # @return the BatchLoader result for this blob; it resolves to a Hash
      #   with :id, :binary_path, :binary_basename, :ref, :startline,
      #   :binary_data and :project, or to `{ binary_path: blob_path }` when
      #   no blob was loaded for the path.
      def fetch_blob
        path = [ref, blob_path]
        # if the blob couldn't be fetched for some reason,
        # show at least the blob path
        missing_blob = { binary_path: blob_path }

        BatchLoader.for(path).batch(key: repository, default_value: missing_blob) do |refs, loader, args|
          Gitlab::Git::Blob.batch(args[:key], refs, blob_size_limit: 1024).each do |blob|
            data = {
              id: blob.id,
              binary_path: blob.path,
              binary_basename: path_without_extension(blob.path),
              ref: ref,
              startline: 1,
              binary_data: blob.data,
              project: project
            }

            loader.call([ref, blob.path], data)
          end
        end
      end

      private

      # Path captured from the start of content_match, or nil when the text
      # does not match PATH_REGEXP.
      def search_result_path
        content_match.match(PATH_REGEXP) { |matches| matches[:path] }
      end

      # Returns `path` with its last extension removed.
      def path_without_extension(path)
        Pathname.new(path).sub_ext('').to_s
      end

      # Memoized attribute source: parsed from content_match when present,
      # fetched through #fetch_blob when only blob_path is present, and an
      # empty Hash otherwise.
      def parsed_content
        strong_memoize(:parsed_content) do
          if content_match
            parse_search_result
          elsif blob_path
            fetch_blob
          else
            {}
          end
        end
      end

      # Splits content_match into its metadata and the matched text.
      #
      # Each line is expected to start with "<ref>:<path>\0<line number>\0";
      # that prefix is stripped and the remainder is collected as data.
      #
      # @return [Hash] :binary_path, :binary_basename, :ref, :startline,
      #   :binary_data and :project
      def parse_search_result
        ref = nil
        path = nil
        basename = nil

        data = []
        startline = 0

        content_match.each_line.each_with_index do |line, index|
          # `prefix` is local to this block, so every line is matched afresh
          # and the values taken from the last matching line win.
          prefix = line.match(CONTENT_REGEXP)&.tap do |matches|
            ref = matches[:ref]
            path = matches[:path]
            # Subtract the line's offset to get the number of the first line.
            startline = matches[:startline].to_i - index
            basename = path_without_extension(path)
          end

          # A line without a prefix is kept untouched (nil.to_s is '').
          data << line.sub(prefix.to_s, '')
        end

        {
          binary_path: path,
          binary_basename: basename,
          ref: ref,
          startline: startline,
          binary_data: data.join,
          project: project
        }
      end

      # Repository given to the constructor, otherwise the project's one.
      def repository
        @repository ||= project.repository
      end
    end
  end
end