# frozen_string_literal: true

module Gitlab
  module Search
    # Represents a single blob found by a code search.
    #
    # A FoundBlob can be constructed from either:
    # * a raw grep-style +content_match+ string (parsed lazily via
    #   CONTENT_REGEXP / PATH_REGEXP), or
    # * a +blob_path+, in which case the blob data is batch-fetched from
    #   the repository on first access (see #fetch_blob).
    class FoundBlob
      include EncodingHelper
      include Presentable
      include BlobLanguageFromGitAttributes
      include Gitlab::Utils::StrongMemoize
      include BlobActiveModel

      attr_reader :project, :content_match, :blob_path, :highlight_line, :matched_lines_count

      # Matches "ref:path\0" at the start of a grep result.
      # The named captures (ref, path) are read by #search_result_path,
      # so they must not be removed.
      PATH_REGEXP = /\A(?<ref>[^:]*):(?<path>[^\x00]*)\x00/.freeze

      # Matches the "ref:path\0startline\0" prefix on each line of a
      # content match; named captures are read by #parse_search_result.
      CONTENT_REGEXP = /^(?<ref>[^:]*):(?<path>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze

      # Eagerly batch-loads blob data for every FoundBlob in +blobs+ that
      # was built from a blob_path, avoiding one repository round-trip per
      # blob when the collection is rendered.
      def self.preload_blobs(blobs)
        to_fetch = blobs.select { |blob| blob.is_a?(self) && blob.blob_path }

        to_fetch.each { |blob| blob.fetch_blob }
      end

      def initialize(opts = {})
        @id = opts.fetch(:id, nil)
        @binary_path = opts.fetch(:path, nil)
        @binary_basename = opts.fetch(:basename, nil)
        @ref = opts.fetch(:ref, nil)
        @matched_lines_count = opts.fetch(:matched_lines_count, nil)
        @startline = opts.fetch(:startline, nil)
        @highlight_line = opts.fetch(:highlight_line, nil)
        @binary_data = opts.fetch(:data, nil)
        @per_page = opts.fetch(:per_page, 20)
        @project = opts.fetch(:project, nil)
        # Some callers (e.g. Elasticsearch) do not have the Project object,
        # yet they can trigger many calls in one go,
        # causing duplicated queries.
        # Allow those to just pass project_id instead.
        @project_id = opts.fetch(:project_id, nil)
        @content_match = opts.fetch(:content_match, nil)
        @blob_path = opts.fetch(:blob_path, nil)
        @repository = opts.fetch(:repository, nil)
      end

      def id
        @id ||= parsed_content[:id]
      end

      def ref
        @ref ||= parsed_content[:ref]
      end

      def startline
        @startline ||= parsed_content[:startline]
      end

      # binary_path is used for running filters on all matches.
      # For grepped results (which use content_match), we get
      # the path from the beginning of the grepped result which is faster
      # than parsing the whole snippet
      def binary_path
        @binary_path ||= content_match ? search_result_path : parsed_content[:binary_path]
      end

      def path
        @path ||= encode_utf8(@binary_path || parsed_content[:binary_path])
      end

      def basename
        @basename ||= encode_utf8(@binary_basename || parsed_content[:binary_basename])
      end

      def data
        @data ||= encode_utf8(@binary_data || parsed_content[:binary_data])
      end

      def project_id
        @project_id || @project&.id
      end

      def present
        super(presenter_class: BlobPresenter)
      end

      def binary?
        false
      end

      # Batch-loads this blob's data (id, path, content, …) keyed on
      # [ref, blob_path]. If the blob cannot be fetched, the default value
      # still exposes at least the blob path.
      def fetch_blob
        path = [ref, blob_path]
        missing_blob = { binary_path: blob_path }

        BatchLoader.for(path).batch(default_value: missing_blob) do |refs, loader|
          Gitlab::Git::Blob.batch(repository, refs, blob_size_limit: 1024).each do |blob|
            # if the blob couldn't be fetched for some reason,
            # show at least the blob path
            data = {
              id: blob.id,
              binary_path: blob.path,
              binary_basename: path_without_extension(blob.path),
              ref: ref,
              startline: 1,
              binary_data: blob.data,
              project: project
            }

            loader.call([ref, blob.path], data)
          end
        end
      end

      private

      # Extracts only the path from the "ref:path\0…" prefix of a grep
      # result, without parsing the whole snippet.
      def search_result_path
        content_match.match(PATH_REGEXP) { |matches| matches[:path] }
      end

      def path_without_extension(path)
        Pathname.new(path).sub_ext('').to_s
      end

      # Memoized source of blob attributes: parsed from the grep output
      # when a content_match is present, batch-fetched when only a
      # blob_path is known, empty otherwise.
      def parsed_content
        strong_memoize(:parsed_content) do
          if content_match
            parse_search_result
          elsif blob_path
            fetch_blob
          else
            {}
          end
        end
      end

      # Parses a multi-line grep result. Every line is prefixed with
      # "ref:path\0startline\0"; the prefix of the first line determines
      # ref/path/startline, and the prefix is stripped from each data line.
      def parse_search_result
        ref = nil
        path = nil
        basename = nil

        data = []
        startline = 0

        content_match.each_line.each_with_index do |line, index|
          prefix ||= line.match(CONTENT_REGEXP)&.tap do |matches|
            ref = matches[:ref]
            path = matches[:path]
            startline = matches[:startline]
            # The prefix carries the line number of the *matched* line;
            # subtract the offset so startline points at the first line
            # of the snippet.
            startline = startline.to_i - index
            basename = path_without_extension(path)
          end

          data << line.sub(prefix.to_s, '')
        end

        {
          binary_path: path,
          binary_basename: basename,
          ref: ref,
          startline: startline,
          binary_data: data.join,
          project: project
        }
      end

      def repository
        @repository ||= project&.repository
      end
    end
  end
end