debian-mirror-gitlab/lib/gitlab/search/found_blob.rb

# frozen_string_literal: true

module Gitlab
  module Search
    class FoundBlob
      include EncodingHelper
      include Presentable
      include BlobLanguageFromGitAttributes
      include Gitlab::Utils::StrongMemoize
      include BlobActiveModel

      attr_reader :project, :content_match, :blob_path, :highlight_line, :matched_lines_count

      # Matches a leading "<ref>:<path>\0" anchored at the very start of the
      # string (\A). Named captures: ref (any run of non-colon characters) and
      # path (any run of non-NUL characters).
      PATH_REGEXP = /\A(?<ref>[^:]*):(?<path>[^\x00]*)\x00/.freeze
      # Matches "<ref>:<path>\0<digits>\0" at the start of a line (^). Same
      # ref/path captures as PATH_REGEXP plus startline (one or more digits).
      CONTENT_REGEXP = /^(?<ref>[^:]*):(?<path>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze

      # Calls +fetch_blob+ on every element of +blobs+ that is an instance of
      # this class and has a truthy +blob_path+; all other elements are skipped.
      #
      # Returns the array of elements that were selected.
      def self.preload_blobs(blobs)
        candidates = blobs.select do |candidate|
          candidate.is_a?(self) && candidate.blob_path
        end

        candidates.each(&:fetch_blob)
      end

      # Builds a FoundBlob from an options hash. Every key is optional; a
      # missing key yields nil, except :per_page which defaults to 20.
      #
      # Recognised keys: :id, :path, :basename, :ref, :matched_lines_count,
      # :startline, :highlight_line, :data, :per_page, :project, :project_id,
      # :content_match, :blob_path and :repository.
      #
      # Note that :path, :basename and :data are stored in the @binary_*
      # instance variables.
      def initialize(opts = {})
        read = ->(key, fallback = nil) { opts.fetch(key, fallback) }

        @id = read.call(:id)
        @binary_path = read.call(:path)
        @binary_basename = read.call(:basename)
        @ref = read.call(:ref)
        @matched_lines_count = read.call(:matched_lines_count)
        @startline = read.call(:startline)
        @highlight_line = read.call(:highlight_line)
        @binary_data = read.call(:data)
        @per_page = read.call(:per_page, 20)
        @project = read.call(:project)
        # Some callers (e.g. Elasticsearch) do not have the Project object,
        # yet they can trigger many calls in one go, causing duplicated
        # queries. Those callers may pass just project_id instead.
        @project_id = read.call(:project_id)
        @content_match = read.call(:content_match)
        @blob_path = read.call(:blob_path)
        @repository = read.call(:repository)
      end

      # Returns @id when it is already set; otherwise stores and returns
      # +parsed_content[:id]+.
      def id
        return @id if @id

        @id = parsed_content[:id]
      end

      # Returns @ref when it is already set; otherwise stores and returns
      # +parsed_content[:ref]+.
      def ref
        return @ref if @ref

        @ref = parsed_content[:ref]
      end

      # Returns @startline when it is already set; otherwise stores and
      # returns +parsed_content[:startline]+.
      def startline
        return @startline if @startline

        @startline = parsed_content[:startline]
      end

      # binary_path is used for running filters on all matches.
      # For grepped results (which use content_match), we get
      # the path from the beginning of the grepped result which is faster
      # than parsing the whole snippet
      def binary_path
        return @binary_path if @binary_path

        # With a content_match only its leading prefix is inspected; without
        # one the path comes from the fully parsed content.
        @binary_path = if content_match
                         search_result_path
                       else
                         parsed_content[:binary_path]
                       end
      end

      # Returns the result of +encode_utf8+ applied to @binary_path when that
      # is set, otherwise to +parsed_content[:binary_path]+. A truthy result
      # is kept in @path and reused on later calls.
      def path
        return @path if @path

        raw_path = @binary_path || parsed_content[:binary_path]
        @path = encode_utf8(raw_path)
      end

      # Returns the result of +encode_utf8+ applied to @binary_basename when
      # that is set, otherwise to +parsed_content[:binary_basename]+. A truthy
      # result is kept in @basename and reused on later calls.
      def basename
        return @basename if @basename

        raw_basename = @binary_basename || parsed_content[:binary_basename]
        @basename = encode_utf8(raw_basename)
      end

      # Returns the result of +encode_utf8+ applied to @binary_data when that
      # is set, otherwise to +parsed_content[:binary_data]+. A truthy result
      # is kept in @data and reused on later calls.
      def data
        return @data if @data

        raw_data = @binary_data || parsed_content[:binary_data]
        @data = encode_utf8(raw_data)
      end

      # Returns @project_id when set; otherwise the id of @project, or nil
      # when there is no @project either.
      def project_id
        return @project_id if @project_id

        @project&.id
      end

      # Delegates to the inherited +present+ via +super+, always passing
      # BlobPresenter as the presenter_class keyword.
      def present
        super(presenter_class: BlobPresenter)
      end

      # Always returns false, regardless of the blob's contents.
      def binary?
        false
      end

      # Requests this blob through BatchLoader, keyed by the [ref, blob_path]
      # pair, and returns whatever BatchLoader hands back for that key.
      #
      # The batch block passes all requested pairs to Gitlab::Git::Blob.batch
      # in one call (with blob_size_limit: 1024) and registers one attribute
      # hash per returned blob under [ref, blob.path].
      #
      # NOTE(review): +ref+ is evaluated here. When no :ref was supplied it is
      # derived from parsed_content, which itself calls fetch_blob whenever
      # blob_path is set -- confirm callers always pass :ref with :blob_path.
      def fetch_blob
        lookup_key = [ref, blob_path]
        # If the blob couldn't be fetched for some reason,
        # show at least the blob path.
        fallback = { binary_path: blob_path }

        BatchLoader.for(lookup_key).batch(default_value: fallback) do |pairs, loader|
          fetched = Gitlab::Git::Blob.batch(repository, pairs, blob_size_limit: 1024)

          fetched.each do |blob|
            attributes = {
              id: blob.id,
              binary_path: blob.path,
              binary_basename: path_without_extension(blob.path),
              ref: ref,
              startline: 1,
              binary_data: blob.data,
              project: project
            }

            loader.call([ref, blob.path], attributes)
          end
        end
      end

      private

      # Returns the path capture of PATH_REGEXP matched against content_match,
      # or nil when content_match does not start with a matching prefix.
      def search_result_path
        prefix = content_match.match(PATH_REGEXP)

        prefix[:path] if prefix
      end

      # Returns +path+ as a String with its extension removed, using
      # Pathname#sub_ext with an empty replacement.
      def path_without_extension(path)
        stripped = Pathname.new(path).sub_ext('')
        stripped.to_s
      end

      # Memoized (via strong_memoize) source of the parsed attributes:
      # - parse_search_result when content_match is present,
      # - fetch_blob when only blob_path is present,
      # - an empty hash otherwise.
      def parsed_content
        strong_memoize(:parsed_content) do
          next parse_search_result if content_match
          next fetch_blob if blob_path

          {}
        end
      end

      # Parses the search output held in content_match into the hash shape
      # read through parsed_content.
      #
      # Each line is matched against CONTENT_REGEXP, i.e. it is expected to
      # start with "<ref>:<path>\0<line number>\0" (presumably `git grep`
      # style output -- confirm with the callers). The prefix is removed from
      # every line that carries one; lines without it are kept verbatim.
      #
      # Returns a Hash with :binary_path, :binary_basename, :ref, :startline,
      # :binary_data and :project. ref and path come from the last line that
      # matched. When no line matches they are nil, :binary_basename is nil,
      # :startline is 0 and :binary_data equals content_match.
      def parse_search_result
        ref = nil
        path = nil
        startline = 0

        snippet = content_match.each_line.with_index.map do |line, index|
          # Every line carries its own line number, so the prefix has to be
          # matched per line; it cannot be reused from an earlier line.
          prefix = line.match(CONTENT_REGEXP)
          next line unless prefix

          ref = prefix[:ref]
          path = prefix[:path]
          # The captured number belongs to this line; subtracting the line's
          # offset gives the number of the snippet's first line.
          startline = prefix[:startline].to_i - index

          line.sub(prefix.to_s, '')
        end

        {
          binary_path: path,
          # Derived once from the final path instead of once per matching line.
          binary_basename: path && path_without_extension(path),
          ref: ref,
          startline: startline,
          binary_data: snippet.join,
          project: project
        }
      end

      # Returns @repository when set; otherwise stores and returns the
      # project's repository (nil when there is no project).
      def repository
        return @repository if @repository

        @repository = project&.repository
      end
    end
  end
end
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`# frozen_string_literal: true`

			`module Gitlab`
			`module Search`
			`class FoundBlob`
			`include EncodingHelper`
			`include Presentable`
			`include BlobLanguageFromGitAttributes`
			`include Gitlab::Utils::StrongMemoize`
New upstream version 12.8.6 2020-03-13 15:44:24 +05:30			`include BlobActiveModel`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
New upstream version 15.9.2+ds1 2023-04-23 21:23:45 +05:30			`attr_reader :project, :content_match, :blob_path, :highlight_line, :matched_lines_count`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`PATH_REGEXP = /\A(?<ref>[^:]*):(?<path>[^\x00]*)\x00/.freeze`
			`CONTENT_REGEXP = /^(?<ref>[^:]*):(?<path>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
			`def self.preload_blobs(blobs)`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`to_fetch = blobs.select { \|blob\| blob.is_a?(self) && blob.blob_path }`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
			`to_fetch.each { \|blob\| blob.fetch_blob }`
			`end`

			`def initialize(opts = {})`
			`@id = opts.fetch(:id, nil)`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`@binary_path = opts.fetch(:path, nil)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`@binary_basename = opts.fetch(:basename, nil)`
			`@ref = opts.fetch(:ref, nil)`
New upstream version 15.9.2+ds1 2023-04-23 21:23:45 +05:30			`@matched_lines_count = opts.fetch(:matched_lines_count, nil)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`@startline = opts.fetch(:startline, nil)`
New upstream version 13.6.5 2021-01-29 00:20:46 +05:30			`@highlight_line = opts.fetch(:highlight_line, nil)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`@binary_data = opts.fetch(:data, nil)`
			`@per_page = opts.fetch(:per_page, 20)`
			`@project = opts.fetch(:project, nil)`
New upstream version 12.1.11 2019-09-30 21:07:59 +05:30			`# Some callers (e.g. Elasticsearch) do not have the Project object,`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`# yet they can trigger many calls in one go,`
			`# causing duplicated queries.`
			`# Allow those to just pass project_id instead.`
			`@project_id = opts.fetch(:project_id, nil)`
			`@content_match = opts.fetch(:content_match, nil)`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`@blob_path = opts.fetch(:blob_path, nil)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`@repository = opts.fetch(:repository, nil)`
			`end`

			`def id`
			`@id \|\|= parsed_content[:id]`
			`end`

			`def ref`
			`@ref \|\|= parsed_content[:ref]`
			`end`

			`def startline`
			`@startline \|\|= parsed_content[:startline]`
			`end`

New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`# binary_path is used for running filters on all matches.`
			`# For grepped results (which use content_match), we get`
			`# the path from the beginning of the grepped result which is faster`
			`# than parsing the whole snippet`
			`def binary_path`
			`@binary_path \|\|= content_match ? search_result_path : parsed_content[:binary_path]`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`end`

New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`def path`
			`@path \|\|= encode_utf8(@binary_path \|\| parsed_content[:binary_path])`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`end`

			`def basename`
			`@basename \|\|= encode_utf8(@binary_basename \|\| parsed_content[:binary_basename])`
			`end`

			`def data`
			`@data \|\|= encode_utf8(@binary_data \|\| parsed_content[:binary_data])`
			`end`

			`def project_id`
			`@project_id \|\| @project&.id`
			`end`

			`def present`
			`super(presenter_class: BlobPresenter)`
			`end`

New upstream version 14.8.5+ds1 2022-04-04 11:22:00 +05:30			`def binary?`
			`false`
			`end`

New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`def fetch_blob`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`path = [ref, blob_path]`
			`missing_blob = { binary_path: blob_path }`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30
			`BatchLoader.for(path).batch(default_value: missing_blob) do \|refs, loader\|`
			`Gitlab::Git::Blob.batch(repository, refs, blob_size_limit: 1024).each do \|blob\|`
			`# if the blob couldn't be fetched for some reason,`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`# show at least the blob path`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`data = {`
			`id: blob.id,`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`binary_path: blob.path,`
New upstream version 12.1.11 2019-09-30 21:07:59 +05:30			`binary_basename: path_without_extension(blob.path),`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`ref: ref,`
			`startline: 1,`
			`binary_data: blob.data,`
			`project: project`
			`}`

			`loader.call([ref, blob.path], data)`
			`end`
			`end`
			`end`

			`private`

New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`def search_result_path`
			`content_match.match(PATH_REGEXP) { \|matches\| matches[:path] }`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`end`

New upstream version 12.1.11 2019-09-30 21:07:59 +05:30			`def path_without_extension(path)`
			`Pathname.new(path).sub_ext('').to_s`
			`end`

New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`def parsed_content`
			`strong_memoize(:parsed_content) do`
			`if content_match`
			`parse_search_result`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`elsif blob_path`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`fetch_blob`
			`else`
			`{}`
			`end`
			`end`
			`end`

			`def parse_search_result`
			`ref = nil`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`path = nil`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`basename = nil`

			`data = []`
			`startline = 0`

			`content_match.each_line.each_with_index do \|line, index\|`
			`prefix \|\|= line.match(CONTENT_REGEXP)&.tap do \|matches\|`
			`ref = matches[:ref]`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`path = matches[:path]`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`startline = matches[:startline]`
			`startline = startline.to_i - index`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`basename = path_without_extension(path)`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`end`

			`data << line.sub(prefix.to_s, '')`
			`end`

			`{`
New upstream version 12.5.4 2019-12-26 22:10:19 +05:30			`binary_path: path,`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`binary_basename: basename,`
			`ref: ref,`
			`startline: startline,`
			`binary_data: data.join,`
			`project: project`
			`}`
			`end`

			`def repository`
New upstream version 12.9.2 2020-04-08 14:13:33 +05:30			`@repository \|\|= project&.repository`
New upstream version 11.7.5 2019-02-15 15:39:39 +05:30			`end`
			`end`
			`end`
			`end`