debian-mirror-gitlab/lib/gitlab/diff/file.rb

493 lines
13 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2015-04-26 12:48:37 +05:30
module Gitlab
module Diff
class File
2019-02-15 15:39:39 +05:30
include Gitlab::Utils::StrongMemoize
attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs, :unique_identifier
2017-09-10 17:25:29 +05:30
# Forward these readers and predicates to the wrapped diff object, keeping
# their names unchanged (no prefix).
delegate :new_file?, :deleted_file?, :renamed_file?,
  :old_path, :new_path, :a_mode, :b_mode, :mode_changed?,
  :submodule?, :expanded?, :too_large?, :collapsed?, :line_count, :has_binary_notice?,
  to: :diff, prefix: false
2017-09-10 17:25:29 +05:30
# Finding a viewer for a diff file happens based only on extension and whether the
# diff file blobs are binary or text, which means 1 diff file should only be matched by 1 viewer,
# and the order of these viewers doesn't really matter.
#
# However, when the diff file blobs are LFS pointers, we cannot know for sure whether the
# file being pointed to is binary or text. In this case, we match only on
# extension, preferring binary viewers over text ones if both exist, since the
# large files referred to in "Large File Storage" are much more likely to be
# binary than text.
# Candidate rich viewers, ordered so that viewers reporting `binary?` come
# before the others; the list is frozen.
RICH_VIEWERS = [
DiffViewer::Image
].sort_by { |v| v.binary? ? 0 : 1 }.freeze
2019-02-15 15:39:39 +05:30
# Wraps a single diff entry.
#
# diff                - the diff object; path and mode readers are delegated to it
# repository:         - repository used for blob lookups and feature-flag checks
# diff_refs:          - refs used for SHAs and positions (optional)
# fallback_diff_refs: - refs used for content SHAs when diff_refs is nil (optional)
# stats:              - optional object whose additions/deletions are preferred
#                       over counting diff lines
# unique_identifier:  - optional value exposed through its reader
def initialize(
  diff,
  repository:,
  diff_refs: nil,
  fallback_diff_refs: nil,
  stats: nil,
  unique_identifier: nil)
  @diff = diff
  @stats = stats
  @repository = repository
  @diff_refs = diff_refs
  @fallback_diff_refs = fallback_diff_refs
  @unique_identifier = unique_identifier
  @unfolded = false

  # Ensure items are collected in the batch
  new_blob_lazy
  old_blob_lazy

  # With the semantic ipynb diff enabled and the rendered-diff viewer disabled,
  # replace the raw diff with the preprocessed one (keeping the original when
  # preprocessing returns nothing).
  if use_semantic_ipynb_diff? && !use_renderable_diff?
    diff.diff = Gitlab::Diff::CustomDiff.preprocess_before_diff(diff.new_path, old_blob_lazy, new_blob_lazy) || diff.diff
  end
end
# Whether the :ipynb_semantic_diff feature flag is enabled for the
# repository's project. The answer is memoized.
def use_semantic_ipynb_diff?
  strong_memoize(:_use_semantic_ipynb_diff) { Feature.enabled?(:ipynb_semantic_diff, repository.project) }
end
2022-05-07 20:08:51 +05:30
# Whether the :rendered_diffs_viewer feature flag is enabled for the
# repository's project. The answer is memoized.
def use_renderable_diff?
  strong_memoize(:_renderable_diff_enabled) { Feature.enabled?(:rendered_diffs_viewer, repository.project) }
end
# Asks the rendered diff file whether it has a renderable version.
# Returns nil when there is no rendered diff file.
def has_renderable?
  rendered&.has_renderable?
end
2018-03-17 18:26:18 +05:30
# Builds a Position for the given marker.
#
# position_marker - a diff line for text positions; otherwise an object
#                   responding to `to_h` (used for image positions)
# position_type:  - :text (default) or another type; symbols and strings are
#                   treated alike
#
# Returns nil when there are no diff_refs.
def position(position_marker, position_type: :text)
  return unless diff_refs

  # Normalise once so that 'text' and :text take the same branch and the stored
  # type always agrees with the branch taken.
  type = position_type.to_s

  data = {
    diff_refs: diff_refs,
    position_type: type,
    old_path: old_path,
    new_path: new_path
  }

  if type == 'text'
    data.merge!(text_position_properties(position_marker))
  else
    data.merge!(image_position_properties(position_marker))
  end

  Position.new(data)
end
# Line code for a diff line, built from the file path and the line's new/old
# positions. Returns nil for meta lines.
def line_code(line)
  return if line.meta?

  Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
end
# First diff line whose line code equals `code`, or nil when none matches.
def line_for_line_code(code)
  diff_lines.each do |candidate|
    return candidate if line_code(candidate) == code
  end

  nil
end
# Diff line whose old/new line numbers both equal those of `pos`.
# Returns nil for non-text positions or when no line matches.
def line_for_position(pos)
  return unless pos.position_type == 'text'

  # This method is normally used to find which line the diff was
  # commented on, and in this context, it's normally the raw diff persisted
  # at `note_diff_files`, which is a fraction of the entire diff
  # (it goes from the first line, to the commented line, or
  # one line below). Therefore it's more performant to fetch
  # from bottom to top instead of the other way around.
  diff_lines
    .reverse_each
    .find { |line| line.old_line == pos.old_line && line.new_line == pos.new_line }
end
# Position of the diff line identified by `code`; nil when no such line exists.
def position_for_line_code(code)
  matched = line_for_line_code(code)
  return unless matched

  position(matched)
end
# Line code of the diff line found at `pos`; nil when no line is found there.
def line_code_for_position(pos)
  matched = line_for_position(pos)
  return unless matched

  line_code(matched)
end
2018-11-08 19:23:39 +05:30
# Returns the raw diff content up to the given diff line: the texts of the
# selected lines joined with newlines.
def diff_hunk(diff_line)
  limit = diff_line.index
  all_lines = diff_lines

  # @@ (match) header is not kept if it's found in the top of the file,
  # therefore we should keep an extra line on this scenario.
  limit += 1 unless all_lines.first.match?

  hunk_texts = all_lines.each_with_object([]) do |line, texts|
    texts << line.text if line.index <= limit
  end

  hunk_texts.join("\n")
end
2017-09-10 17:25:29 +05:30
# Base SHA of the diff refs, or nil when there are no diff refs.
def old_sha
  refs = diff_refs
  refs.base_sha unless refs.nil?
end
2016-08-24 12:49:21 +05:30
2017-09-10 17:25:29 +05:30
# Head SHA of the diff refs, or nil when there are no diff refs.
def new_sha
  diff_refs&.head_sha
end
2017-09-10 17:25:29 +05:30
# Head SHA used to load the new blob, taken from diff_refs or, failing that,
# fallback_diff_refs. Returns nil for deleted files. The value (including nil)
# is computed once and cached.
def new_content_sha
  return if deleted_file?
  return @new_content_sha if defined?(@new_content_sha)

  refs = diff_refs || fallback_diff_refs
  @new_content_sha = refs&.head_sha
end
2017-09-10 17:25:29 +05:30
# Base SHA used to load the old blob, taken from diff_refs or, failing that,
# fallback_diff_refs. Returns nil for new files. The value (including nil)
# is computed once and cached.
def old_content_sha
  return if new_file?
  return @old_content_sha if defined?(@old_content_sha)

  refs = diff_refs || fallback_diff_refs
  @old_content_sha = refs&.base_sha
end
2017-09-10 17:25:29 +05:30
# The blob on the new side of the diff, memoized; nil when there is none.
# NOTE(review): `&.itself` presumably forces the lazily-loaded blob to
# resolve — confirm against Blob.lazy.
def new_blob
  strong_memoize(:new_blob) do
    new_blob_lazy&.itself
  end
end
# The blob on the old side of the diff, memoized; nil when there is none.
# NOTE(review): `&.itself` presumably forces the lazily-loaded blob to
# resolve — confirm against Blob.lazy.
def old_blob
  strong_memoize(:old_blob) do
    old_blob_lazy&.itself
  end
end
2019-02-15 15:39:39 +05:30
# Lines of the new blob between two 1-based line numbers, both inclusive.
# Returns an empty array when there is no new blob.
def new_blob_lines_between(from_line, to_line)
  return [] unless new_blob

  # Convert the 1-based line numbers into a 0-based index range.
  index_range = (from_line - 1)..(to_line - 1)

  new_blob.load_all_data!
  new_blob.data.lines[index_range]
end
2017-09-10 17:25:29 +05:30
# The new content SHA when there is one, otherwise the old content SHA.
def content_sha
  sha = new_content_sha
  sha || old_content_sha
end
# The new blob when there is one, otherwise the old blob.
def blob
  new_blob || old_blob
end
2019-07-07 11:18:12 +05:30
# Replaces the highlighted diff lines and clears the memoized
# diff_lines_for_serializer value, which is derived from them.
def highlighted_diff_lines=(value)
clear_memoization(:diff_lines_for_serializer)
@highlighted_diff_lines = value
end
2016-09-13 17:45:13 +05:30
2016-08-24 12:49:21 +05:30
# Array of Gitlab::Diff::Line objects, parsed from the raw diff on first use
# and cached afterwards.
def diff_lines
  @diff_lines ||=
    Gitlab::Diff::Parser.new.parse(raw_diff.each_line, diff_file: self).to_a
end
2018-12-13 13:39:08 +05:30
# Changes diff_lines according to the given position. That is,
# it checks whether the position requires blob lines into the diff
# in order to be presented.
def unfold_diff_lines(position)
  return unless position

  lines_unfolder = Gitlab::Diff::LinesUnfolder.new(self, position)
  return unless lines_unfolder.unfold_required?

  # Swap in the unfolded lines and remember that this file was unfolded.
  @diff_lines = lines_unfolder.unfolded_diff_lines
  @unfolded = true
end
# Whether unfold_diff_lines has replaced the diff lines with unfolded ones
# (false right after construction).
def unfolded?
@unfolded
end
2019-02-15 15:39:39 +05:30
# Whether highlighted diff lines are already set on this file (checked with
# `present?` on the instance variable; does not trigger highlighting).
def highlight_loaded?
@highlighted_diff_lines.present?
end
2016-01-29 22:53:50 +05:30
# Highlighted diff lines, produced by Gitlab::Diff::Highlight on first use
# and cached afterwards.
def highlighted_diff_lines
  @highlighted_diff_lines ||=
    Gitlab::Diff::Highlight.new(self, repository: self.repository).highlight
end
2021-11-18 22:05:49 +05:30
# Array[<Hash>] with right/left keys that contains Gitlab::Diff::Line objects which text is highlighted.
# Built by Gitlab::Diff::ParallelDiff on first use and cached afterwards.
def parallel_diff_lines
  @parallel_diff_lines ||= Gitlab::Diff::ParallelDiff.new(self).parallelize
end
2015-04-26 12:48:37 +05:30
# The wrapped diff's content as a String (`to_s` turns nil into "").
def raw_diff
diff.diff.to_s
end
# Diff line that follows the given index; nil when the index is the last one.
def next_line(index)
diff_lines[index + 1]
end
# Diff line that precedes the given index; nil for the first line, so the
# lookup never wraps around to the end of the array.
def prev_line(index)
  diff_lines[index - 1] if index > 0
end
# Old and new paths, in that order, with nil entries dropped.
def paths
  [old_path, new_path].compact
end
# The new path when it is present, otherwise the old path.
def file_path
  new_path.presence || old_path
end
2015-10-24 18:46:33 +05:30
2020-06-23 00:09:42 +05:30
# SHA1 hex digest of the file path.
def file_hash
  path = file_path
  Digest::SHA1.hexdigest(path)
end
2015-10-24 18:46:33 +05:30
# Number of added lines: the stats' additions when available, otherwise a
# count of added diff lines. Memoized.
def added_lines
  strong_memoize(:added_lines) do
    @stats&.additions || diff_lines.count(&:added?)
  end
end
# Number of removed lines: the stats' deletions when available, otherwise a
# count of removed diff lines. Memoized.
def removed_lines
  strong_memoize(:removed_lines) do
    @stats&.deletions || diff_lines.count(&:removed?)
  end
end
2016-08-24 12:49:21 +05:30
2017-09-10 17:25:29 +05:30
# Identifier made of the file path followed by the new/deleted/renamed flags,
# joined with dashes.
def file_identifier
  parts = [file_path, new_file?, deleted_file?, renamed_file?]
  parts.join('-')
end
2016-08-24 12:49:21 +05:30
2020-06-23 00:09:42 +05:30
# SHA1 hex digest of the file identifier.
def file_identifier_hash
  identifier = file_identifier
  Digest::SHA1.hexdigest(identifier)
end
2017-09-10 17:25:29 +05:30
# A file is diffable when its `diff` attribute allows it and it is not a text
# file whose raw diff is only a binary notice.
def diffable?
  diffable_by_attribute? && !text_with_binary_notice?
end
2019-02-15 15:39:39 +05:30
# True when the diff carries a binary notice or either blob reports
# binary_in_repo?.
def binary_in_repo?
  has_binary_notice? || try_blobs(:binary_in_repo?)
end
2016-08-24 12:49:21 +05:30
2019-02-15 15:39:39 +05:30
# Negation of binary_in_repo?.
def text_in_repo?
  !binary_in_repo?
end
2016-11-03 12:29:30 +05:30
2017-09-10 17:25:29 +05:30
# Whether either blob reports external_storage_error?.
def external_storage_error?
  try_blobs(:external_storage_error?)
end
# Whether either blob reports stored_externally?.
def stored_externally?
  try_blobs(:stored_externally?)
end
# The external_storage value reported by the old blob or, failing that, the
# new blob.
def external_storage
  try_blobs(:external_storage)
end
# Whether the file content changed.
#
# With diff refs the answer comes from comparing blobs. Without them, new,
# deleted and renamed files count as unchanged; otherwise the file must be
# text and have at least one diff line.
def content_changed?
  if diff_refs
    blobs_changed?
  elsif new_file? || deleted_file? || renamed_file?
    false
  else
    text? && diff_lines.any?
  end
end
# Truthy only when both blobs exist and exactly one of them is binary.
def different_type?
old_blob && new_blob && old_blob.binary? != new_blob.binary?
end
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
# Sum of `size` over the blobs that exist (0 when there are none).
def size
  valid_blobs.sum(&:size)
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
# Sum of `raw_size` over the blobs that exist (0 when there are none).
def raw_size
  valid_blobs.sum(&:raw_size)
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
2019-02-15 15:39:39 +05:30
# True when every existing blob is empty (also true when there are no blobs).
def empty?
  # Pass the predicate straight to all? instead of building an intermediate
  # array of booleans.
  valid_blobs.all?(&:empty?)
end
# Whether either blob reports binary?. Memoized.
def binary?
  strong_memoize(:is_binary) do
    try_blobs(:binary?)
  end
end
2019-02-15 15:39:39 +05:30
# True when the file is not binary and the two blobs do not differ in
# binary-ness. Memoized.
def text?
  strong_memoize(:is_text) do
    !binary? && !different_type?
  end
end
2019-07-07 11:18:12 +05:30
# The rich viewer when one applies, otherwise the simple viewer.
def viewer
rich_viewer || simple_viewer
end
2017-09-10 17:25:29 +05:30
# Instance of the simple viewer class for this file, created once and cached.
def simple_viewer
@simple_viewer ||= simple_viewer_class.new(self)
end
# Instance of the rich viewer class for this file, or nil when no rich viewer
# applies. The result, including nil, is computed once and cached.
def rich_viewer
  @rich_viewer = rich_viewer_class&.new(self) unless defined?(@rich_viewer)
  @rich_viewer
end
2020-05-24 23:13:21 +05:30
# A new instance of the alternate viewer class, or nil when there is none.
# Not cached: each call builds a fresh viewer.
def alternate_viewer
alternate_viewer_class&.new(self)
end
2017-09-10 17:25:29 +05:30
# Whether the simple viewer is a text viewer.
#
# ignore_errors: - when false, the viewer must also have no render error.
def rendered_as_text?(ignore_errors: true)
  return false unless simple_viewer.is_a?(DiffViewer::Text)

  ignore_errors || simple_viewer.render_error.nil?
end
2018-11-08 19:23:39 +05:30
# This adds the bottom match line to the array if needed. It contains
# the data to load more context lines.
#
# Returns nil when there are no highlighted lines or no blob. The result is
# memoized. Note that the match line is pushed onto the array returned by
# highlighted_diff_lines itself, not onto a copy.
def diff_lines_for_serializer
  strong_memoize(:diff_lines_for_serializer) do
    lines = highlighted_diff_lines

    next if lines.empty?
    next if blob.nil?

    last_line = lines.last

    # The diff stops before the end of the blob: append a match line carrying
    # the last positions.
    if last_line.new_pos < total_blob_lines(blob) && !deleted_file?
      match_line = Gitlab::Diff::Line.new("", 'match', nil, last_line.old_pos, last_line.new_pos)
      lines.push(match_line)
    end

    lines
  end
end
# True for binary files, when there are no serializer lines, or when none of
# those lines is a 'match' line.
def fully_expanded?
  return true if binary?

  lines = diff_lines_for_serializer

  return true if lines.nil?

  lines.none? { |line| line.type.to_s == 'match' }
end
2022-05-07 20:08:51 +05:30
# Rendered notebook diff file for this file (memoized), or nil unless both
# feature flags are enabled and the file is a modified .ipynb file that is
# not too large.
def rendered
  return unless use_semantic_ipynb_diff? && use_renderable_diff? && ipynb? && modified_file? && !too_large?

  strong_memoize(:rendered) { Rendered::Notebook::DiffFile.new(self) }
end
2022-07-16 23:28:13 +05:30
# Whether the file path ends with the `.ipynb` extension.
def ipynb?
  # Core String#end_with? rather than the ActiveSupport alias.
  file_path.end_with?('.ipynb')
end
2017-09-10 17:25:29 +05:30
private
2021-07-02 01:05:55 +05:30
# Value of the `diff` entry in the repository attributes for this file path;
# defaults to true when the entry is absent.
def diffable_by_attribute?
  repository.attributes(file_path).fetch('diff', true)
end
# NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary
# by git, but they are recognized as text files during encoding detection.
# These files have `Binary files a/filename and b/filename differ` as their
# raw diff content, which cannot be used. We need to handle this special case
# and avoid displaying an incorrect diff.
def text_with_binary_notice?
text? && has_binary_notice?
end
2020-03-13 15:44:24 +05:30
# Lazily looks up the blob at `path` for `sha` in the repository.
# Returns nil when no sha is given.
def fetch_blob(sha, path)
  return unless sha

  Blob.lazy(repository, sha, path)
end
2018-11-08 19:23:39 +05:30
# Number of lines in the blob, not counting a blank last line.
#
# NOTE: the count is cached on first use regardless of which blob is passed
# in later calls.
def total_blob_lines(blob)
  @total_lines ||= begin
    count = blob.lines.size
    trailing_blank = count > 0 && blob.lines.last.blank?
    trailing_blank ? count - 1 : count
  end
end
2020-03-13 15:44:24 +05:30
# A file counts as modified when it is new, deleted, or its content changed.
def modified_file?
new_file? || deleted_file? || content_changed?
end
2018-05-09 12:01:36 +05:30
# Calls `meth` on the old blob and, when that yields nothing truthy (or there
# is no old blob), on the new blob.
#
# We can't use Object#try because Blob doesn't inherit from Object, but
# from BasicObject (via SimpleDelegator).
def try_blobs(meth)
  old_blob&.public_send(meth) || new_blob&.public_send(meth)
end
# Old and new blobs, in that order, with nil entries dropped.
def valid_blobs
  [old_blob, new_blob].compact
end
# Position attributes for a text position: the line's old and new line numbers.
def text_position_properties(line)
{ old_line: line.old_line, new_line: line.new_line }
end
# Position attributes for a non-text position: the marker converted with `to_h`.
def image_position_properties(image_point)
image_point.to_h
end
2017-09-10 17:25:29 +05:30
# Truthy only when both blobs exist and their ids differ.
def blobs_changed?
old_blob && new_blob && old_blob.id != new_blob.id
end
2018-05-09 12:01:36 +05:30
# Lazy blob for the new side: looked up at file_path for new_content_sha.
def new_blob_lazy
  fetch_blob(new_content_sha, file_path)
end
# Lazy blob for the old side: looked up at old_path for old_content_sha.
def old_blob_lazy
  fetch_blob(old_content_sha, old_path)
end
2017-09-10 17:25:29 +05:30
# Picks the simple viewer class for this file. The checks run in order and
# the first one that applies wins; NoPreview is the fallback.
def simple_viewer_class
  return DiffViewer::Collapsed if collapsed?
  return DiffViewer::NotDiffable unless diffable?
  return DiffViewer::Text if modified_file? && text?
  return DiffViewer::NoPreview if content_changed?
  return DiffViewer::Added if new_file?
  return DiffViewer::Deleted if deleted_file?
  return DiffViewer::Renamed if renamed_file?
  return DiffViewer::ModeChanged if mode_changed?

  DiffViewer::NoPreview
end
# Rich viewer class chosen from RICH_VIEWERS, or nil when none applies.
def rich_viewer_class
viewer_class_from(RICH_VIEWERS)
end
# First class in `classes` able to render this file. Returns nil when the
# file is collapsed, not diffable, or not modified.
def viewer_class_from(classes)
  return if collapsed?
  return unless diffable?
  return unless modified_file?

  find_renderable_viewer_class(classes)
end
# Alternate viewer class, available only when the current viewer is exactly a
# Renamed viewer: a renderable rich viewer if any, else the text viewer for
# text files, else nil.
def alternate_viewer_class
  return unless viewer.instance_of?(DiffViewer::Renamed)

  find_renderable_viewer_class(RICH_VIEWERS) || (DiffViewer::Text if text?)
end
# First class in `classes` whose `can_render?` accepts this file. Returns nil
# when the blobs differ in type or there is an external storage error.
# Binary verification is requested only when the content is not stored
# externally.
def find_renderable_viewer_class(classes)
  return if different_type? || external_storage_error?

  verify_binary = !stored_externally?

  classes.find { |viewer_class| viewer_class.can_render?(self, verify_binary: verify_binary) }
end
2015-04-26 12:48:37 +05:30
end
end
end