2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2015-04-26 12:48:37 +05:30
module Gitlab
module Diff
class File
2019-02-15 15:39:39 +05:30
# Methods below call `strong_memoize` and `clear_memoization`; this mixin is
# expected to supply them.
include Gitlab::Utils::StrongMemoize

# Read-only access to the values stored by #initialize.
attr_reader :diff,
  :repository,
  :diff_refs,
  :fallback_diff_refs,
  :unique_identifier
2017-09-10 17:25:29 +05:30
# Forward status predicates, paths, modes and size/collapse flags to the
# wrapped diff object. `prefix: false` keeps the forwarded names as listed.
delegate(
  *%i[
    new_file? deleted_file? renamed_file?
    old_path new_path a_mode b_mode mode_changed?
    submodule? expanded? too_large? collapsed? line_count has_binary_notice?
  ],
  to: :diff,
  prefix: false
)
2017-09-10 17:25:29 +05:30
# A viewer is chosen from the file extension and from whether the diff file
# blobs are binary or text, so a diff file should match at most one viewer
# and the order of the list normally does not matter.
#
# LFS pointers are the exception: we cannot tell whether the file being
# pointed to is binary or text, so matching falls back to the extension
# alone. Binary viewers are sorted first because files kept in
# "Large File Storage" are much more likely to be binary than text.
RICH_VIEWERS = [
  DiffViewer::Image
].sort_by { |viewer_class| viewer_class.binary? ? 0 : 1 }.freeze
2019-02-15 15:39:39 +05:30
# Wraps a single diff entry.
#
# diff               - object the delegated predicates/paths are forwarded to
# repository         - used for blob lookup, attributes and the feature flag check
# diff_refs          - optional refs answering #base_sha / #head_sha
# fallback_diff_refs - optional refs used for content SHAs when diff_refs is nil
# stats              - optional object answering #additions / #deletions
# unique_identifier  - optional value exposed through the reader as-is
def initialize(diff, repository:, diff_refs: nil, fallback_diff_refs: nil, stats: nil, unique_identifier: nil)
  @diff = diff
  @repository = repository
  @diff_refs = diff_refs
  @fallback_diff_refs = fallback_diff_refs
  @stats = stats
  @unique_identifier = unique_identifier
  @unfolded = false

  # Ensure items are collected in the batch: both lazy blob references are
  # requested up front (presumably so Blob.lazy can batch the lookups).
  new_blob_lazy
  old_blob_lazy
end
# Memoized check of the :ipynb_semantic_diff feature flag for the
# repository's project.
def use_semantic_ipynb_diff?
  strong_memoize(:_use_semantic_ipynb_diff) do
    Feature.enabled?(:ipynb_semantic_diff, repository.project)
  end
end
2022-05-07 20:08:51 +05:30
# Asks the rendered representation whether it has something renderable.
# Returns nil when #rendered is nil.
def has_renderable?
  rendered&.has_renderable?
end
2018-03-17 18:26:18 +05:30
# Builds a Position for the given marker.
#
# position_marker - a diff line for :text positions; otherwise an object
#                   answering #to_h
# position_type   - :text (default) selects the line-based properties, any
#                   other value selects the image properties
#
# Returns nil when there are no diff_refs.
def position(position_marker, position_type: :text)
  return unless diff_refs

  attributes = {
    diff_refs: diff_refs,
    position_type: position_type.to_s,
    old_path: old_path,
    new_path: new_path
  }

  marker_attributes =
    if position_type == :text
      text_position_properties(position_marker)
    else
      image_position_properties(position_marker)
    end

  Position.new(attributes.merge(marker_attributes))
end
# Line code for a diff line, built from the file path and the line's
# new/old positions. Meta lines have no line code (nil).
def line_code(line)
  unless line.meta?
    Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
  end
end
# First diff line whose line code equals the given code, or nil.
def line_for_line_code(code)
  diff_lines.find do |candidate|
    line_code(candidate) == code
  end
end
# Finds the diff line matching a text position's old/new line numbers.
# Returns nil for non-text positions or when nothing matches.
def line_for_position(pos)
  return unless pos.position_type == 'text'

  # This lookup normally serves to find the line a comment was left on. In
  # that context the diff is usually the raw diff persisted at
  # `note_diff_files`, which only covers the first line down to the
  # commented line (or one line below), so scanning from the bottom up is
  # cheaper than scanning from the top.
  diff_lines.reverse_each.find do |line|
    line.old_line == pos.old_line && line.new_line == pos.new_line
  end
end
# Position of the diff line identified by the given line code, or nil when
# no line has that code.
def position_for_line_code(code)
  matched_line = line_for_line_code(code)
  return unless matched_line

  position(matched_line)
end
# Line code of the diff line found for the given position, or nil when the
# position matches no line.
def line_code_for_position(pos)
  matched_line = line_for_position(pos)
  return unless matched_line

  line_code(matched_line)
end
2018-11-08 19:23:39 +05:30
# Returns the raw diff content up to the given line index, as the text of
# each selected line joined by a bare newline.
#
# diff_line - a diff line answering #index
def diff_hunk(diff_line)
  last_index = diff_line.index

  # The @@ (match) header is not kept when it sits at the very top of the
  # file, so in that case one extra line has to be included.
  last_index += 1 unless diff_lines.first.match?

  diff_lines
    .select { |line| line.index <= last_index }
    .map(&:text)
    .join("\n")
end
2017-09-10 17:25:29 +05:30
# Base SHA taken from diff_refs; nil when diff_refs is nil.
def old_sha
  diff_refs&.base_sha
end
2016-08-24 12:49:21 +05:30
2017-09-10 17:25:29 +05:30
# Head SHA taken from diff_refs; nil when diff_refs is nil.
def new_sha
  diff_refs&.head_sha
end
2017-09-10 17:25:29 +05:30
# SHA used to load the new-side blob: the head SHA of diff_refs, or of
# fallback_diff_refs when diff_refs is nil. Deleted files have no new
# content, so nil is returned for them.
def new_content_sha
  return if deleted_file?
  # `defined?` keeps a memoized nil from being recomputed.
  return @new_content_sha if defined?(@new_content_sha)

  @new_content_sha = (diff_refs || fallback_diff_refs)&.head_sha
end
2017-09-10 17:25:29 +05:30
# SHA used to load the old-side blob: the base SHA of diff_refs, or of
# fallback_diff_refs when diff_refs is nil. New files have no old content,
# so nil is returned for them.
def old_content_sha
  return if new_file?
  # `defined?` keeps a memoized nil from being recomputed.
  return @old_content_sha if defined?(@old_content_sha)

  @old_content_sha = (diff_refs || fallback_diff_refs)&.base_sha
end
2017-09-10 17:25:29 +05:30
# Memoized new-side blob, or nil when there is no lazy reference.
# NOTE(review): `itself` presumably forces the lazy reference to resolve;
# confirm against Blob.lazy.
def new_blob
  strong_memoize(:new_blob) { new_blob_lazy&.itself }
end
# Memoized old-side blob, or nil when there is no lazy reference.
# NOTE(review): `itself` presumably forces the lazy reference to resolve;
# confirm against Blob.lazy.
def old_blob
  strong_memoize(:old_blob) { old_blob_lazy&.itself }
end
2019-02-15 15:39:39 +05:30
# Lines of the new blob between two 1-based line numbers, both inclusive.
# Returns an empty array when there is no new blob.
def new_blob_lines_between(from_line, to_line)
  return [] unless new_blob

  # Line numbers are 1-based, array indexes are 0-based.
  index_range = (from_line - 1)..(to_line - 1)

  new_blob.load_all_data!
  new_blob.data.lines[index_range]
end
2017-09-10 17:25:29 +05:30
# Content SHA of the file: the new-side SHA when there is one, otherwise
# the old-side SHA.
def content_sha
  new_content_sha || old_content_sha
end
# The blob representing this file: the new-side blob when present,
# otherwise the old-side blob.
def blob
  new_blob || old_blob
end
2019-07-07 11:18:12 +05:30
# Replaces the highlighted diff lines. The memoized serializer lines are
# derived from them, so that memo is cleared first.
def highlighted_diff_lines=(value)
  clear_memoization(:diff_lines_for_serializer)

  @highlighted_diff_lines = value
end
2016-09-13 17:45:13 +05:30
2016-08-24 12:49:21 +05:30
# Array of Gitlab::Diff::Line objects, parsed once from the raw diff text
# and cached in @diff_lines.
def diff_lines
  @diff_lines ||= begin
    parser = Gitlab::Diff::Parser.new
    parser.parse(raw_diff.each_line, diff_file: self).to_a
  end
end
2018-12-13 13:39:08 +05:30
# Changes diff_lines according to the given position. That is, it checks
# whether blob lines must be added to the diff for the position to be
# presented, and if so swaps in the unfolded lines and marks the file as
# unfolded. Does nothing when no position is given.
def unfold_diff_lines(position)
  return unless position

  unfolder = Gitlab::Diff::LinesUnfolder.new(self, position)
  return unless unfolder.unfold_required?

  @diff_lines = unfolder.unfolded_diff_lines
  @unfolded = true
end
# Value of the @unfolded flag: false after #initialize, true once
# #unfold_diff_lines has replaced the diff lines.
def unfolded?
  @unfolded
end
2019-02-15 15:39:39 +05:30
# Whether highlighted diff lines are already set and non-blank. Reads the
# instance variable directly, so it never triggers highlighting itself.
def highlight_loaded?
  @highlighted_diff_lines.present?
end
2016-01-29 22:53:50 +05:30
# Highlighted diff lines, computed on first use and cached in
# @highlighted_diff_lines.
def highlighted_diff_lines
  @highlighted_diff_lines ||= begin
    highlighter = Gitlab::Diff::Highlight.new(self, repository: repository)
    highlighter.highlight
  end
end
2021-11-18 22:05:49 +05:30
# Array[<Hash>] with right/left keys holding Gitlab::Diff::Line objects
# whose text is highlighted. Computed once and cached.
def parallel_diff_lines
  @parallel_diff_lines ||= begin
    parallel_diff = Gitlab::Diff::ParallelDiff.new(self)
    parallel_diff.parallelize
  end
end
2015-04-26 12:48:37 +05:30
# Raw diff text of the wrapped diff object, always as a String (a nil diff
# becomes an empty string through #to_s).
def raw_diff
  content = diff.diff
  content.to_s
end
# Diff line following the given index, or nil past the end.
def next_line(index)
  following = index + 1
  diff_lines[following]
end
# Diff line preceding the given index. Returns nil for index 0 and below so
# a negative array index never wraps around to the end.
def prev_line(index)
  return unless index > 0

  diff_lines[index - 1]
end
# Old and new path, in that order, with nil entries removed.
def paths
  candidates = [old_path, new_path]
  candidates.compact
end
# Path identifying the file: the new path unless it is blank, otherwise the
# old path.
def file_path
  new_path.presence || old_path
end
2015-10-24 18:46:33 +05:30
2020-06-23 00:09:42 +05:30
# SHA1 hex digest of the file path.
def file_hash
  path = file_path
  Digest::SHA1.hexdigest(path)
end
2015-10-24 18:46:33 +05:30
# Number of added lines (memoized). Uses the stats object's additions when
# available, otherwise counts the added diff lines.
def added_lines
  strong_memoize(:added_lines) { @stats&.additions || diff_lines.count(&:added?) }
end
# Number of removed lines (memoized). Uses the stats object's deletions
# when available, otherwise counts the removed diff lines.
def removed_lines
  strong_memoize(:removed_lines) { @stats&.deletions || diff_lines.count(&:removed?) }
end
2016-08-24 12:49:21 +05:30
2017-09-10 17:25:29 +05:30
# Identifier combining the file path with its new/deleted/renamed flags,
# hyphen-separated with no padding, e.g. "app/a.rb-true-false-false".
def file_identifier
  "#{file_path}-#{new_file?}-#{deleted_file?}-#{renamed_file?}"
end
2016-08-24 12:49:21 +05:30
2020-06-23 00:09:42 +05:30
# SHA1 hex digest of #file_identifier.
def file_identifier_hash
  identifier = file_identifier
  Digest::SHA1.hexdigest(identifier)
end
2017-09-10 17:25:29 +05:30
# Whether a diff can be shown: the path's `diff` attribute allows it and the
# file is not a text file that only carries a binary notice.
def diffable?
  diffable_by_attribute? && !text_with_binary_notice?
end
2019-02-15 15:39:39 +05:30
# Truthy when the diff carries a binary notice or either blob answers
# truthy to #binary_in_repo?.
def binary_in_repo?
  has_binary_notice? || try_blobs(:binary_in_repo?)
end
2016-08-24 12:49:21 +05:30
2019-02-15 15:39:39 +05:30
# Negation of #binary_in_repo?.
def text_in_repo?
  !binary_in_repo?
end
2016-11-03 12:29:30 +05:30
2017-09-10 17:25:29 +05:30
# Result of #external_storage_error? from the old blob, falling back to the
# new blob when that is nil or false.
def external_storage_error?
  try_blobs(:external_storage_error?)
end
# Result of #stored_externally? from the old blob, falling back to the new
# blob when that is nil or false.
def stored_externally?
  try_blobs(:stored_externally?)
end
# Result of #external_storage from the old blob, falling back to the new
# blob when that is nil or false.
def external_storage
  try_blobs(:external_storage)
end
# Whether the file content differs between the two sides.
#
# With diff_refs the answer comes from comparing blob ids. Without them,
# added, deleted and renamed files count as unchanged, and anything else
# counts as changed only when it is text and has diff lines.
def content_changed?
  if diff_refs
    blobs_changed?
  elsif new_file? || deleted_file? || renamed_file?
    false
  else
    text? && diff_lines.any?
  end
end
# True when both blobs exist and exactly one of them is binary. Returns a
# falsy value (nil) when either blob is missing.
def different_type?
  old_blob &&
    new_blob &&
    old_blob.binary? != new_blob.binary?
end
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
# Sum of #size over the blobs that exist (0 when there are none).
def size
  valid_blobs.sum { |blob| blob.size }
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
2018-12-05 23:21:45 +05:30
# rubocop: disable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
# Sum of #raw_size over the blobs that exist (0 when there are none).
def raw_size
  valid_blobs.sum { |blob| blob.raw_size }
end
2018-12-05 23:21:45 +05:30
# rubocop: enable CodeReuse/ActiveRecord
2017-09-10 17:25:29 +05:30
2019-02-15 15:39:39 +05:30
# True when every existing blob is empty; also true when there are no
# blobs at all.
def empty?
  valid_blobs.all?(&:empty?)
end
# Memoized: truthy when the old blob, or failing that the new blob, answers
# truthy to #binary?.
def binary?
  strong_memoize(:is_binary) { try_blobs(:binary?) }
end
2019-02-15 15:39:39 +05:30
# Memoized: the file is text when it is not binary and its two sides do not
# differ in type.
def text?
  strong_memoize(:is_text) { !(binary? || different_type?) }
end
2019-07-07 11:18:12 +05:30
# Viewer used for this file: the rich viewer when one applies, otherwise
# the simple viewer.
def viewer
  rich_viewer || simple_viewer
end
2017-09-10 17:25:29 +05:30
# Simple viewer instance for this file, built once from
# #simple_viewer_class and cached.
def simple_viewer
  @simple_viewer ||= begin
    viewer_class = simple_viewer_class
    viewer_class.new(self)
  end
end
# Rich viewer instance for this file, or nil when no rich viewer class
# applies. `defined?` is used so a nil result is cached as well.
def rich_viewer
  unless defined?(@rich_viewer)
    @rich_viewer = rich_viewer_class&.new(self)
  end

  @rich_viewer
end
2020-05-24 23:13:21 +05:30
# New instance of the alternate viewer class, or nil when there is none.
# Not cached: each call builds a fresh instance.
def alternate_viewer
  alternate_viewer_class&.new(self)
end
2017-09-10 17:25:29 +05:30
# Whether the simple viewer is a text viewer.
#
# ignore_errors - when falsy, the viewer must also have no render error
def rendered_as_text?(ignore_errors: true)
  return false unless simple_viewer.is_a?(DiffViewer::Text)

  ignore_errors || simple_viewer.render_error.nil?
end
2018-11-08 19:23:39 +05:30
# Highlighted diff lines prepared for serialization (memoized).
#
# This adds the bottom match line to the array if needed. It contains the
# data to load more context lines.
#
# Returns nil when there are no highlighted lines or no blob.
# NOTE(review): the match line is pushed onto the array returned by
# #highlighted_diff_lines itself, so that array is mutated in place.
def diff_lines_for_serializer
  strong_memoize(:diff_lines_for_serializer) do
    lines = highlighted_diff_lines

    next if lines.empty?
    next if blob.nil?

    last_line = lines.last

    # More of the blob remains below the last diff line, so append an empty
    # match line carrying the positions to continue from.
    if last_line.new_pos < total_blob_lines(blob) && !deleted_file?
      match_line = Gitlab::Diff::Line.new("", 'match', nil, last_line.old_pos, last_line.new_pos)
      lines.push(match_line)
    end

    lines
  end
end
# Whether nothing is left to expand: true for binary files, when there are
# no serializer lines, or when none of those lines is a 'match' line.
def fully_expanded?
  return true if binary?

  serializer_lines = diff_lines_for_serializer
  return true if serializer_lines.nil?

  serializer_lines.none? { |entry| entry.type.to_s == 'match' }
end
2022-05-07 20:08:51 +05:30
# Rendered notebook representation of this file (memoized), or nil unless
# the semantic ipynb diff is enabled and the file is a modified .ipynb file
# that is neither collapsed nor too large.
def rendered
  return unless use_semantic_ipynb_diff? && ipynb? && modified_file? && !(collapsed? || too_large?)

  strong_memoize(:rendered) do
    Rendered::Notebook::DiffFile.new(self)
  end
end
2022-07-16 23:28:13 +05:30
# Whether the file path ends in the '.ipynb' extension.
def ipynb?
  path = file_path
  path.ends_with?('.ipynb')
end
2017-09-10 17:25:29 +05:30
private
2021-07-02 01:05:55 +05:30
# Value of the 'diff' attribute the repository reports for this path,
# defaulting to true when the attribute is not set.
def diffable_by_attribute?
  path_attributes = repository.attributes(file_path)
  path_attributes.fetch('diff', true)
end
# NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary
# by git, but encoding detection recognizes them as text files. Their raw
# diff content is `Binary files a/filename and b/filename differ', which
# cannot be used. This predicate detects that special case so an incorrect
# diff is not displayed.
def text_with_binary_notice?
  text? && has_binary_notice?
end
2020-03-13 15:44:24 +05:30
# Lazy blob reference for the given SHA and path in this repository, or nil
# when no SHA is given.
def fetch_blob(sha, path)
  Blob.lazy(repository, sha, path) if sha
end
2018-11-08 19:23:39 +05:30
# Number of lines in the blob, not counting a blank last line.
# NOTE(review): the result is cached in @total_lines regardless of which
# blob is passed, so later calls with a different blob reuse the first
# answer. The only caller visible here always passes #blob.
def total_blob_lines(blob)
  @total_lines ||= begin
    count = blob.lines.size
    trailing_blank = count > 0 && blob.lines.last.blank?
    trailing_blank ? count - 1 : count
  end
end
2020-03-13 15:44:24 +05:30
# Whether the file was added, deleted, or had its content changed.
def modified_file?
  new_file? ||
    deleted_file? ||
    content_changed?
end
2018-05-09 12:01:36 +05:30
# Sends the given method to the old blob and, when that yields nil or
# false (or there is no old blob), to the new blob.
#
# We can't use Object#try because Blob doesn't inherit from Object, but
# from BasicObject (via SimpleDelegator).
def try_blobs(meth)
  old_blob&.public_send(meth) ||
    new_blob&.public_send(meth)
end
# Old and new blob, in that order, with missing (nil) ones removed.
def valid_blobs
  both_sides = [old_blob, new_blob]
  both_sides.compact
end
# Position attributes for a text position: the old and new line numbers of
# the given diff line.
def text_position_properties(line)
  {
    old_line: line.old_line,
    new_line: line.new_line
  }
end
# Position attributes for an image position: whatever the given point
# returns from #to_h.
def image_position_properties(image_point)
  point_attributes = image_point.to_h
  point_attributes
end
2017-09-10 17:25:29 +05:30
# True when both blobs exist and their ids differ. Returns a falsy value
# (nil) when either blob is missing.
def blobs_changed?
  old_blob &&
    new_blob &&
    old_blob.id != new_blob.id
end
2018-05-09 12:01:36 +05:30
# Lazy reference to the new-side blob: new content SHA at the file path.
# Nil when there is no new content SHA.
def new_blob_lazy
  sha = new_content_sha
  fetch_blob(sha, file_path)
end
# Lazy reference to the old-side blob: old content SHA at the old path.
# Nil when there is no old content SHA.
def old_blob_lazy
  sha = old_content_sha
  fetch_blob(sha, old_path)
end
2017-09-10 17:25:29 +05:30
# Picks the simple viewer class. The checks run in order and the first one
# that applies wins; NoPreview is the fallback when none does.
def simple_viewer_class
  if collapsed?
    DiffViewer::Collapsed
  elsif !diffable?
    DiffViewer::NotDiffable
  elsif modified_file? && text?
    DiffViewer::Text
  elsif content_changed?
    DiffViewer::NoPreview
  elsif new_file?
    DiffViewer::Added
  elsif deleted_file?
    DiffViewer::Deleted
  elsif renamed_file?
    DiffViewer::Renamed
  elsif mode_changed?
    DiffViewer::ModeChanged
  else
    DiffViewer::NoPreview
  end
end
# Rich viewer class for this file, chosen from RICH_VIEWERS; nil when none
# applies.
def rich_viewer_class
  candidates = RICH_VIEWERS
  viewer_class_from(candidates)
end
# First renderable viewer class from the given list. Returns nil when the
# file is collapsed, not diffable, or not modified.
def viewer_class_from(classes)
  return if collapsed? || !diffable? || !modified_file?

  find_renderable_viewer_class(classes)
end
# Alternate viewer class, offered only when the current viewer is exactly a
# Renamed viewer: a renderable rich viewer if there is one, otherwise the
# text viewer for text files, otherwise nil.
def alternate_viewer_class
  return unless viewer.instance_of?(DiffViewer::Renamed)

  rich_class = find_renderable_viewer_class(RICH_VIEWERS)
  rich_class || (DiffViewer::Text if text?)
end
# First class in the list that reports it can render this file. Returns nil
# when the two sides differ in type or external storage reported an error.
def find_renderable_viewer_class(classes)
  return if different_type? || external_storage_error?

  # Binary verification is skipped for externally stored files.
  verify_binary = !stored_externally?

  classes.find do |candidate|
    candidate.can_render?(self, verify_binary: verify_binary)
  end
end
2015-04-26 12:48:37 +05:30
end
end
end