2018-11-18 11:00:15 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
class MergeRequestDiff < ApplicationRecord
|
2015-04-26 12:48:37 +05:30
|
|
|
include Sortable
|
2016-06-22 15:30:34 +05:30
|
|
|
include Importable
|
2018-03-17 18:26:18 +05:30
|
|
|
include ManualInverseAssociation
|
2018-11-08 19:23:39 +05:30
|
|
|
include EachBatch
|
2018-12-13 13:39:08 +05:30
|
|
|
include Gitlab::Utils::StrongMemoize
|
2019-03-02 22:35:43 +05:30
|
|
|
include ObjectStorage::BackgroundMove
|
2020-04-08 14:13:33 +05:30
|
|
|
include BulkInsertableAssociations
|
2015-04-26 12:48:37 +05:30
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
# Don't display more than 100 commits at once
|
2016-06-02 11:05:42 +05:30
|
|
|
COMMITS_SAFE_SIZE = 100
|
2020-01-01 13:55:28 +05:30
|
|
|
BATCH_SIZE = 1000
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
# Applies to closed or merged MRs when determining whether to migrate their
|
|
|
|
# diffs to external storage
|
|
|
|
EXTERNAL_DIFF_CUTOFF = 7.days.freeze
|
2016-11-03 12:29:30 +05:30
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
# The files_count column is a 2-byte signed integer. Look up the true value
|
|
|
|
# from the database if this sentinel is seen
|
|
|
|
FILES_COUNT_SENTINEL = 2**15 - 1
|
|
|
|
|
2014-09-02 18:07:02 +05:30
|
|
|
belongs_to :merge_request
|
2019-03-02 22:35:43 +05:30
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
manual_inverse_association :merge_request, :merge_request_diff
|
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
has_many :merge_request_diff_files,
|
|
|
|
-> { order(:merge_request_diff_id, :relative_order) },
|
|
|
|
inverse_of: :merge_request_diff
|
|
|
|
|
2017-09-10 17:25:29 +05:30
|
|
|
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2019-03-13 22:55:13 +05:30
|
|
|
validates :base_commit_sha, :head_commit_sha, :start_commit_sha, sha: true
|
2021-03-11 19:13:27 +05:30
|
|
|
validates :merge_request_id, uniqueness: { scope: :diff_type }, if: :merge_head?
|
2019-03-13 22:55:13 +05:30
|
|
|
|
2014-09-02 18:07:02 +05:30
|
|
|
state_machine :state, initial: :empty do
|
2018-11-08 19:23:39 +05:30
|
|
|
event :clean do
|
|
|
|
transition any => :without_files
|
|
|
|
end
|
|
|
|
|
2014-09-02 18:07:02 +05:30
|
|
|
state :collected
|
2016-06-02 11:05:42 +05:30
|
|
|
state :overflow
|
2018-11-08 19:23:39 +05:30
|
|
|
# Diff files have been deleted by the system
|
|
|
|
state :without_files
|
2016-06-02 11:05:42 +05:30
|
|
|
# Deprecated states: these are no longer used but these values may still occur
|
|
|
|
# in the database.
|
2014-09-02 18:07:02 +05:30
|
|
|
state :timeout
|
|
|
|
state :overflow_commits_safe_size
|
|
|
|
state :overflow_diff_files_limit
|
|
|
|
state :overflow_diff_lines_limit
|
|
|
|
end
|
|
|
|
|
2021-03-11 19:13:27 +05:30
|
|
|
enum diff_type: {
|
|
|
|
regular: 1,
|
|
|
|
merge_head: 2
|
|
|
|
}
|
|
|
|
|
2018-11-08 19:23:39 +05:30
|
|
|
scope :with_files, -> { without_states(:without_files, :empty) }
|
2017-08-17 22:00:37 +05:30
|
|
|
scope :viewable, -> { without_state(:empty) }
|
2018-03-17 18:26:18 +05:30
|
|
|
scope :by_commit_sha, ->(sha) do
|
|
|
|
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
|
|
|
|
end
|
|
|
|
|
2019-07-31 22:56:46 +05:30
|
|
|
scope :by_project_id, -> (project_id) do
|
|
|
|
joins(:merge_request).where(merge_requests: { target_project_id: project_id })
|
|
|
|
end
|
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
scope :recent, -> { order(id: :desc).limit(100) }
|
2020-10-24 23:57:45 +05:30
|
|
|
|
|
|
|
scope :files_in_database, -> do
|
|
|
|
where(stored_externally: [false, nil]).where(arel_table[:files_count].gt(0))
|
|
|
|
end
|
2019-07-07 11:18:12 +05:30
|
|
|
|
|
|
|
scope :not_latest_diffs, -> do
|
|
|
|
merge_requests = MergeRequest.arel_table
|
|
|
|
mr_diffs = arel_table
|
|
|
|
|
|
|
|
join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
|
|
|
|
.and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
|
2021-03-11 19:13:27 +05:30
|
|
|
.and(mr_diffs[:diff_type].eq(diff_types[:regular]))
|
2019-07-07 11:18:12 +05:30
|
|
|
|
|
|
|
arel_join = mr_diffs.join(merge_requests).on(join_condition)
|
|
|
|
joins(arel_join.join_sources)
|
|
|
|
end
|
|
|
|
|
|
|
|
scope :old_merged_diffs, -> (before) do
|
|
|
|
merge_requests = MergeRequest.arel_table
|
|
|
|
mr_metrics = MergeRequest::Metrics.arel_table
|
|
|
|
mr_diffs = arel_table
|
|
|
|
|
|
|
|
mr_join = mr_diffs
|
|
|
|
.join(merge_requests)
|
|
|
|
.on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))
|
|
|
|
|
|
|
|
metrics_join_condition = mr_diffs[:merge_request_id]
|
|
|
|
.eq(mr_metrics[:merge_request_id])
|
|
|
|
.and(mr_metrics[:merged_at].not_eq(nil))
|
|
|
|
|
|
|
|
metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)
|
|
|
|
|
2019-12-21 20:55:43 +05:30
|
|
|
condition = MergeRequest.arel_table[:state_id].eq(MergeRequest.available_states[:merged])
|
2019-07-07 11:18:12 +05:30
|
|
|
.and(MergeRequest::Metrics.arel_table[:merged_at].lteq(before))
|
|
|
|
.and(MergeRequest::Metrics.arel_table[:merged_at].not_eq(nil))
|
|
|
|
|
|
|
|
joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
|
|
|
|
end
|
|
|
|
|
|
|
|
scope :old_closed_diffs, -> (before) do
|
2019-12-21 20:55:43 +05:30
|
|
|
condition = MergeRequest.arel_table[:state_id].eq(MergeRequest.available_states[:closed])
|
2019-07-07 11:18:12 +05:30
|
|
|
.and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))
|
|
|
|
|
|
|
|
joins(merge_request: :metrics).where(condition)
|
|
|
|
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# This scope uses LATERAL JOIN to find the most recent MR diff association for the given merge requests.
|
|
|
|
# To avoid joining the merge_requests table, we build an in memory table using the merge request ids.
|
|
|
|
# Example:
|
|
|
|
# SELECT ...
|
|
|
|
# FROM (VALUES (MR_ID_1),(MR_ID_2)) merge_requests (id)
|
|
|
|
# INNER JOIN LATERAL (...)
|
2021-01-03 14:25:43 +05:30
|
|
|
scope :latest_diff_for_merge_requests, -> (merge_requests) do
|
2021-06-08 01:23:25 +05:30
|
|
|
mrs = Array(merge_requests)
|
|
|
|
return MergeRequestDiff.none if mrs.empty?
|
2021-01-03 14:25:43 +05:30
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
merge_request_table = MergeRequest.arel_table
|
|
|
|
merge_request_diff_table = MergeRequestDiff.arel_table
|
|
|
|
|
|
|
|
join_query = MergeRequestDiff
|
|
|
|
.where(merge_request_table[:id].eq(merge_request_diff_table[:merge_request_id]))
|
|
|
|
.order(created_at: :desc)
|
|
|
|
.limit(1)
|
|
|
|
|
|
|
|
mr_id_list = mrs.map { |mr| "(#{Integer(mr.id)})" }.join(",")
|
|
|
|
|
|
|
|
MergeRequestDiff
|
|
|
|
.from("(VALUES #{mr_id_list}) merge_requests (id)")
|
|
|
|
.joins("INNER JOIN LATERAL (#{join_query.to_sql}) #{MergeRequestDiff.table_name} ON TRUE")
|
2021-01-03 14:25:43 +05:30
|
|
|
.includes(:merge_request_diff_commits)
|
|
|
|
end
|
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
class << self
|
|
|
|
def ids_for_external_storage_migration(limit:)
|
|
|
|
return [] unless Gitlab.config.external_diffs.enabled
|
2019-07-07 11:18:12 +05:30
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
case Gitlab.config.external_diffs.when
|
|
|
|
when 'always'
|
|
|
|
ids_for_external_storage_migration_strategy_always(limit: limit)
|
|
|
|
when 'outdated'
|
|
|
|
ids_for_external_storage_migration_strategy_outdated(limit: limit)
|
|
|
|
else
|
|
|
|
[]
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def ids_for_external_storage_migration_strategy_always(limit:)
|
2019-07-07 11:18:12 +05:30
|
|
|
files_in_database.limit(limit).pluck(:id)
|
2020-10-24 23:57:45 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
def ids_for_external_storage_migration_strategy_outdated(limit:)
|
2019-07-07 11:18:12 +05:30
|
|
|
# Outdated is too complex to be a single SQL query, so split into three
|
|
|
|
before = EXTERNAL_DIFF_CUTOFF.ago
|
|
|
|
|
|
|
|
ids = files_in_database
|
|
|
|
.old_merged_diffs(before)
|
|
|
|
.limit(limit)
|
|
|
|
.pluck(:id)
|
|
|
|
|
|
|
|
return ids if ids.size >= limit
|
|
|
|
|
|
|
|
ids += files_in_database
|
|
|
|
.old_closed_diffs(before)
|
|
|
|
.limit(limit - ids.size)
|
|
|
|
.pluck(:id)
|
|
|
|
|
|
|
|
return ids if ids.size >= limit
|
|
|
|
|
|
|
|
ids + files_in_database
|
|
|
|
.not_latest_diffs
|
|
|
|
.limit(limit - ids.size)
|
|
|
|
.pluck(:id)
|
|
|
|
end
|
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
mount_uploader :external_diff, ExternalDiffUploader
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
# All diff information is collected from repository after object is created.
|
|
|
|
# It allows you to override variables like head_commit_sha before getting diff.
|
|
|
|
after_create :save_git_content, unless: :importing?
|
2020-03-13 15:44:24 +05:30
|
|
|
after_create_commit :set_as_latest_diff, unless: :importing?
|
2016-09-29 09:46:39 +05:30
|
|
|
|
2020-05-24 23:13:21 +05:30
|
|
|
after_save :update_external_diff_store
|
2020-11-24 15:15:51 +05:30
|
|
|
after_save :set_count_columns
|
2019-03-02 22:35:43 +05:30
|
|
|
|
2017-08-17 22:00:37 +05:30
|
|
|
def self.find_by_diff_refs(diff_refs)
|
|
|
|
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
|
|
|
|
end
|
|
|
|
|
2018-11-08 19:23:39 +05:30
|
|
|
def viewable?
|
|
|
|
collected? || without_files? || overflow?
|
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
# Collect information about commits and diff from repository
|
|
|
|
# and save it to the database as serialized data
|
|
|
|
def save_git_content
|
2017-09-10 17:25:29 +05:30
|
|
|
ensure_commit_shas
|
2016-09-29 09:46:39 +05:30
|
|
|
save_commits
|
|
|
|
save_diffs
|
2019-07-31 22:56:46 +05:30
|
|
|
|
|
|
|
# Another set of `after_save` hooks will be called here when we update the record
|
2018-03-17 18:26:18 +05:30
|
|
|
save
|
2019-07-31 22:56:46 +05:30
|
|
|
# We need to reset so that dirty tracking is reset when running the original set
|
|
|
|
# of `after_save` hooks that come after this `after_create` hook. Otherwise, the
|
|
|
|
# hooks that run when an attribute was changed are run twice.
|
|
|
|
reset
|
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
keep_around_commits unless importing?
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
|
|
|
|
2019-12-21 20:55:43 +05:30
|
|
|
def set_as_latest_diff
|
2021-03-11 19:13:27 +05:30
|
|
|
# Don't set merge_head diff as latest so it won't get considered as the
|
|
|
|
# MergeRequest#merge_request_diff.
|
|
|
|
return if merge_head?
|
|
|
|
|
2019-12-21 20:55:43 +05:30
|
|
|
MergeRequest
|
|
|
|
.where('id = ? AND COALESCE(latest_merge_request_diff_id, 0) < ?', self.merge_request_id, self.id)
|
|
|
|
.update_all(latest_merge_request_diff_id: self.id)
|
|
|
|
end
|
|
|
|
|
2017-09-10 17:25:29 +05:30
|
|
|
def ensure_commit_shas
|
2016-09-29 09:46:39 +05:30
|
|
|
self.start_commit_sha ||= merge_request.target_branch_sha
|
2021-03-11 19:13:27 +05:30
|
|
|
|
|
|
|
if merge_head? && merge_request.merge_ref_head.present?
|
|
|
|
diff_refs = merge_request.merge_ref_head.diff_refs
|
|
|
|
|
|
|
|
self.head_commit_sha ||= diff_refs.head_sha
|
|
|
|
self.base_commit_sha ||= diff_refs.base_sha
|
|
|
|
else
|
|
|
|
self.head_commit_sha ||= merge_request.source_branch_sha
|
|
|
|
self.base_commit_sha ||= find_base_sha
|
|
|
|
end
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
# Override head_commit_sha to keep compatibility with merge request diff
|
|
|
|
# created before version 8.4 that does not store head_commit_sha in separate db field.
|
|
|
|
def head_commit_sha
|
|
|
|
if persisted? && super.nil?
|
2017-09-10 17:25:29 +05:30
|
|
|
last_commit_sha
|
2016-09-29 09:46:39 +05:30
|
|
|
else
|
|
|
|
super
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
def files_count
|
|
|
|
db_value = read_attribute(:files_count)
|
|
|
|
|
|
|
|
case db_value
|
|
|
|
when nil, FILES_COUNT_SENTINEL
|
|
|
|
merge_request_diff_files.count
|
|
|
|
else
|
|
|
|
db_value
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
# This method will rely on repository branch sha
|
|
|
|
# in case start_commit_sha is nil. Its necesarry for old merge request diff
|
|
|
|
# created before version 8.4 to work
|
|
|
|
def safe_start_commit_sha
|
|
|
|
start_commit_sha || merge_request.target_branch_sha
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2016-06-02 11:05:42 +05:30
|
|
|
def size
|
2016-09-13 17:45:13 +05:30
|
|
|
real_size.presence || raw_diffs.size
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2019-10-12 21:52:04 +05:30
|
|
|
def lines_count
|
|
|
|
strong_memoize(:lines_count) do
|
2019-12-04 20:38:33 +05:30
|
|
|
raw_diffs(limits: false).line_count
|
2019-10-12 21:52:04 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2016-09-13 17:45:13 +05:30
|
|
|
def raw_diffs(options = {})
|
2016-06-02 11:05:42 +05:30
|
|
|
if options[:ignore_whitespace_change]
|
2017-09-10 17:25:29 +05:30
|
|
|
@diffs_no_whitespace ||= compare.diffs(options)
|
2016-06-02 11:05:42 +05:30
|
|
|
else
|
2016-09-13 17:45:13 +05:30
|
|
|
@raw_diffs ||= {}
|
2017-09-10 17:25:29 +05:30
|
|
|
@raw_diffs[options] ||= load_diffs(options)
|
2016-06-02 11:05:42 +05:30
|
|
|
end
|
2015-11-26 14:37:03 +05:30
|
|
|
end
|
|
|
|
|
2019-12-26 22:10:19 +05:30
|
|
|
def commits(limit: nil)
|
|
|
|
strong_memoize(:"commits_#{limit || 'all'}") do
|
|
|
|
load_commits(limit: limit)
|
|
|
|
end
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
|
|
|
|
2017-09-10 17:25:29 +05:30
|
|
|
def last_commit_sha
|
2019-12-26 22:10:19 +05:30
|
|
|
commit_shas(limit: 1).first
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2015-10-24 18:46:33 +05:30
|
|
|
def first_commit
|
|
|
|
commits.last
|
|
|
|
end
|
|
|
|
|
2019-12-04 20:38:33 +05:30
|
|
|
def last_commit
|
|
|
|
commits.first
|
|
|
|
end
|
|
|
|
|
2016-01-29 22:53:50 +05:30
|
|
|
def base_commit
|
2016-09-29 09:46:39 +05:30
|
|
|
return unless base_commit_sha
|
2016-01-29 22:53:50 +05:30
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
project.commit_by(oid: base_commit_sha)
|
2016-01-29 22:53:50 +05:30
|
|
|
end
|
|
|
|
|
2016-08-24 12:49:21 +05:30
|
|
|
def start_commit
|
2016-09-29 09:46:39 +05:30
|
|
|
return unless start_commit_sha
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
project.commit_by(oid: start_commit_sha)
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2016-08-24 12:49:21 +05:30
|
|
|
def head_commit
|
2016-09-29 09:46:39 +05:30
|
|
|
return unless head_commit_sha
|
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
project.commit_by(oid: head_commit_sha)
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
|
|
|
|
2019-12-26 22:10:19 +05:30
|
|
|
def commit_shas(limit: nil)
|
2021-01-03 14:25:43 +05:30
|
|
|
if association(:merge_request_diff_commits).loaded?
|
|
|
|
sorted_diff_commits = merge_request_diff_commits.sort_by { |diff_commit| [diff_commit.id, diff_commit.relative_order] }
|
|
|
|
sorted_diff_commits = sorted_diff_commits.take(limit) if limit
|
|
|
|
sorted_diff_commits.map(&:sha)
|
|
|
|
else
|
|
|
|
merge_request_diff_commits.limit(limit).pluck(:sha)
|
|
|
|
end
|
2017-08-17 22:00:37 +05:30
|
|
|
end
|
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
def includes_any_commits?(shas)
|
|
|
|
return false if shas.blank?
|
2018-12-13 13:39:08 +05:30
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
# when the number of shas is huge (1000+) we don't want
|
|
|
|
# to pass them all as an SQL param, let's pass them in batches
|
|
|
|
shas.each_slice(BATCH_SIZE).any? do |batched_shas|
|
|
|
|
merge_request_diff_commits.where(sha: batched_shas).exists?
|
|
|
|
end
|
2018-12-13 13:39:08 +05:30
|
|
|
end
|
|
|
|
|
2017-08-17 22:00:37 +05:30
|
|
|
def diff_refs=(new_diff_refs)
|
|
|
|
self.base_commit_sha = new_diff_refs&.base_sha
|
|
|
|
self.start_commit_sha = new_diff_refs&.start_sha
|
|
|
|
self.head_commit_sha = new_diff_refs&.head_sha
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def diff_refs
|
|
|
|
return unless start_commit_sha || base_commit_sha
|
|
|
|
|
|
|
|
Gitlab::Diff::DiffRefs.new(
|
|
|
|
base_sha: base_commit_sha,
|
|
|
|
start_sha: start_commit_sha,
|
|
|
|
head_sha: head_commit_sha
|
|
|
|
)
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2017-09-10 17:25:29 +05:30
|
|
|
# MRs created before 8.4 don't store their true diff refs (start and base),
|
|
|
|
# but we need to get a commit SHA for the "View file @ ..." link by a file,
|
|
|
|
# so we use an approximation of the diff refs if we can't get the actual one.
|
|
|
|
#
|
|
|
|
# These will not be the actual diff refs if the target branch was merged into
|
|
|
|
# the source branch after the merge request was created, but it is good enough
|
|
|
|
# for the specific purpose of linking to a commit.
|
|
|
|
#
|
|
|
|
# It is not good enough for highlighting diffs, so we can't simply pass
|
|
|
|
# these as `diff_refs.`
|
|
|
|
def fallback_diff_refs
|
|
|
|
real_refs = diff_refs
|
|
|
|
return real_refs if real_refs
|
|
|
|
|
|
|
|
likely_base_commit_sha = (first_commit&.parent || first_commit)&.sha
|
|
|
|
|
|
|
|
Gitlab::Diff::DiffRefs.new(
|
|
|
|
base_sha: likely_base_commit_sha,
|
|
|
|
start_sha: safe_start_commit_sha,
|
|
|
|
head_sha: head_commit_sha
|
|
|
|
)
|
|
|
|
end
|
|
|
|
|
2016-09-13 17:45:13 +05:30
|
|
|
def diff_refs_by_sha?
|
|
|
|
base_commit_sha? && head_commit_sha? && start_commit_sha?
|
|
|
|
end
|
|
|
|
|
2019-12-21 20:55:43 +05:30
|
|
|
def diffs_in_batch(batch_page, batch_size, diff_options:)
|
2020-01-01 13:55:28 +05:30
|
|
|
fetching_repository_diffs(diff_options) do |comparison|
|
2021-09-04 01:27:46 +05:30
|
|
|
reorder_diff_files!
|
|
|
|
diffs_batch = diffs_in_batch_collection(batch_page, batch_size, diff_options: diff_options)
|
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
if comparison
|
2021-09-04 01:27:46 +05:30
|
|
|
if diff_options[:paths].blank? && !without_files?
|
|
|
|
# Return the empty MergeRequestDiffBatch for an out of bound batch request
|
|
|
|
break diffs_batch if diffs_batch.diff_file_paths.blank?
|
|
|
|
|
|
|
|
diff_options.merge!(
|
|
|
|
paths: diffs_batch.diff_file_paths,
|
|
|
|
pagination_data: diffs_batch.pagination_data
|
|
|
|
)
|
|
|
|
end
|
|
|
|
|
|
|
|
comparison.diffs(diff_options)
|
2020-01-01 13:55:28 +05:30
|
|
|
else
|
2021-09-04 01:27:46 +05:30
|
|
|
diffs_batch
|
2020-01-01 13:55:28 +05:30
|
|
|
end
|
|
|
|
end
|
2019-12-21 20:55:43 +05:30
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def diffs(diff_options = nil)
|
2020-01-01 13:55:28 +05:30
|
|
|
fetching_repository_diffs(diff_options) do |comparison|
|
2018-11-08 19:23:39 +05:30
|
|
|
# It should fetch the repository when diffs are cleaned by the system.
|
|
|
|
# We don't keep these for storage overload purposes.
|
2019-12-04 20:38:33 +05:30
|
|
|
# See https://gitlab.com/gitlab-org/gitlab-foss/issues/37639
|
2020-01-01 13:55:28 +05:30
|
|
|
if comparison
|
|
|
|
comparison.diffs(diff_options)
|
|
|
|
else
|
2021-02-22 17:27:13 +05:30
|
|
|
reorder_diff_files!
|
2020-01-01 13:55:28 +05:30
|
|
|
diffs_collection(diff_options)
|
|
|
|
end
|
2018-11-08 19:23:39 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Should always return the DB persisted diffs collection
|
|
|
|
# (e.g. Gitlab::Diff::FileCollection::MergeRequestDiff.
|
|
|
|
# It's useful when trying to invalidate old caches through
|
|
|
|
# FileCollection::MergeRequestDiff#clear_cache!
|
|
|
|
def diffs_collection(diff_options = nil)
|
2016-09-29 09:46:39 +05:30
|
|
|
Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options)
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def project
|
|
|
|
merge_request.target_project
|
|
|
|
end
|
2016-08-24 12:49:21 +05:30
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def compare
|
|
|
|
@compare ||=
|
|
|
|
Gitlab::Git::Compare.new(
|
|
|
|
repository.raw_repository,
|
|
|
|
safe_start_commit_sha,
|
|
|
|
head_commit_sha
|
|
|
|
)
|
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def latest?
|
2018-03-17 18:26:18 +05:30
|
|
|
self.id == merge_request.latest_merge_request_diff_id
|
2016-09-29 09:46:39 +05:30
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2018-12-05 23:21:45 +05:30
|
|
|
# rubocop: disable CodeReuse/ServiceClass
|
2017-09-10 17:25:29 +05:30
|
|
|
def compare_with(sha)
|
2016-11-03 12:29:30 +05:30
|
|
|
# When compare merge request versions we want diff A..B instead of A...B
|
|
|
|
# so we handle cases when user does squash and rebase of the commits between versions.
|
|
|
|
# For this reason we set straight to true by default.
|
2017-09-10 17:25:29 +05:30
|
|
|
CompareService.new(project, head_commit_sha).execute(project, sha, straight: true)
|
2017-08-17 22:00:37 +05:30
|
|
|
end
|
2018-12-05 23:21:45 +05:30
|
|
|
# rubocop: enable CodeReuse/ServiceClass
|
2017-08-17 22:00:37 +05:30
|
|
|
|
2020-04-22 19:07:51 +05:30
|
|
|
def modified_paths(fallback_on_overflow: false)
|
|
|
|
if fallback_on_overflow && overflow?
|
|
|
|
# This is an extremely slow means to find the modified paths for a given
|
|
|
|
# MergeRequestDiff. This should be avoided, except where the limit of
|
|
|
|
# 1_000 (as of %12.10) entries returned by the default behavior is an
|
|
|
|
# issue.
|
|
|
|
strong_memoize(:overflowed_modified_paths) do
|
|
|
|
project.repository.diff_stats(
|
|
|
|
base_commit_sha,
|
|
|
|
head_commit_sha
|
|
|
|
).paths
|
|
|
|
end
|
|
|
|
else
|
|
|
|
strong_memoize(:modified_paths) do
|
|
|
|
merge_request_diff_files.pluck(:new_path, :old_path).flatten.uniq
|
|
|
|
end
|
2018-12-13 13:39:08 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
def update_external_diff_store
|
2020-05-24 23:13:21 +05:30
|
|
|
return unless saved_change_to_external_diff? || saved_change_to_stored_externally?
|
2019-03-02 22:35:43 +05:30
|
|
|
|
2020-05-24 23:13:21 +05:30
|
|
|
update_column(:external_diff_store, external_diff.object_store)
|
2019-03-02 22:35:43 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
# If enabled, yields the external file containing the diff. Otherwise, yields
|
|
|
|
# nil. This method is not thread-safe, but it *is* re-entrant, which allows
|
|
|
|
# multiple merge_request_diff_files to load their data efficiently
|
|
|
|
def opening_external_diff
|
|
|
|
return yield(nil) unless stored_externally?
|
|
|
|
return yield(@external_diff_file) if @external_diff_file
|
|
|
|
|
|
|
|
external_diff.open do |file|
|
2019-07-07 11:18:12 +05:30
|
|
|
@external_diff_file = file
|
2019-05-18 00:54:41 +05:30
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
yield(@external_diff_file)
|
|
|
|
ensure
|
|
|
|
@external_diff_file = nil
|
2019-05-18 00:54:41 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
# Transactionally migrate the current merge_request_diff_files entries to
|
|
|
|
# external storage. If external storage isn't an option for this diff, the
|
|
|
|
# method is a no-op.
|
|
|
|
def migrate_files_to_external_storage!
|
2020-11-24 15:15:51 +05:30
|
|
|
return if stored_externally? || !use_external_diff? || files_count == 0
|
2019-07-07 11:18:12 +05:30
|
|
|
|
|
|
|
rows = build_merge_request_diff_files(merge_request_diff_files)
|
2020-07-28 23:09:34 +05:30
|
|
|
rows = build_external_merge_request_diff_files(rows)
|
|
|
|
|
|
|
|
# Perform carrierwave activity before entering the database transaction.
|
|
|
|
# This is safe as until the `external_diff_store` column is changed, we will
|
|
|
|
# continue to consult the in-database content.
|
|
|
|
self.external_diff.store!
|
2019-07-07 11:18:12 +05:30
|
|
|
|
|
|
|
transaction do
|
|
|
|
MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
|
2020-07-28 23:09:34 +05:30
|
|
|
Gitlab::Database.bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
|
2019-07-07 11:18:12 +05:30
|
|
|
save!
|
|
|
|
end
|
|
|
|
|
2019-07-31 22:56:46 +05:30
|
|
|
merge_request_diff_files.reset
|
2019-07-07 11:18:12 +05:30
|
|
|
end
|
|
|
|
|
2020-04-22 19:07:51 +05:30
|
|
|
# Transactionally migrate the current merge_request_diff_files entries from
|
|
|
|
# external storage, back to the database. This is the rollback operation for
|
|
|
|
# +migrate_files_to_external_storage!+
|
|
|
|
#
|
|
|
|
# If this diff isn't in external storage, the method is a no-op.
|
|
|
|
def migrate_files_to_database!
|
|
|
|
return unless stored_externally?
|
2020-11-24 15:15:51 +05:30
|
|
|
return if files_count == 0
|
2020-04-22 19:07:51 +05:30
|
|
|
|
|
|
|
rows = convert_external_diffs_to_database
|
|
|
|
|
|
|
|
transaction do
|
|
|
|
MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
|
2020-06-23 00:09:42 +05:30
|
|
|
Gitlab::Database.bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
|
2020-04-22 19:07:51 +05:30
|
|
|
update!(stored_externally: false)
|
|
|
|
end
|
|
|
|
|
|
|
|
# Only delete the external diff file after the contents have been saved to
|
|
|
|
# the database
|
|
|
|
remove_external_diff!
|
|
|
|
merge_request_diff_files.reset
|
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
private
|
|
|
|
|
2020-04-22 19:07:51 +05:30
|
|
|
def convert_external_diffs_to_database
|
|
|
|
opening_external_diff do |external_file|
|
|
|
|
merge_request_diff_files.map do |diff_file|
|
|
|
|
row = diff_file.attributes.except('diff')
|
|
|
|
|
|
|
|
raise "Diff file lacks external diff offset or size: #{row.inspect}" unless
|
|
|
|
row['external_diff_offset'] && row['external_diff_size']
|
|
|
|
|
|
|
|
# The diff in the external file is already base64-encoded if necessary,
|
|
|
|
# matching the 'binary' attribute of the row. Reading it directly allows
|
|
|
|
# a cycle of decode-encode to be skipped
|
|
|
|
external_file.seek(row.delete('external_diff_offset'))
|
|
|
|
row['diff'] = external_file.read(row.delete('external_diff_size'))
|
|
|
|
|
|
|
|
row
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
def diffs_in_batch_collection(batch_page, batch_size, diff_options:)
|
|
|
|
Gitlab::Diff::FileCollection::MergeRequestDiffBatch.new(self,
|
|
|
|
batch_page,
|
|
|
|
batch_size,
|
|
|
|
diff_options: diff_options)
|
|
|
|
end
|
|
|
|
|
2019-05-03 19:53:19 +05:30
|
|
|
def encode_in_base64?(diff_text)
|
2021-01-03 14:25:43 +05:30
|
|
|
return false if diff_text.nil?
|
|
|
|
|
2019-05-03 19:53:19 +05:30
|
|
|
(diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?) ||
|
|
|
|
diff_text.include?("\0")
|
|
|
|
end
|
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
def build_external_merge_request_diff_files(rows)
|
2019-03-02 22:35:43 +05:30
|
|
|
tempfile = build_external_diff_tempfile(rows)
|
|
|
|
|
|
|
|
self.external_diff = tempfile
|
|
|
|
self.stored_externally = true
|
|
|
|
|
|
|
|
rows
|
|
|
|
ensure
|
|
|
|
tempfile&.unlink
|
|
|
|
end
|
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
def create_merge_request_diff_files(rows)
|
|
|
|
rows = build_external_merge_request_diff_files(rows) if use_external_diff?
|
|
|
|
|
|
|
|
# Faster inserts
|
2020-06-23 00:09:42 +05:30
|
|
|
Gitlab::Database.bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
|
2019-07-07 11:18:12 +05:30
|
|
|
end
|
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
def build_external_diff_tempfile(rows)
|
|
|
|
Tempfile.open(external_diff.filename) do |file|
|
2019-07-07 11:18:12 +05:30
|
|
|
rows.each do |row|
|
2019-03-02 22:35:43 +05:30
|
|
|
data = row.delete(:diff)
|
2019-07-07 11:18:12 +05:30
|
|
|
row[:external_diff_offset] = file.pos
|
2021-01-03 14:25:43 +05:30
|
|
|
row[:external_diff_size] = data&.bytesize || 0
|
2019-03-02 22:35:43 +05:30
|
|
|
|
|
|
|
file.write(data)
|
|
|
|
end
|
|
|
|
|
|
|
|
file
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def build_merge_request_diff_files(diffs)
|
2021-02-22 17:27:13 +05:30
|
|
|
sort_diffs(diffs).map.with_index do |diff, index|
|
2017-09-10 17:25:29 +05:30
|
|
|
diff_hash = diff.to_hash.merge(
|
|
|
|
binary: false,
|
|
|
|
merge_request_diff_id: self.id,
|
|
|
|
relative_order: index
|
|
|
|
)
|
|
|
|
|
|
|
|
# Compatibility with old diffs created with Psych.
|
|
|
|
diff_hash.tap do |hash|
|
|
|
|
diff_text = hash[:diff]
|
|
|
|
|
2019-05-03 19:53:19 +05:30
|
|
|
if encode_in_base64?(diff_text)
|
2017-09-10 17:25:29 +05:30
|
|
|
hash[:binary] = true
|
|
|
|
hash[:diff] = [diff_text].pack('m0')
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
2016-08-24 12:49:21 +05:30
|
|
|
end
|
|
|
|
|
2020-01-01 13:55:28 +05:30
|
|
|
# Yields the block with the repository Compare object if it should
|
|
|
|
# fetch diffs from the repository instead DB.
|
|
|
|
def fetching_repository_diffs(diff_options)
|
|
|
|
return unless block_given?
|
|
|
|
|
|
|
|
diff_options ||= {}
|
|
|
|
|
|
|
|
# Can be read as: fetch the persisted diffs if yielded without the
|
|
|
|
# Compare object.
|
|
|
|
return yield unless without_files? || diff_options[:ignore_whitespace_change]
|
|
|
|
return yield unless diff_refs&.complete?
|
|
|
|
|
|
|
|
comparison = diff_refs.compare_in(repository.project)
|
|
|
|
|
|
|
|
return yield unless comparison
|
|
|
|
|
|
|
|
yield(comparison)
|
|
|
|
end
|
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
def use_external_diff?
|
|
|
|
return false unless Gitlab.config.external_diffs.enabled
|
|
|
|
|
|
|
|
case Gitlab.config.external_diffs.when
|
|
|
|
when 'always'
|
|
|
|
true
|
|
|
|
when 'outdated'
|
|
|
|
outdated_by_merge? || outdated_by_closure? || old_version?
|
|
|
|
else
|
|
|
|
false # Disable external diffs if misconfigured
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def outdated_by_merge?
|
|
|
|
return false unless merge_request&.metrics&.merged_at
|
|
|
|
|
|
|
|
merge_request.merged? && merge_request.metrics.merged_at < EXTERNAL_DIFF_CUTOFF.ago
|
|
|
|
end
|
|
|
|
|
|
|
|
def outdated_by_closure?
|
|
|
|
return false unless merge_request&.metrics&.latest_closed_at
|
|
|
|
|
|
|
|
merge_request.closed? && merge_request.metrics.latest_closed_at < EXTERNAL_DIFF_CUTOFF.ago
|
|
|
|
end
|
|
|
|
|
|
|
|
def old_version?
|
|
|
|
latest_id = MergeRequest
|
|
|
|
.where(id: merge_request_id)
|
|
|
|
.limit(1)
|
|
|
|
.pluck(:latest_merge_request_diff_id)
|
|
|
|
.first
|
|
|
|
|
2019-12-21 20:55:43 +05:30
|
|
|
latest_id && self.id < latest_id
|
2019-07-07 11:18:12 +05:30
|
|
|
end
|
|
|
|
|
2017-09-10 17:25:29 +05:30
|
|
|
def load_diffs(options)
|
2019-03-02 22:35:43 +05:30
|
|
|
# Ensure all diff files operate on the same external diff file instance if
|
|
|
|
# present. This reduces file open/close overhead.
|
|
|
|
opening_external_diff do
|
|
|
|
collection = merge_request_diff_files
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
if paths = options[:paths]
|
|
|
|
collection = collection.where('old_path IN (?) OR new_path IN (?)', paths, paths)
|
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2019-03-02 22:35:43 +05:30
|
|
|
Gitlab::Git::DiffCollection.new(collection.map(&:to_hash), options)
|
|
|
|
end
|
2016-08-24 12:49:21 +05:30
|
|
|
end
|
|
|
|
|
2019-12-26 22:10:19 +05:30
|
|
|
def load_commits(limit: nil)
|
|
|
|
commits = merge_request_diff_commits.limit(limit)
|
|
|
|
.map { |commit| Commit.from_hash(commit.to_hash, project) }
|
2017-09-10 17:25:29 +05:30
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
CommitCollection
|
|
|
|
.new(merge_request.source_project, commits, merge_request.source_branch)
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def save_diffs
|
2016-08-24 12:49:21 +05:30
|
|
|
new_attributes = {}
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
if compare.commits.empty?
|
2016-08-24 12:49:21 +05:30
|
|
|
new_attributes[:state] = :empty
|
2014-09-02 18:07:02 +05:30
|
|
|
else
|
2021-01-03 14:25:43 +05:30
|
|
|
diff_collection = compare.diffs(Commit.max_diff_options(project: merge_request.project))
|
2016-08-24 12:49:21 +05:30
|
|
|
new_attributes[:real_size] = diff_collection.real_size
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2016-06-02 11:05:42 +05:30
|
|
|
if diff_collection.any?
|
2016-08-24 12:49:21 +05:30
|
|
|
new_attributes[:state] = :collected
|
2017-08-17 22:00:37 +05:30
|
|
|
|
2019-07-07 11:18:12 +05:30
|
|
|
rows = build_merge_request_diff_files(diff_collection)
|
|
|
|
create_merge_request_diff_files(rows)
|
2021-02-22 17:27:13 +05:30
|
|
|
new_attributes[:sorted] = true
|
2020-10-24 23:57:45 +05:30
|
|
|
self.class.uncached { merge_request_diff_files.reset }
|
2017-09-10 17:25:29 +05:30
|
|
|
end
|
2017-08-17 22:00:37 +05:30
|
|
|
|
|
|
|
# Set our state to 'overflow' to make the #empty? and #collected?
|
|
|
|
# methods (generated by StateMachine) return false.
|
|
|
|
#
|
|
|
|
# This attribution has to come at the end of the method so 'overflow'
|
|
|
|
# state does not get overridden by 'collected'.
|
|
|
|
new_attributes[:state] = :overflow if diff_collection.overflow?
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2018-03-17 18:26:18 +05:30
|
|
|
assign_attributes(new_attributes)
|
2017-09-10 17:25:29 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
def save_commits
|
|
|
|
MergeRequestDiffCommit.create_bulk(self.id, compare.commits.reverse)
|
2020-10-24 23:57:45 +05:30
|
|
|
self.class.uncached { merge_request_diff_commits.reset }
|
|
|
|
end
|
2017-09-10 17:25:29 +05:30
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
def set_count_columns
|
|
|
|
update_columns(
|
|
|
|
commits_count: merge_request_diff_commits.size,
|
2020-11-24 15:15:51 +05:30
|
|
|
files_count: [FILES_COUNT_SENTINEL, merge_request_diff_files.size].min
|
2020-10-24 23:57:45 +05:30
|
|
|
)
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
|
|
|
def repository
|
2016-08-24 12:49:21 +05:30
|
|
|
project.repository
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
def find_base_sha
|
|
|
|
return unless head_commit_sha && start_commit_sha
|
2014-09-02 18:07:02 +05:30
|
|
|
|
2016-09-29 09:46:39 +05:30
|
|
|
project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
|
2016-06-02 11:05:42 +05:30
|
|
|
end
|
|
|
|
|
2016-08-24 12:49:21 +05:30
|
|
|
def keep_around_commits
|
2018-03-17 18:26:18 +05:30
|
|
|
[repository, merge_request.source_project.repository].uniq.each do |repo|
|
2018-11-20 20:47:30 +05:30
|
|
|
repo.keep_around(start_commit_sha, head_commit_sha, base_commit_sha)
|
2017-08-17 22:00:37 +05:30
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
2021-02-22 17:27:13 +05:30
|
|
|
|
|
|
|
def reorder_diff_files!
|
|
|
|
return if sorted? || merge_request_diff_files.empty?
|
|
|
|
|
|
|
|
diff_files = sort_diffs(merge_request_diff_files)
|
|
|
|
|
|
|
|
diff_files.each_with_index do |diff_file, index|
|
|
|
|
diff_file.relative_order = index
|
|
|
|
end
|
|
|
|
|
|
|
|
transaction do
|
|
|
|
# The `merge_request_diff_files` table doesn't have an `id` column so
|
|
|
|
# we cannot use `Gitlab::Database::BulkUpdate`.
|
|
|
|
MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
|
|
|
|
MergeRequestDiffFile.bulk_insert!(diff_files)
|
|
|
|
update_column(:sorted, true)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def sort_diffs(diffs)
|
|
|
|
Gitlab::Diff::FileCollectionSorter.new(diffs).sort
|
|
|
|
end
|
2014-09-02 18:07:02 +05:30
|
|
|
end
|
2019-12-21 20:55:43 +05:30
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
MergeRequestDiff.prepend_mod_with('MergeRequestDiff')
|