2021-04-17 20:07:23 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# rubocop: disable Style/Documentation
|
2022-03-02 08:16:31 +05:30
|
|
|
class Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid # rubocop:disable Metrics/ClassLength
|
2021-04-17 20:07:23 +05:30
|
|
|
# rubocop: disable Gitlab/NamespacedClass
|
|
|
|
# Migration-local model backed by the `vulnerability_identifiers` table.
class VulnerabilitiesIdentifier < ActiveRecord::Base
  self.table_name = 'vulnerability_identifiers'

  # Findings that reference this identifier through `primary_identifier_id`.
  has_many :primary_findings,
           class_name: 'VulnerabilitiesFinding',
           foreign_key: 'primary_identifier_id',
           inverse_of: :primary_identifier
end
|
|
|
|
|
|
|
|
# Migration-local model backed by the `vulnerability_occurrences` table.
class VulnerabilitiesFinding < ActiveRecord::Base
  include EachBatch
  include ShaAttribute

  self.table_name = 'vulnerability_occurrences'

  # Signature rows pointing at this finding through `finding_id`.
  has_many :signatures,
           class_name: 'VulnerabilityFindingSignature',
           foreign_key: 'finding_id',
           inverse_of: :finding

  # Identifier referenced by `primary_identifier_id`.
  belongs_to :primary_identifier,
             class_name: 'VulnerabilitiesIdentifier',
             foreign_key: 'primary_identifier_id',
             inverse_of: :primary_findings

  # Report type name => integer stored in the `report_type` column.
  REPORT_TYPES = {
    sast: 0,
    dependency_scanning: 1,
    container_scanning: 2,
    dast: 3,
    secret_detection: 4,
    coverage_fuzzing: 5,
    api_fuzzing: 6,
    cluster_image_scanning: 7,
    generic: 99
  }.with_indifferent_access.freeze

  enum report_type: REPORT_TYPES

  sha_attribute :fingerprint
  sha_attribute :location_fingerprint
end
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Migration-local model backed by the `vulnerability_finding_signatures` table.
class VulnerabilityFindingSignature < ActiveRecord::Base
  include ShaAttribute

  self.table_name = 'vulnerability_finding_signatures'

  # Finding referenced by `finding_id`.
  belongs_to :finding,
             class_name: 'VulnerabilitiesFinding',
             foreign_key: 'finding_id',
             inverse_of: :signatures

  sha_attribute :signature_sha
end
|
|
|
|
|
|
|
|
# Migration-local model backed by the `vulnerability_occurrence_pipelines`
# table; batch iteration comes from EachBatch.
class VulnerabilitiesFindingPipeline < ActiveRecord::Base
  include EachBatch

  self.table_name = 'vulnerability_occurrence_pipelines'
end
|
|
|
|
|
|
|
|
# Migration-local model backed by the `vulnerabilities` table; batch
# iteration comes from EachBatch.
class Vulnerability < ActiveRecord::Base
  include EachBatch

  self.table_name = 'vulnerabilities'
end
|
|
|
|
|
2021-04-17 20:07:23 +05:30
|
|
|
# Builds the UUIDv5 of a finding from its name string, using a namespace
# UUID selected by the current Rails environment.
class CalculateFindingUUID
  # Namespace UUID per Rails environment.
  FINDING_NAMESPACES_IDS = {
    development: "a143e9e2-41b3-47bc-9a19-081d089229f4",
    test: "a143e9e2-41b3-47bc-9a19-081d089229f4",
    staging: "a6930898-a1b2-4365-ab18-12aa474d9b26",
    production: "58dc0f06-936c-43b3-93bb-71693f1b6570"
  }.freeze

  # Splits a textual UUID into six hex groups of 8, 4, 4, 4, 4 and 8 digits.
  NAMESPACE_REGEX = /(\h{8})-(\h{4})-(\h{4})-(\h{4})-(\h{4})(\h{8})/.freeze
  # Packs those six groups as 32/16/16/16/16/32-bit big-endian integers.
  PACK_PATTERN = "NnnnnN"

  class << self
    # Returns the result of Digest::UUID.uuid_v5 for +value+ within the
    # namespace of the current environment.
    def call(value)
      Digest::UUID.uuid_v5(namespace_id, value)
    end

    # Returns the namespace UUID of the current environment as a packed
    # binary string. Raises KeyError when the environment has no entry in
    # FINDING_NAMESPACES_IDS.
    def namespace_id
      textual_uuid = FINDING_NAMESPACES_IDS.fetch(Rails.env.to_sym)

      # Digest::UUID is broken when it is given a textual UUID as the
      # namespace, so the binary form is passed instead. See
      # https://github.com/rails/rails/issues/37681#issue-520718028
      hex_groups = textual_uuid.scan(NAMESPACE_REGEX).flatten
      hex_groups.map { |group| group.to_i(16) }.pack(PACK_PATTERN)
    end
  end
end
|
|
|
|
# rubocop: enable Gitlab/NamespacedClass
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
|
2021-04-17 20:07:23 +05:30
|
|
|
# Recalculates the UUIDv5 of the findings whose ids are in start_id..end_id.
#
# The findings are processed in batches of 50. For every batch:
#   1. the findings returned by find_duplicates are removed,
#   2. a new UUID is calculated for each remaining finding; a finding whose
#      UUID was already produced earlier in the same batch is removed,
#   3. the new UUIDs are written with one bulk update. When that update
#      raises ActiveRecord::RecordNotUnique, the row currently holding the
#      reported UUID is removed and the batch is retried. Retries are
#      capped per batch so a conflict that cannot be resolved fails the
#      job instead of looping forever.
#
# start_id - first finding id of the range (inclusive).
# end_id   - last finding id of the range (inclusive).
#
# Any StandardError is logged and handed to
# Gitlab::ErrorTracking.track_and_raise_exception.
def perform(start_id, end_id)
  log_info('Migration started', start_id: start_id, end_id: end_id)

  batch_size = 50

  VulnerabilitiesFinding
    .joins(:primary_identifier)
    .includes(:signatures)
    .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
    .where(id: start_id..end_id)
    .each_batch(of: batch_size) do |relation|
      duplicates = find_duplicates(relation)
      remove_findings(ids: duplicates) if duplicates.present?

      to_update = relation.reject { |finding| duplicates.include?(finding.id) }

      # Each retry is expected to remove one row that blocks the update, so
      # a batch needs at most `batch_size` retries. The counter is declared
      # outside of `begin` because `retry` re-runs that block from the top.
      retries = 0

      begin
        known_uuids = Set.new
        to_be_deleted = []

        mappings = to_update.each_with_object({}) do |finding, hash|
          uuid = calculate_uuid_v5_for_finding(finding)

          # Set#add? returns nil when the UUID was already seen in this batch.
          if known_uuids.add?(uuid)
            hash[finding] = { uuid: uuid }
          else
            to_be_deleted << finding.id
          end
        end

        # It is technically still possible to have duplicate uuids
        # if the data integrity is broken somehow and the primary identifiers of
        # the findings are pointing to different projects with the same fingerprint values.
        if to_be_deleted.present?
          log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)

          remove_findings(ids: to_be_deleted)
        end

        ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?

        log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
      rescue ActiveRecord::RecordNotUnique => error
        log_info('RecordNotUnique error received')

        match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)

        # This exception returns the **correct** UUIDv5 which probably comes from a later record
        # and it's the one we can drop in the easiest way before retrying the UPDATE query
        if match_data && retries < batch_size
          retries += 1

          uuid = match_data[:uuid]
          log_info('Conflicting UUID found', uuid: uuid)

          id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
          remove_findings(ids: id) if id
          retry
        elsif match_data
          # The conflict survived every allowed retry: stop instead of
          # spinning on the same UPDATE forever.
          log_error('Too many RecordNotUnique retries for the batch', uuid: match_data[:uuid], retries: retries)

          Gitlab::ErrorTracking.track_and_raise_exception(error)
        else
          log_error('Couldnt find conflicting uuid')

          Gitlab::ErrorTracking.track_and_raise_exception(error)
        end
      end
    end

  mark_job_as_succeeded(start_id, end_id)
rescue StandardError => error
  log_error('An exception happened')

  Gitlab::ErrorTracking.track_and_raise_exception(error)
end
|
2022-03-02 08:16:31 +05:30
|
|
|
# rubocop: enable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength
|
2021-04-17 20:07:23 +05:30
|
|
|
|
|
|
|
private
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Returns the ids of findings that share report_type, location_fingerprint,
# primary_identifier_id and project_id with a finding of +relation+.
#
# One lookup query is issued per finding in +relation+. The returned array
# may contain the same id more than once.
def find_duplicates(relation)
  visited_ids = []

  relation.each_with_object([]) do |finding, duplicate_ids|
    # Findings already visited are excluded from later lookups. Without
    # this, when ids 31 and 40 duplicate each other, visiting 31 would
    # report 40 and visiting 40 would report 31, and both records would
    # end up being dropped.
    visited_ids << finding.id

    matching = VulnerabilitiesFinding.where(
      report_type: finding.report_type,
      location_fingerprint: finding.location_fingerprint,
      primary_identifier_id: finding.primary_identifier_id,
      project_id: finding.project_id
    )

    duplicate_ids.concat(matching.where.not(id: visited_ids).pluck(:id))
  end
end
|
|
|
|
|
|
|
|
# Deletes the findings with the given ids together with their pipeline
# rows and the vulnerabilities they reference.
#
# ids - a single finding id or a collection of ids.
def remove_findings(ids:)
  finding_ids = Array(ids)
  log_info('Removing Findings and associated records', ids: finding_ids)

  # Read the vulnerability ids before the findings themselves are deleted.
  vulnerability_ids =
    VulnerabilitiesFinding.where(id: finding_ids).pluck(:vulnerability_id).compact.uniq

  VulnerabilitiesFindingPipeline.where(occurrence_id: finding_ids).each_batch do |pipelines|
    pipelines.delete_all
  end

  Vulnerability.where(id: vulnerability_ids).each_batch do |vulnerabilities|
    vulnerabilities.delete_all
  end

  VulnerabilitiesFinding.where(id: finding_ids).delete_all
end
|
|
|
|
|
2021-04-17 20:07:23 +05:30
|
|
|
# Returns the UUIDv5 for +vulnerability_finding+, or nil when no finding is
# given.
#
# The UUID name is "<report_type>-<fingerprint>-<location>-<project_id>".
# <location> is the signature_sha of the signature that sorts last by
# algorithm_type_before_type_cast, falling back to the finding's own
# location_fingerprint when that is not available.
def calculate_uuid_v5_for_finding(vulnerability_finding)
  return unless vulnerability_finding

  ordered_signatures = vulnerability_finding.signatures.sort_by(&:algorithm_type_before_type_cast)
  preferred_signature = ordered_signatures.last
  location_fingerprint = preferred_signature&.signature_sha || vulnerability_finding.location_fingerprint

  name = [
    vulnerability_finding.report_type,
    vulnerability_finding.fingerprint,
    location_fingerprint,
    vulnerability_finding.project_id
  ].join('-')

  CalculateFindingUUID.call(name)
end
|
2021-09-04 01:27:46 +05:30
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Writes an info-level entry tagged with this migration's name.
#
# message - text stored under the :message key.
# extra   - additional key/value pairs merged into the entry.
def log_info(message, **extra)
  payload = { migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message }.merge(extra)

  logger.info(**payload)
end
|
|
|
|
|
|
|
|
# Writes an error-level entry tagged with this migration's name.
#
# message - text stored under the :message key.
# extra   - additional key/value pairs merged into the entry.
def log_error(message, **extra)
  payload = { migrator: 'RecalculateVulnerabilitiesOccurrencesUuid', message: message }.merge(extra)

  logger.error(**payload)
end
|
|
|
|
|
2021-09-04 01:27:46 +05:30
|
|
|
# Returns the logger used by this migration. It is built on first use and
# the same object is returned on later calls.
def logger
  return @logger if @logger

  @logger = Gitlab::BackgroundMigration::Logger.build
end
|
|
|
|
|
|
|
|
# Passes this migration's name and the given job arguments to
# Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded.
#
# arguments - the arguments the job was started with, collected into an array.
def mark_job_as_succeeded(*arguments)
  migration_name = 'RecalculateVulnerabilitiesOccurrencesUuid'

  Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(migration_name, arguments)
end
|
2021-04-17 20:07:23 +05:30
|
|
|
end
|