109 lines
3.3 KiB
Ruby
109 lines
3.3 KiB
Ruby
|
# frozen_string_literal: true
|
||
|
|
||
|
# ActiveModel::Serialization (https://github.com/rails/rails/blob/v5.0.7/activemodel/lib/active_model/serialization.rb#L184)
|
||
|
# is simple in that it recursively calls `as_json` on each object to
|
||
|
# serialize everything. However, for a model like a Project, this can
|
||
|
# generate a query for every single association, which can add up to tens
|
||
|
# of thousands of queries and lead to memory bloat.
|
||
|
#
|
||
|
# To improve this, we can do several things:
|
||
|
|
||
|
# 1. Use the option tree in http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html
|
||
|
# to generate the necessary preload clauses.
|
||
|
#
|
||
|
# 2. We observe that a single project has many issues, merge requests,
|
||
|
# etc. Instead of serializing everything at once, which could lead to
|
||
|
# database timeouts and high memory usage, we take each top-level
|
||
|
# association and serialize the data in batches.
|
||
|
#
|
||
|
# For example, we serialize the first 100 issues and preload all of
|
||
|
# their associated events, notes, etc. before moving onto the next
|
||
|
# batch. When we're done, we serialize merge requests in the same way.
|
||
|
# We repeat this pattern for the remaining associations specified in
|
||
|
# import_export.yml.
|
||
|
module Gitlab
|
||
|
module ImportExport
|
||
|
class FastHashSerializer
|
||
|
attr_reader :subject, :tree
|
||
|
|
||
|
BATCH_SIZE = 100
|
||
|
|
||
|
def initialize(subject, tree, batch_size: BATCH_SIZE)
|
||
|
@subject = subject
|
||
|
@batch_size = batch_size
|
||
|
@tree = tree
|
||
|
end
|
||
|
|
||
|
# Serializes the subject into a Hash for the given option tree
|
||
|
# (e.g. Project#as_json)
|
||
|
def execute
|
||
|
simple_serialize.merge(serialize_includes)
|
||
|
end
|
||
|
|
||
|
private
|
||
|
|
||
|
def simple_serialize
|
||
|
subject.as_json(
|
||
|
tree.merge(include: nil, preloads: nil))
|
||
|
end
|
||
|
|
||
|
def serialize_includes
|
||
|
return {} unless includes
|
||
|
|
||
|
includes
|
||
|
.map(&method(:serialize_include_definition))
|
||
|
.compact
|
||
|
.to_h
|
||
|
end
|
||
|
|
||
|
# definition:
|
||
|
# { labels: { includes: ... } }
|
||
|
def serialize_include_definition(definition)
|
||
|
raise ArgumentError, 'definition needs to be Hash' unless definition.is_a?(Hash)
|
||
|
raise ArgumentError, 'definition needs to have exactly one Hash element' unless definition.one?
|
||
|
|
||
|
key = definition.first.first
|
||
|
options = definition.first.second
|
||
|
|
||
|
record = subject.public_send(key) # rubocop: disable GitlabSecurity/PublicSend
|
||
|
return unless record
|
||
|
|
||
|
serialized_record = serialize_record(key, record, options)
|
||
|
return unless serialized_record
|
||
|
|
||
|
# `#as_json` always returns keys as `strings`
|
||
|
[key.to_s, serialized_record]
|
||
|
end
|
||
|
|
||
|
def serialize_record(key, record, options)
|
||
|
unless record.respond_to?(:as_json)
|
||
|
raise "Invalid type of #{key} is #{record.class}"
|
||
|
end
|
||
|
|
||
|
# no has-many relation
|
||
|
unless record.is_a?(ActiveRecord::Relation)
|
||
|
return record.as_json(options)
|
||
|
end
|
||
|
|
||
|
# has-many relation
|
||
|
data = []
|
||
|
|
||
|
record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches
|
||
|
batch = batch.preload(preloads[key]) if preloads&.key?(key)
|
||
|
data += batch.as_json(options)
|
||
|
end
|
||
|
|
||
|
data
|
||
|
end
|
||
|
|
||
|
def includes
|
||
|
tree[:include]
|
||
|
end
|
||
|
|
||
|
def preloads
|
||
|
tree[:preload]
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|