# frozen_string_literal: true

# ActiveModel::Serialization (https://github.com/rails/rails/blob/v5.0.7/activemodel/lib/active_model/serialization.rb#L184)
# is simple in that it recursively calls `as_json` on each object to
# serialize everything. However, for a model like a Project, this can
# generate a query for every single association, which can add up to tens
# of thousands of queries and lead to memory bloat.
#
# To improve this, we can do several things:

# 1. Use the option tree in http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html
#    to generate the necessary preload clauses.
#
# 2. We observe that a single project has many issues, merge requests,
#    etc. Instead of serializing everything at once, which could lead to
#    database timeouts and high memory usage, we take each top-level
#    association and serialize the data in batches.
#
#  For example, we serialize the first 100 issues and preload all of
#  their associated events, notes, etc. before moving onto the next
#  batch. When we're done, we serialize merge requests in the same way.
#  We repeat this pattern for the remaining associations specified in
#  import_export.yml.
module Gitlab
  module ImportExport
    # Serializes `subject` into a Hash: the subject's own attributes come from
    # a single `as_json` call, while every association listed under
    # `tree[:include]` is serialized separately (has-many relations in batches).
    class FastHashSerializer
      # Default number of records loaded and serialized per batch.
      BATCH_SIZE = 100

      attr_reader :subject, :tree

      # Usage of this class results in delayed
      # serialization of relation. Unless `#raw_json`
      # has already been called, the serialization
      # will be triggered when the `JSON.generate`
      # is executed.
      #
      # This class uses memory-optimised, lazily
      # initialised, fast to recycle relation
      # serialization.
      #
      # The `JSON.generate` does use `#to_json`,
      # that returns raw JSON content that is written
      # directly to file.
      class JSONBatchRelation
        include Gitlab::Utils::StrongMemoize

        # @param relation [#each] records to serialize; must also respond to
        #   `preload` when `preloads` is given
        # @param options [Hash, nil] options forwarded to each record's `to_json`
        # @param preloads [Object, nil] argument for `relation.preload`;
        #   nothing is preloaded when nil
        def initialize(relation, options, preloads)
          @relation = relation
          @options = options
          @preloads = preloads
        end

        # Serializes every record with `to_json(@options)` and joins the
        # fragments with commas. The result is NOT wrapped in brackets.
        # The string is memoized, so the relation is only iterated once.
        #
        # @return [String] comma-separated JSON fragments
        def raw_json
          strong_memoize(:raw_json) do
            json = +''

            records = @relation
            records = records.preload(@preloads) if @preloads

            # Append to a single mutable buffer instead of collecting an
            # intermediate array of per-record strings.
            records.each do |item|
              json << ',' unless json.empty?
              json << item.to_json(@options)
            end

            json
          end
        end

        # Returns the pre-rendered fragment; the argument (e.g. the generator
        # state handed over by `JSON.generate`) is accepted but ignored.
        #
        # @return [String]
        def to_json(_options = {})
          raw_json
        end

        # This object only exists as raw JSON text, it has no Hash form.
        #
        # @raise [NotImplementedError] always
        def as_json(*)
          raise NotImplementedError
        end
      end

      # @param subject [Object] object to serialize; must respond to `as_json`
      #   and to every association name listed in `tree[:include]`
      # @param tree [Hash] `as_json` options; `:include` holds a list of
      #   single-entry hashes (`{ association => options }`) and `:preload`
      #   is looked up by association name
      # @param batch_size [Integer] number of records per batch for relations
      def initialize(subject, tree, batch_size: BATCH_SIZE)
        @subject = subject
        @batch_size = batch_size
        @tree = tree
      end

      # With the usage of `JSONBatchRelation`, it returns partially
      # serialized hash which is not easily accessible.
      # It means you can only manipulate and replace top-level objects.
      # All future mutations of the hash (such as `fix_project_tree`)
      # should be aware of that.
      #
      # @return [Hash] the subject's own attributes merged with one
      #   string-keyed entry per serialized include
      def execute
        simple_serialize.merge(serialize_includes)
      end

      private

      # Serializes only the subject itself. Includes are handled separately by
      # `serialize_includes`, so both the `:include` and the `:preload` entries
      # of the tree are blanked out before the options reach `as_json`.
      #
      # NOTE: the key is `:preload` (singular) to match what `#preloads` reads.
      def simple_serialize
        subject.as_json(
          tree.merge(include: nil, preload: nil))
      end

      # @return [Hash] one entry per include definition that produced a value;
      #   empty when the tree has no `:include`
      def serialize_includes
        return {} unless includes

        includes
          .map { |definition| serialize_include_definition(definition) }
          .compact
          .to_h
      end

      # definition:
      # { labels: { includes: ... } }
      #
      # @return [Array(String, Object), nil] `[key, serialized]` pair, or nil
      #   when the association or its serialized form is nil/false
      # @raise [ArgumentError] unless `definition` is a Hash with exactly one entry
      def serialize_include_definition(definition)
        raise ArgumentError, 'definition needs to be Hash' unless definition.is_a?(Hash)
        raise ArgumentError, 'definition needs to have exactly one Hash element' unless definition.one?

        key, options = definition.first

        record = subject.public_send(key) # rubocop: disable GitlabSecurity/PublicSend
        return unless record

        serialized_record = serialize_record(key, record, options)
        return unless serialized_record

        # `#as_json` always returns keys as `strings`
        [key.to_s, serialized_record]
      end

      # Serializes a single association value.
      #
      # @return [Object] `record.as_json(options)` for a non-relation value,
      #   or an Array of `JSONBatchRelation` (one per batch) for a relation
      # @raise [RuntimeError] when `record` does not respond to `as_json`
      def serialize_record(key, record, options)
        unless record.respond_to?(:as_json)
          raise "Invalid type of #{key} is #{record.class}"
        end

        # no has-many relation
        unless record.is_a?(ActiveRecord::Relation)
          return record.as_json(options)
        end

        # Same for every batch, so look it up once. A tree without a
        # `:preload` entry simply means "nothing to preload".
        batch_preloads = preloads && preloads[key]

        data = []

        record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches
          # order each batch by its primary key to ensure
          # consistent and predictable ordering of each exported relation
          # as additional `WHERE` clauses can impact the order in which data is being
          # returned by database when no `ORDER` is specified
          batch = batch.reorder(batch.klass.primary_key)

          # Render the batch right away (`raw_json` memoizes the result)
          # rather than waiting for `JSON.generate` to call `#to_json`.
          data << JSONBatchRelation.new(batch, options, batch_preloads).tap(&:raw_json)
        end

        data
      end

      # Include definitions of the tree, or nil when there are none.
      def includes
        tree[:include]
      end

      # Preload definitions of the tree, looked up by association name; may be nil.
      def preloads
        tree[:preload]
      end
    end
  end
end