2021-04-29 21:17:54 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
#
|
|
|
|
# Query a recursively defined namespace hierarchy using linear methods through
|
|
|
|
# the traversal_ids attribute.
|
|
|
|
#
|
|
|
|
# Namespace is a nested hierarchy of one parent to many children. A search
|
|
|
|
# using only the parent-child relationships is a slow operation. This process
|
|
|
|
# was previously optimized using Postgresql recursive common table expressions
|
|
|
|
# (CTE) with acceptable performance. However, it led to slower than possible
|
|
|
|
# performance, and resulted in complicated queries that were difficult to make
|
|
|
|
# performant.
|
|
|
|
#
|
|
|
|
# Instead of searching the hierarchy recursively, we store a `traversal_ids`
|
|
|
|
# attribute on each node. The `traversal_ids` is an ordered array of Namespace
|
|
|
|
# IDs that define the traversal path from the root Namespace to the current
|
|
|
|
# Namespace.
|
|
|
|
#
|
|
|
|
# For example, suppose we have the following Namespaces:
|
|
|
|
#
|
|
|
|
# GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4)
|
|
|
|
#
|
|
|
|
# Then `traversal_ids` for group "Access" is [1, 2, 3, 4]
|
|
|
|
#
|
|
|
|
# And we can match against other Namespace `traversal_ids` such that:
|
|
|
|
#
|
|
|
|
# - Ancestors are [1], [1, 2], [1, 2, 3]
|
|
|
|
# - Descendants are [1, 2, 3, 4, *]
|
|
|
|
# - Root is [1]
|
|
|
|
# - Hierarchy is [1, *]
|
|
|
|
#
|
|
|
|
# Note that this search method works so long as the IDs are unique and the
|
|
|
|
# traversal path is ordered from root to leaf nodes.
|
|
|
|
#
|
|
|
|
# We implement this in the database using Postgresql arrays, indexed by a
|
|
|
|
# generalized inverted index (gin).
|
|
|
|
module Namespaces
|
|
|
|
module Traversal
|
|
|
|
module Linear
|
|
|
|
extend ActiveSupport::Concern
|
2021-10-27 15:23:28 +05:30
|
|
|
include LinearScopes
|
2021-04-29 21:17:54 +05:30
|
|
|
|
|
|
|
# Raised by `lineage` when it is called without a `top` or a `bottom` bound.
UnboundedSearch = Class.new(StandardError)
|
|
|
|
|
|
|
|
included do
  # Re-parenting: lock the affected roots before the update is written, then
  # resync traversal_ids once the new parent_id has been saved.
  before_update :lock_both_roots, if: :parent_id_changed?
  after_update :sync_traversal_ids, if: :saved_change_to_parent_id?

  # This uses the Rails-internal before_commit API to sync traversal_ids on
  # namespace create, right before the transaction is committed. This helps
  # reduce the time during which the root namespace record is locked to
  # ensure updated traversal_ids are valid.
  before_commit :sync_traversal_ids, on: [:create]

  # After commit, compute the in-memory traversal_ids when none are available
  # yet or when the parent has just changed.
  after_commit :set_traversal_ids,
    on: %i[create update],
    if: -> { traversal_ids.empty? || saved_change_to_parent_id? }

  define_model_callbacks :sync_traversal_ids
end
|
|
|
|
|
2022-06-21 17:19:12 +05:30
|
|
|
class_methods do
|
|
|
|
# Reduces the traversal_ids of the current scope to the list of shortest
# prefixes. A shorter prefix already covers every namespace nested below it,
# so longer paths starting with an already-kept prefix are dropped.
#
# Example:
# INPUT: [[4909902], [4909902,51065789], [4909902,51065793], [7135830], [15599674, 1], [15599674, 1, 3], [15599674, 2]]
# RESULT: [[4909902], [7135830], [15599674, 1], [15599674, 2]]
#
# Raises ArgumentError when `use_traversal_ids?` is false.
def shortest_traversal_ids_prefixes
  unless use_traversal_ids?
    raise ArgumentError, 'Feature not supported since the `:use_traversal_ids` is disabled'
  end

  # Sorting (O(n log n)) puts every prefix directly ahead of the paths it
  # covers, so a single O(n) pass comparing against the last kept prefix is
  # enough.
  sorted_paths = all.order('namespaces.traversal_ids').pluck('namespaces.traversal_ids')
  current_prefix = [nil]

  sorted_paths.each_with_object([]) do |path, shortest|
    next if current_prefix == path[0..(current_prefix.count - 1)]

    current_prefix = path
    shortest << path
  end
end
|
|
|
|
end
|
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
# Assigns traversal_ids through the inherited writer and discards any
# in-memory transient value so it cannot shadow the assigned one.
def traversal_ids=(ids)
  super
  self.transient_traversal_ids = nil
end
|
|
|
|
|
|
|
|
# Returns the stored traversal_ids attribute when it is present; otherwise
# falls back to the transient in-memory value, and finally to an empty array.
def traversal_ids
  stored = read_attribute(:traversal_ids).presence
  return stored if stored

  transient_traversal_ids || []
end
|
|
|
|
|
2021-04-29 21:17:54 +05:30
|
|
|
# True when the `:use_traversal_ids` feature flag is enabled and this
# namespace has traversal_ids to query with; false when the flag is off.
def use_traversal_ids?
  Feature.enabled?(:use_traversal_ids) ? traversal_ids.present? : false
end
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# True when linear queries are usable at all (`use_traversal_ids?`), the
# `:use_traversal_ids_for_self_and_hierarchy` flag is enabled for the root
# ancestor, and traversal_ids are present.
def use_traversal_ids_for_self_and_hierarchy?
  allowed = use_traversal_ids? &&
    Feature.enabled?(:use_traversal_ids_for_self_and_hierarchy, root_ancestor)
  return false unless allowed

  traversal_ids.present?
end
|
|
|
|
|
2021-09-30 23:02:18 +05:30
|
|
|
# True when linear queries are usable at all (`use_traversal_ids?`), the
# `:use_traversal_ids_for_ancestors` flag is enabled for the root ancestor,
# and traversal_ids are present.
def use_traversal_ids_for_ancestors?
  allowed = use_traversal_ids? &&
    Feature.enabled?(:use_traversal_ids_for_ancestors, root_ancestor)
  return false unless allowed

  traversal_ids.present?
end
|
|
|
|
|
2022-01-26 12:08:38 +05:30
|
|
|
# True when linear queries are usable at all (`use_traversal_ids?`), the
# `:use_traversal_ids_for_ancestors_upto` flag is enabled for the root
# ancestor, and traversal_ids are present.
def use_traversal_ids_for_ancestors_upto?
  allowed = use_traversal_ids? &&
    Feature.enabled?(:use_traversal_ids_for_ancestors_upto, root_ancestor)
  return false unless allowed

  traversal_ids.present?
end
|
|
|
|
|
2021-09-30 23:02:18 +05:30
|
|
|
# True when the `:use_traversal_ids_for_root_ancestor` flag is enabled and
# traversal_ids are present. The flag is checked without an actor.
def use_traversal_ids_for_root_ancestor?
  Feature.enabled?(:use_traversal_ids_for_root_ancestor) ? traversal_ids.present? : false
end
|
|
|
|
|
|
|
|
# Returns the root namespace of this hierarchy.
#
# Defers to the inherited implementation unless
# `use_traversal_ids_for_root_ancestor?` is true. Otherwise the result is
# memoized: a namespace without a parent is its own root, and any other
# namespace looks the root up by the first entry of traversal_ids.
def root_ancestor
  return super unless use_traversal_ids_for_root_ancestor?

  strong_memoize(:root_ancestor) do
    parent_id.nil? ? self : Namespace.find_by(id: traversal_ids.first)
  end
end
|
|
|
|
|
2021-04-29 21:17:54 +05:30
|
|
|
# Returns this namespace together with everything below it. Uses the linear
# `lineage` search bounded at the top by self when `use_traversal_ids?` is
# true, and the inherited implementation otherwise.
def self_and_descendants
  if use_traversal_ids?
    lineage(top: self)
  else
    super
  end
end
|
|
|
|
|
2021-09-04 01:27:46 +05:30
|
|
|
# Returns `self_and_descendants.as_ids` when `use_traversal_ids?` is true;
# otherwise defers to the inherited implementation.
def self_and_descendant_ids
  use_traversal_ids? ? self_and_descendants.as_ids : super
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# Returns everything below this namespace: `self_and_descendants` with this
# record's own id filtered out. Defers to the inherited implementation when
# `use_traversal_ids?` is false.
def descendants
  if use_traversal_ids?
    subtree = self_and_descendants
    subtree.where.not(id: id)
  else
    super
  end
end
|
|
|
|
|
2022-03-02 08:16:31 +05:30
|
|
|
# Returns this namespace together with its descendants and its ancestors,
# combined with `or`. Defers to the inherited implementation unless
# `use_traversal_ids_for_self_and_hierarchy?` is true.
def self_and_hierarchy
  return super unless use_traversal_ids_for_self_and_hierarchy?

  subtree = self_and_descendants
  subtree.or(ancestors)
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# Returns the namespaces above this one.
#
# Defers to the inherited implementation unless
# `use_traversal_ids_for_ancestors?` is true. A namespace without a parent
# has no ancestors, so an empty relation is returned; otherwise the lineage
# is searched from the parent upwards, forwarding `hierarchy_order`.
def ancestors(hierarchy_order: nil)
  return super unless use_traversal_ids_for_ancestors?

  if parent_id.blank?
    self.class.none
  else
    lineage(bottom: parent, hierarchy_order: hierarchy_order)
  end
end
|
|
|
|
|
2021-09-30 23:02:18 +05:30
|
|
|
# Returns the ids of all ancestors, read straight from traversal_ids (every
# entry except the last one).
#
# With `hierarchy_order: :desc` the ids keep traversal_ids order; any other
# value returns them reversed. Defers to the inherited implementation unless
# `use_traversal_ids_for_ancestors?` is true.
def ancestor_ids(hierarchy_order: nil)
  return super unless use_traversal_ids_for_ancestors?

  without_self = traversal_ids[0..-2]
  hierarchy_order == :desc ? without_self : without_self.reverse
end
|
|
|
|
|
2022-01-26 12:08:38 +05:30
|
|
|
# Returns all ancestors up to, but excluding, the given top.
# When no top is given, all ancestors are returned.
# When top is not found, all ancestors are returned as well.
#
# This copies the behavior of the recursive method. We will deprecate
# this behavior soon.
#
# Defers to the inherited implementation unless
# `use_traversal_ids_for_ancestors_upto?` is true.
def ancestors_upto(top = nil, hierarchy_order: nil)
  return super unless use_traversal_ids_for_ancestors_upto?

  # The default cannot live in the method signature because the original
  # arguments have to reach super untouched.
  hierarchy_order ||= :desc

  # Slice traversal_ids from just below the top down to (excluding) self.
  first_index = ancestors_upto_top_index(top)
  ancestor_ids_nearest_first = traversal_ids[first_index...-1].reverse

  # Every id is coerced with Integer() before being interpolated into SQL.
  ids_string = ancestor_ids_nearest_first.map { |ancestor_id| Integer(ancestor_id) }.join(',')

  # WITH ORDINALITY lets us order the result to match traversal_ids order.
  from_sql = <<~SQL
    unnest(ARRAY[#{ids_string}]::bigint[]) WITH ORDINALITY AS ancestors(id, ord)
    INNER JOIN namespaces ON namespaces.id = ancestors.id
  SQL

  ordered_source = self.class.from(Arel.sql(from_sql))
  ordered_source.order('ancestors.ord': hierarchy_order)
end
|
|
|
|
|
2021-09-30 23:02:18 +05:30
|
|
|
# Returns this namespace together with the namespaces above it.
#
# Defers to the inherited implementation unless
# `use_traversal_ids_for_ancestors?` is true. A namespace without a parent
# yields a relation containing only itself; otherwise the lineage is
# searched from self upwards, forwarding `hierarchy_order`.
def self_and_ancestors(hierarchy_order: nil)
  return super unless use_traversal_ids_for_ancestors?

  if parent_id.blank?
    self.class.where(id: id)
  else
    lineage(bottom: self, hierarchy_order: hierarchy_order)
  end
end
|
|
|
|
|
|
|
|
# Returns this namespace's id together with all ancestor ids, read straight
# from traversal_ids.
#
# With `hierarchy_order: :desc` the ids keep traversal_ids order; any other
# value returns them reversed. Defers to the inherited implementation unless
# `use_traversal_ids_for_ancestors?` is true.
def self_and_ancestor_ids(hierarchy_order: nil)
  return super unless use_traversal_ids_for_ancestors?

  if hierarchy_order == :desc
    traversal_ids
  else
    traversal_ids.reverse
  end
end
|
|
|
|
|
2021-04-29 21:17:54 +05:30
|
|
|
private
|
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
# In-memory traversal_ids value assigned by `set_traversal_ids`. The
# `traversal_ids` reader falls back to it when the stored attribute is blank,
# and the `traversal_ids=` writer resets it to nil.
attr_accessor :transient_traversal_ids
|
|
|
|
|
2021-04-29 21:17:54 +05:30
|
|
|
# Update the traversal_ids for the full hierarchy, wrapped in the
# `:sync_traversal_ids` model callbacks.
#
# NOTE: self.traversal_ids will be stale. Reload for a fresh record.
def sync_traversal_ids
  run_callbacks(:sync_traversal_ids) do
    # Our ancestors have changed, so drop any previously memoized root_ancestor.
    clear_memoization(:root_ancestor)

    hierarchy = Namespace::TraversalHierarchy.for_namespace(self)
    hierarchy.sync_traversal_ids!
  end
end
|
|
|
|
|
2023-04-23 21:23:45 +05:30
|
|
|
# Computes this namespace's traversal_ids in memory (parent's traversal_ids
# plus our own id, or just our own id for a root) and stores them in
# `transient_traversal_ids`.
#
# Does nothing for project namespaces or when the
# `:set_traversal_ids_on_save` flag is disabled for the root ancestor.
def set_traversal_ids
  # This is a temporary guard and will be removed.
  return if is_a?(Namespaces::ProjectNamespace)

  return unless Feature.enabled?(:set_traversal_ids_on_save, root_ancestor)

  self.transient_traversal_ids = if parent_id
                                   parent.traversal_ids + [id]
                                 else
                                   [id]
                                 end

  # Clear root_ancestor memo if changed. The stored attribute must be read by
  # NAME: passing the traversal_ids value as the attribute name never matches
  # a column, so the lookup would always be nil and the memo would be cleared
  # on every call.
  if read_attribute(:traversal_ids)&.first != transient_traversal_ids.first
    clear_memoization(:root_ancestor)
  end

  # Update traversal_ids for any associated child objects.
  children.each(&:reload) if children.loaded?
end
|
|
|
|
|
2021-06-08 01:23:25 +05:30
|
|
|
# Lock the root of the hierarchy we just left, and lock the root of the
# hierarchy we just joined. In most cases the two hierarchies will be the same.
def lock_both_roots
  # Start from the old and the new parent; when either is missing, fall back
  # to this namespace itself.
  previous_start = parent_id_was || id
  current_start = parent_id || id
  start_ids = [previous_start, current_start].compact

  start_nodes = Namespace.where(id: start_ids)
  roots = Gitlab::ObjectHierarchy.new(start_nodes)
    .base_and_ancestors
    .reorder(nil)
    .where(parent_id: nil)

  # Load the root rows with a lock, ordered by ascending id.
  Namespace.lock.select(:id).where(id: roots).order(id: :asc).load
end
|
|
|
|
|
2021-04-29 21:17:54 +05:30
|
|
|
# Search this namespace's lineage. Bound inclusively by top node.
#
# top             - when given, keeps rows whose traversal_ids contain top.id.
# bottom          - when given, keeps rows whose id is listed in
#                   bottom.traversal_ids.
# hierarchy_order - when given, orders the result by a computed depth in
#                   that direction.
#
# Raises UnboundedSearch when neither top nor bottom is given.
def lineage(top: nil, bottom: nil, hierarchy_order: nil)
  raise UnboundedSearch, 'Must bound search by either top or bottom' unless top || bottom

  relation = self.class
  relation = relation.where("traversal_ids @> ('{?}')", top.id) if top
  relation = relation.where(id: bottom.traversal_ids) if bottom

  return relation unless hierarchy_order

  # The original `with_depth` attribute in ObjectHierarchy increments as you
  # walk away from the "base" namespace. This direction changes depending on
  # if you are walking up the ancestors or down the descendants.
  depth_sql = "ABS(#{traversal_ids.count} - array_length(traversal_ids, 1))"
  with_depth = relation.select(relation.default_select_columns, "#{depth_sql} as depth")

  # The SELECT includes an extra depth attribute. We wrap the SQL in a
  # standard SELECT to avoid mismatched attribute errors when trying to
  # chain future ActiveRelation commands, and retain the ordering.
  self.class
    .from(with_depth, self.class.table_name)
    .select(with_depth.arel_table[Arel.star])
    .order(depth: hierarchy_order)
end
|
2023-04-23 21:23:45 +05:30
|
|
|
|
|
|
|
# Returns the index into traversal_ids of the first entry below `top`.
# Returns 0 when no top is given or when top's id is not part of
# traversal_ids.
def ancestors_upto_top_index(top)
  return 0 if top.nil?

  position = traversal_ids.find_index(top.id)
  position ? position + 1 : 0
end
|
2021-04-29 21:17:54 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|