2021-04-29 21:17:54 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
|
|
|
|
module Database
|
|
|
|
# This class builds efficient batched distinct query by using loose index scan.
|
|
|
|
# Consider the following example:
|
|
|
|
# > Issue.distinct(:project_id).where(project_id: (1...100)).count
|
|
|
|
#
|
|
|
|
# Note: there is an index on project_id
|
|
|
|
#
|
|
|
|
# This query will read each element in the index matching the project_id filter.
|
|
|
|
# If for a project_id has 100_000 issues, all 100_000 elements will be read.
|
|
|
|
#
|
2021-06-08 01:23:25 +05:30
|
|
|
# A loose index scan will only read one entry from the index for each project_id to reduce the number of disk reads.
|
2021-04-29 21:17:54 +05:30
|
|
|
#
|
|
|
|
# Usage:
|
|
|
|
#
|
|
|
|
# Gitlab::Database::LooseIndexScanDisctinctCount.new(Issue, :project_id).count(from: 1, to: 100)
|
|
|
|
#
|
|
|
|
# The query will return the number of distinct projects_ids between 1 and 100
|
|
|
|
#
|
|
|
|
# Getting the Arel query:
|
|
|
|
#
|
|
|
|
# Gitlab::Database::LooseIndexScanDisctinctCount.new(Issue, :project_id).build_query(from: 1, to: 100)
|
|
|
|
class LooseIndexScanDistinctCount
|
|
|
|
COLUMN_ALIAS = 'distinct_count_column'
|
|
|
|
|
|
|
|
ColumnConfigurationError = Class.new(StandardError)
|
|
|
|
|
|
|
|
def initialize(scope, column)
|
|
|
|
if scope.is_a?(ActiveRecord::Relation)
|
|
|
|
@scope = scope
|
|
|
|
@model = scope.model
|
|
|
|
else
|
|
|
|
@scope = scope.where({})
|
|
|
|
@model = scope
|
|
|
|
end
|
|
|
|
|
|
|
|
@column = transform_column(column)
|
|
|
|
end
|
|
|
|
|
|
|
|
def count(from:, to:)
|
|
|
|
build_query(from: from, to: to).count(COLUMN_ALIAS)
|
|
|
|
end
|
|
|
|
|
|
|
|
def build_query(from:, to:) # rubocop:disable Metrics/AbcSize
|
|
|
|
cte = Gitlab::SQL::RecursiveCTE.new(:counter_cte, union_args: { remove_order: false })
|
|
|
|
table = model.arel_table
|
|
|
|
|
|
|
|
cte << @scope
|
|
|
|
.dup
|
|
|
|
.select(column.as(COLUMN_ALIAS))
|
|
|
|
.where(column.gteq(from))
|
|
|
|
.where(column.lt(to))
|
|
|
|
.order(column)
|
|
|
|
.limit(1)
|
|
|
|
|
|
|
|
inner_query = @scope
|
|
|
|
.dup
|
|
|
|
.where(column.gt(cte.table[COLUMN_ALIAS]))
|
|
|
|
.where(column.lt(to))
|
|
|
|
.select(column.as(COLUMN_ALIAS))
|
|
|
|
.order(column)
|
|
|
|
.limit(1)
|
|
|
|
|
|
|
|
cte << cte.table
|
|
|
|
.project(Arel::Nodes::Grouping.new(Arel.sql(inner_query.to_sql)).as(COLUMN_ALIAS))
|
|
|
|
.where(cte.table[COLUMN_ALIAS].lt(to))
|
|
|
|
|
|
|
|
model
|
|
|
|
.with
|
|
|
|
.recursive(cte.to_arel)
|
|
|
|
.from(cte.alias_to(table))
|
|
|
|
.unscope(where: :source_type)
|
|
|
|
.unscope(where: model.inheritance_column) # Remove STI query, not needed here
|
|
|
|
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
attr_reader :column, :model
|
|
|
|
|
|
|
|
# Transforms the column so it can be used in Arel expressions
|
|
|
|
#
|
|
|
|
# 'table.column' => 'table.column'
|
|
|
|
# 'column' => 'table_name.column'
|
|
|
|
# :column => 'table_name.column'
|
|
|
|
# Arel::Attributes::Attribute => name of the column
|
|
|
|
def transform_column(column)
|
|
|
|
if column.is_a?(String) || column.is_a?(Symbol)
|
|
|
|
column_as_string = column.to_s
|
|
|
|
column_as_string = "#{model.table_name}.#{column_as_string}" unless column_as_string.include?('.')
|
|
|
|
|
|
|
|
Arel.sql(column_as_string)
|
|
|
|
elsif column.is_a?(Arel::Attributes::Attribute)
|
|
|
|
column
|
|
|
|
else
|
2021-06-08 01:23:25 +05:30
|
|
|
raise ColumnConfigurationError, "Cannot transform the column: #{column.inspect}, please provide the column name as string"
|
2021-04-29 21:17:54 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|