debian-mirror-gitlab/lib/gitlab/sql/pattern.rb
2023-03-05 14:24:40 +05:30

83 lines
2.8 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module SQL
module Pattern
extend ActiveSupport::Concern
MIN_CHARS_FOR_PARTIAL_MATCHING = 3
REGEX_QUOTED_TERM = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
class_methods do
# Searches several columns for `query` and combines the results with OR.
#
# query - The text to search for.
# columns - The column names / Arel columns to search in.
# use_minimum_char_limit - Forwarded unchanged to `fuzzy_arel_match`.
#
# Columns for which `fuzzy_arel_match` yields nil are dropped. When nothing
# is left, `where` is called with nil.
def fuzzy_search(query, columns, use_minimum_char_limit: true)
  conditions = columns.map do |column|
    fuzzy_arel_match(column, query, use_minimum_char_limit: use_minimum_char_limit)
  end

  combined = conditions.compact.inject { |memo, condition| memo.or(condition) }

  where(combined)
end
# Turns a search term into a LIKE pattern.
#
# query - The text to search for.
# use_minimum_char_limit - Forwarded unchanged to `partial_matching?`.
#
# Returns the query passed through `sanitize_sql_like`, wrapped in `%`
# wildcards when `partial_matching?` allows partial matching for it.
def to_pattern(query, use_minimum_char_limit: true)
  wildcard = partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)
  escaped = sanitize_sql_like(query)

  wildcard ? "%#{escaped}%" : escaped
end
# Minimum term length required before partial matching is used.
#
# Returns MIN_CHARS_FOR_PARTIAL_MATCHING.
# NOTE(review): presumably a method rather than a direct constant reference
# so that including classes can override the limit - confirm against callers.
def min_chars_for_partial_matching
  MIN_CHARS_FOR_PARTIAL_MATCHING
end
# Decides whether `query` may be matched partially.
#
# query - The text to search for.
# use_minimum_char_limit - When falsy, the length check is skipped and
#                          partial matching is always allowed.
#
# Returns true when the limit is disabled, otherwise whether the query is at
# least `min_chars_for_partial_matching` characters long.
def partial_matching?(query, use_minimum_char_limit: true)
  !use_minimum_char_limit || query.length >= min_chars_for_partial_matching
end
# Builds the condition that matches `query` against a single column.
#
# column - The column name / Arel column to search in.
# query - The text to search for.
# lower_exact_match - When set to `true` and no term qualifies for partial
#                     matching, we'll fall back to comparing `LOWER(column)`
#                     with the downcased query instead of using `ILIKE`.
# use_minimum_char_limit - Forwarded to `select_fuzzy_terms` and `to_pattern`.
#
# Returns nil when `query` is not a String or is blank after squishing.
def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
  return unless query.is_a?(String)

  query = query.squish
  return unless query.present?

  arel_column = column.is_a?(Arel::Attributes::Attribute) ? column : arel_table[column]
  words = select_fuzzy_terms(query, use_minimum_char_limit: use_minimum_char_limit)

  if words.any?
    # Every qualifying term has to match, so the per-term conditions are ANDed.
    words
      .map { |word| arel_column.matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }
      .reduce(:and)
  elsif lower_exact_match
    # No words qualify for partial matching, but we can search for an exact
    # case insensitive match with the query as a whole. The column side is
    # wrapped in LOWER(), so the query has to be downcased as well; otherwise
    # a query containing an uppercase character could never match.
    Arel::Nodes::NamedFunction
      .new('LOWER', [arel_column])
      .eq(query.downcase)
  else
    arel_column.matches(sanitize_sql_like(query))
  end
end
# Splits `query` into search terms and keeps the ones eligible for partial
# matching.
#
# query - The text to search for.
# use_minimum_char_limit - Forwarded unchanged to `partial_matching?`.
#
# Returns the terms for which `partial_matching?` is truthy.
def select_fuzzy_terms(query, use_minimum_char_limit: true)
  Gitlab::SQL::Pattern
    .split_query_to_search_terms(query)
    .select { |term| partial_matching?(term, use_minimum_char_limit: use_minimum_char_limit) }
end
end
# Splits a search query into individual terms.
#
# query - The text to split.
#
# Double-quoted phrases matching REGEX_QUOTED_TERM are kept together as one
# term (without the surrounding quotes); everything else is split on
# whitespace.
#
# Returns the whitespace-separated terms followed by the quoted phrases.
def self.split_query_to_search_terms(query)
  unquoted_terms = query.gsub(REGEX_QUOTED_TERM, '').split
  quoted_terms = query.scan(REGEX_QUOTED_TERM).map { |quoted_term| quoted_term[1...-1] }

  unquoted_terms + quoted_terms
end
end
end
end