debian-mirror-gitlab/lib/gitlab/sql/pattern.rb

82 lines
2.7 KiB
Ruby
Raw Normal View History

2019-02-15 15:39:39 +05:30
# frozen_string_literal: true
2018-03-17 18:26:18 +05:30
module Gitlab
module SQL
module Pattern
extend ActiveSupport::Concern
# Default minimum length a search term must have before it is eligible for
# partial (wildcard) matching.
MIN_CHARS_FOR_PARTIAL_MATCHING = 3

# Matches a double-quoted phrase that stands on its own in a query: it must
# be preceded by the start of the string or a space, and followed by a space
# or the end of the string. The surrounding quotes are part of the match.
REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/.freeze
2018-03-17 18:26:18 +05:30
class_methods do
2019-09-30 21:07:59 +05:30
# Filters the receiver with a fuzzy match of `query` against any of `columns`.
#
# query - The text to search for.
# columns - The columns to search in; each is passed to `fuzzy_arel_match`.
# use_minimum_char_limit - Forwarded unchanged to `fuzzy_arel_match`.
#
# Returns the result of `where` applied to the OR of all per-column
# conditions.
def fuzzy_search(query, columns, use_minimum_char_limit: true)
  conditions = columns.map do |col|
    fuzzy_arel_match(col, query, use_minimum_char_limit: use_minimum_char_limit)
  end

  # Columns that produced no condition (nil) are dropped before OR-ing. If
  # nothing is left, `reduce` yields nil and `where(nil)` is called.
  where(conditions.compact.reduce(:or))
end
2019-09-30 21:07:59 +05:30
# Converts a search term into a LIKE pattern.
#
# query - The search term.
# use_minimum_char_limit - Forwarded to `partial_matching?`.
#
# Returns the term escaped with `sanitize_sql_like`, wrapped in `%` on both
# sides when `partial_matching?` allows partial matching for it.
def to_pattern(query, use_minimum_char_limit: true)
  escaped = sanitize_sql_like(query)
  return escaped unless partial_matching?(query, use_minimum_char_limit: use_minimum_char_limit)

  "%#{escaped}%"
end
2019-07-07 11:18:12 +05:30
# Minimum term length required for partial matching.
#
# `partial_matching?` reads the limit through this method rather than
# referencing the constant directly.
#
# Returns MIN_CHARS_FOR_PARTIAL_MATCHING.
def min_chars_for_partial_matching
  MIN_CHARS_FOR_PARTIAL_MATCHING
end
2019-09-30 21:07:59 +05:30
# Whether `query` may be matched partially (with wildcards).
#
# query - The search term; must respond to `length`.
# use_minimum_char_limit - When falsy, the length check is skipped and the
#                          answer is always true.
#
# Returns true when the limit is disabled or the term is at least
# `min_chars_for_partial_matching` characters long, false otherwise.
def partial_matching?(query, use_minimum_char_limit: true)
  !use_minimum_char_limit || query.length >= min_chars_for_partial_matching
end
2020-01-01 13:55:28 +05:30
# Builds the Arel condition that fuzzily matches `query` against `column`.
#
# column - The column name / Arel column to search in.
# query - The text to search for.
# lower_exact_match - When set to `true` we'll fall back to using
#                     `LOWER(column) = query` instead of using `ILIKE`.
# use_minimum_char_limit - Forwarded to `select_fuzzy_words` and
#                          `to_pattern`.
#
# Returns nil when the squished query is blank, otherwise an Arel node.
def fuzzy_arel_match(column, query, lower_exact_match: false, use_minimum_char_limit: true)
  query = query.squish
  return unless query.present?

  arel_column = column.is_a?(Arel::Attributes::Attribute) ? column : arel_table[column]
  words = select_fuzzy_words(query, use_minimum_char_limit: use_minimum_char_limit)

  if words.any?
    # Every selected word has to match, so the per-word conditions are AND-ed.
    words
      .map { |word| arel_column.matches(to_pattern(word, use_minimum_char_limit: use_minimum_char_limit)) }
      .reduce(:and)
  elsif lower_exact_match
    # No word qualified for partial matching, but we can still search for an
    # exact case insensitive match with the query as a whole. The query is
    # downcased here because the column side is wrapped in LOWER().
    Arel::Nodes::NamedFunction.new('LOWER', [arel_column]).eq(query.downcase)
  else
    # No word qualified for partial matching: match the whole query without
    # wildcards.
    arel_column.matches(sanitize_sql_like(query))
  end
end
2019-09-30 21:07:59 +05:30
# Splits `query` into search terms and keeps those eligible for partial
# matching.
#
# Standalone double-quoted phrases (see REGEX_QUOTED_WORD) are kept as a
# single term with their quotes stripped; the rest of the query is split on
# whitespace. Unquoted terms come first in the result, quoted phrases last.
#
# query - The text to split.
# use_minimum_char_limit - Forwarded to `partial_matching?`.
#
# Returns an Array of Strings for which `partial_matching?` is truthy.
def select_fuzzy_words(query, use_minimum_char_limit: true)
  quoted_words = []

  # gsub with the regex removes exactly the spans that matched. Substituting
  # the matched text as a plain string would remove its first literal
  # occurrence instead, which may be a different, non-standalone one.
  unquoted = query.gsub(REGEX_QUOTED_WORD) do |quoted_word|
    quoted_words << quoted_word[1..-2]
    ''
  end

  (unquoted.split + quoted_words).select do |word|
    partial_matching?(word, use_minimum_char_limit: use_minimum_char_limit)
  end
end
end
end
end
end