# Custom Analyzer for ActiveRecord integration with Elasticsearch
# ===============================================================

$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'ansi'
require 'logger'

require 'active_record'
require 'elasticsearch/model'

ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT)
ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

ActiveRecord::Schema.define(version: 1) do
  create_table :articles do |t|
    t.string :title
    t.date :published_at
    t.timestamps
  end
end

Elasticsearch::Model.client.transport.logger = ActiveSupport::Logger.new(STDOUT)
Elasticsearch::Model.client.transport.logger.formatter = lambda { |_severity, _datetime, _progname, msg| "#{msg.ansi(:faint)}\n" }

class Article < ActiveRecord::Base
  include Elasticsearch::Model

  settings index: {
    number_of_shards: 1,
    number_of_replicas: 0,
    analysis: {
      analyzer: {
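        # Splits input on whitespace, underscores, hyphens and dots,
        # and lowercases the resulting tokens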
        pattern: {
          type: 'pattern',
          pattern: "\\s|_|-|\\.",
          lowercase: true
        },
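        # Tokenizes with the custom `trigram` tokenizer defined below
        # (note: no lowercase filter is added here)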
        trigram: {
          tokenizer: 'trigram'
        }
      },
      tokenizer: {
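        # An `ngram` tokenizer emitting overlapping 3-character grams;
        # `token_chars` keeps only letters and digits, so e.g. "Bazooka"
        # yields [Baz azo zoo ook oka]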
        trigram: {
          type: 'ngram',
          min_gram: 3,
          max_gram: 3,
          token_chars: ['letter', 'digit']
        }
      }
    }
  } do
    mapping do
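      # Multi-field mapping: the title is indexed with the built-in `english`
      # analyzer, plus three sub-fields analyzed with the `keyword` analyzer
      # and the two custom analyzers above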
      indexes :title, type: 'text', analyzer: 'english' do
        indexes :keyword, analyzer: 'keyword'
        indexes :pattern, analyzer: 'pattern'
        indexes :trigram, analyzer: 'trigram'
      end
    end
  end
end
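
# Note: `Elasticsearch::Model::Callbacks` is not included, so records are not
# synced to the index automatically; the bulk `import` below (re)creates the
# index and indexes all records in bulk.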

# Create example records
#
Article.delete_all
Article.create title: 'Foo'
Article.create title: 'Foo-Bar'
Article.create title: 'Foo_Bar_Bazooka'
Article.create title: 'Foo.Bar'

# Index records
#
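# `force: true` deletes and re-creates the index (applying the settings and
# mappings above), `refresh: true` makes the documents searchable right away,
# and `return: 'errors'` returns the bulk items that failed, if any.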
errors = Article.import force: true, refresh: true, return: 'errors'

unless errors.empty?
  puts "[!] Errors importing records: #{errors.map { |d| d['index']['error'] }.join(', ')}".ansi(:red)
  exit(1)
end

puts '', '-'*80
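
# Show how each analyzer tokenizes the same input, using the `_analyze` API
# (`client.indices.analyze` in elasticsearch-ruby)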
puts "English analyzer [Foo_Bar_1_Bazooka]".ansi(:bold),
     "Tokens: " +
       Article.__elasticsearch__.client.indices
              .analyze(index: Article.index_name, body: { field: 'title', text: 'Foo_Bar_1_Bazooka' })['tokens']
              .map { |d| "[#{d['token']}]" }.join(' '),
     "\n"

puts "Keyword analyzer [Foo_Bar_1_Bazooka]".ansi(:bold),
     "Tokens: " +
       Article.__elasticsearch__.client.indices
              .analyze(index: Article.index_name, body: { field: 'title.keyword', text: 'Foo_Bar_1_Bazooka' })['tokens']
              .map { |d| "[#{d['token']}]" }.join(' '),
     "\n"

puts "Pattern analyzer [Foo_Bar_1_Bazooka]".ansi(:bold),
     "Tokens: " +
       Article.__elasticsearch__.client.indices
              .analyze(index: Article.index_name, body: { field: 'title.pattern', text: 'Foo_Bar_1_Bazooka' })['tokens']
              .map { |d| "[#{d['token']}]" }.join(' '),
     "\n"

puts "Trigram analyzer [Foo_Bar_1_Bazooka]".ansi(:bold),
     "Tokens: " +
       Article.__elasticsearch__.client.indices
              .analyze(index: Article.index_name, body: { field: 'title.trigram', text: 'Foo_Bar_1_Bazooka' })['tokens']
              .map { |d| "[#{d['token']}]" }.join(' '),
     "\n"

puts '', '-'*80
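
# Search against the english-analyzed `title` field: 'foo' only matches
# titles in which the standard tokenizer emits 'foo' as a token of its own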
response = Article.search query: { match: { 'title' => 'foo' } }

puts "English search for 'foo'".ansi(:bold),
     "#{response.response.hits.total} matches: " +
       response.records.map { |d| d.title }.join(', '),
     "\n"

puts '', '-'*80
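
# The `title.pattern` sub-field splits on _, - and ., so 'foo' matches all
# four example titles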
response = Article.search query: { match: { 'title.pattern' => 'foo' } }

puts "Pattern search for 'foo'".ansi(:bold),
     "#{response.response.hits.total} matches: " +
       response.records.map { |d| d.title }.join(', '),
     "\n"

puts '', '-'*80
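
# The `title.trigram` sub-field allows partial matching: 'zoo' is one of the
# trigrams of 'Bazooka'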
response = Article.search query: { match: { 'title.trigram' => 'zoo' } }

puts "Trigram search for 'zoo'".ansi(:bold),
     "#{response.response.hits.total} matches: " +
       response.records.map { |d| d.title }.join(', '),
     "\n"

puts '', '-'*80
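
# Drop into an interactive Pry session to explore the index and queries by hand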
require 'pry'; binding.pry