debian-mirror-gitlab/elasticsearch-rails/lib/elasticsearch/rails/tasks/import.rb
2020-03-09 13:42:32 +05:30

112 lines
3.7 KiB
Ruby

# A collection of Rake tasks to facilitate importing data from your models into Elasticsearch.
#
# Add this e.g. into the `lib/tasks/elasticsearch.rake` file in your Rails application:
#
# require 'elasticsearch/rails/tasks/import'
#
# To import the records from your `Article` model, run:
#
# $ bundle exec rake environment elasticsearch:import:model CLASS='MyModel'
#
# Run this command to display usage instructions:
#
# $ bundle exec rake -D elasticsearch
#
STDOUT.sync = true
STDERR.sync = true
begin; require 'ansi/progressbar'; rescue LoadError; end
namespace :elasticsearch do
task :import => 'import:model'
namespace :import do
import_model_desc = <<-DESC.gsub(/ /, '')
Import data from your model (pass name as CLASS environment variable).
$ rake environment elasticsearch:import:model CLASS='MyModel'
Force rebuilding the index (delete and create):
$ rake environment elasticsearch:import:model CLASS='Article' FORCE=y
Customize the batch size:
$ rake environment elasticsearch:import:model CLASS='Article' BATCH=100
Set target index name:
$ rake environment elasticsearch:import:model CLASS='Article' INDEX='articles-new'
Pass an ActiveRecord scope to limit the imported records:
$ rake environment elasticsearch:import:model CLASS='Article' SCOPE='published'
DESC
desc import_model_desc
task :model do
if ENV['CLASS'].to_s == ''
puts '='*90, 'USAGE', '='*90, import_model_desc, ""
exit(1)
end
klass = eval(ENV['CLASS'].to_s)
total = klass.count rescue nil
pbar = ANSI::Progressbar.new(klass.to_s, total) rescue nil
pbar.__send__ :show if pbar
unless ENV['DEBUG']
begin
klass.__elasticsearch__.client.transport.logger.level = Logger::WARN
rescue NoMethodError; end
begin
klass.__elasticsearch__.client.transport.tracer.level = Logger::WARN
rescue NoMethodError; end
end
total_errors = klass.__elasticsearch__.import force: ENV.fetch('FORCE', false),
batch_size: ENV.fetch('BATCH', 1000).to_i,
index: ENV.fetch('INDEX', nil),
type: ENV.fetch('TYPE', nil),
scope: ENV.fetch('SCOPE', nil) do |response|
pbar.inc response['items'].size if pbar
STDERR.flush
STDOUT.flush
end
pbar.finish if pbar
puts "[IMPORT] #{total_errors} errors occurred" unless total_errors.zero?
puts '[IMPORT] Done'
end
desc <<-DESC.gsub(/ /, '')
Import all indices from `app/models` (or use DIR environment variable).
$ rake environment elasticsearch:import:all DIR=app/models
DESC
task :all do
dir = ENV['DIR'].to_s != '' ? ENV['DIR'] : Rails.root.join("app/models")
puts "[IMPORT] Loading models from: #{dir}"
Dir.glob(File.join("#{dir}/**/*.rb")).each do |path|
model_filename = path[/#{Regexp.escape(dir.to_s)}\/([^\.]+).rb/, 1]
next if model_filename.match(/^concerns\//i) # Skip concerns/ folder
begin
klass = model_filename.camelize.constantize
rescue NameError
require(path) ? retry : raise(RuntimeError, "Cannot load class '#{klass}'")
end
# Skip if the class doesn't have Elasticsearch integration
next unless klass.respond_to?(:__elasticsearch__)
puts "[IMPORT] Processing model: #{klass}..."
ENV['CLASS'] = klass.to_s
Rake::Task["elasticsearch:import:model"].invoke
Rake::Task["elasticsearch:import:model"].reenable
puts
end
end
end
end