# frozen_string_literal: true module Gitlab module BackgroundMigration # EncryptColumn migrates data from an unencrypted column - `foo`, say - to # an encrypted column - `encrypted_foo`, say. # # For this background migration to work, the table that is migrated _has_ to # have an `id` column as the primary key. Additionally, the encrypted column # should be managed by attr_encrypted, and map to an attribute with the same # name as the unencrypted column (i.e., the unencrypted column should be # shadowed). # # To avoid depending on a particular version of the model in app/, add a # model to `lib/gitlab/background_migration/models/encrypt_columns` and use # it in the migration that enqueues the jobs, so code can be shared. class EncryptColumns def perform(model, attributes, from, to) model = model.constantize if model.is_a?(String) # If sidekiq hasn't undergone a restart, its idea of what columns are # present may be inaccurate, so ensure this is as fresh as possible model.reset_column_information model.define_attribute_methods attributes = expand_attributes(model, Array(attributes).map(&:to_sym)) model.transaction do # Use SELECT ... FOR UPDATE to prevent the value being changed while # we are encrypting it relation = model.where(id: from..to).lock relation.each do |instance| encrypt!(instance, attributes) end end end private # Build a hash of { attribute => encrypted column name } def expand_attributes(klass, attributes) expanded = attributes.flat_map do |attribute| attr_config = klass.encrypted_attributes[attribute] crypt_column_name = attr_config&.fetch(:attribute) raise "Couldn't determine encrypted column for #{klass}##{attribute}" if crypt_column_name.nil? raise "#{klass} source column: #{attribute} is missing" unless klass.column_names.include?(attribute.to_s) # Running the migration without the destination column being present # leads to data loss raise "#{klass} destination column: #{crypt_column_name} is missing" unless klass.column_names.include?(crypt_column_name.to_s) [attribute, crypt_column_name] end Hash[*expanded] end # Generate ciphertext for each column and update the database def encrypt!(instance, attributes) to_clear = attributes .map { |plain, crypt| apply_attribute!(instance, plain, crypt) } .compact .flat_map { |plain| [plain, nil] } to_clear = Hash[*to_clear] if instance.changed? instance.save! instance.update_columns(to_clear) end end def apply_attribute!(instance, plain_column, crypt_column) plaintext = instance[plain_column] ciphertext = instance[crypt_column] # No need to do anything if the plaintext is nil, or an encrypted # value already exists return nil unless plaintext.present? && !ciphertext.present? # attr_encrypted will calculate and set the expected value for us instance.public_send("#{plain_column}=", plaintext) # rubocop:disable GitlabSecurity/PublicSend plain_column end end end end