debian-mirror-gitlab/lib/banzai/reference_parser/base_parser.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

285 lines
9.1 KiB
Ruby
Raw Normal View History

2018-12-13 13:39:08 +05:30
# frozen_string_literal: true
module Banzai
module ReferenceParser
# Base class for reference parsing classes.
#
# Each parser should also specify its reference type by calling
# `self.reference_type = ...` in the body of the class. The value of this
# method should be a symbol such as `:issue` or `:merge_request`. For
# example:
#
# class IssueParser < BaseParser
# self.reference_type = :issue
# end
#
# The reference type is used to determine what nodes to pass to the
# `referenced_by` method.
#
# Parser classes should either implement the instance method
# `references_relation` or overwrite `referenced_by`. The
# `references_relation` method is supposed to return an
# ActiveRecord::Relation used as a base relation for retrieving the objects
# referenced in a set of HTML nodes.
#
# Each class can implement two additional methods:
#
# * `nodes_user_can_reference`: returns an Array of nodes the given user can
# refer to.
# * `nodes_visible_to_user`: returns an Array of nodes that are visible to
# the given user.
#
# You only need to overwrite these methods if you want to tweak who can see
# which references. For example, the IssueParser class defines its own
# `nodes_visible_to_user` method so it can ensure users can only see issues
# they have access to.
class BaseParser
class << self
  # Class-level settings of a parser. `reference_type` is read by
  # `data_attribute`, `reference_class` and `process`; `reference_options`
  # is passed to `Querying.css` by `process`.
  attr_accessor :reference_type, :reference_options
end
# Returns the attribute name containing the value for every object to be
# parsed by the current parser.
#
# For example, for a parser class that returns "Animal" objects this
# attribute would be "data-animal".
def self.data_attribute
  # Memoized per class: computed once from the dasherized reference type
  # and reused on every later call.
  return @data_attribute if @data_attribute

  dashed_type = reference_type.to_s.dasherize
  @data_attribute = "data-#{dashed_type}"
end
2023-03-04 22:38:38 +05:30
# Returns a model class to use as a reference.
# By default, the method does not take namespaces into account,
# thus parser classes can customize the reference class to use
# a model name with a namespace
def self.reference_class
  # Turns the reference type into a class name, then resolves that name to
  # the constant it refers to.
  class_name = reference_type.to_s.classify
  class_name.constantize
end
2018-05-09 12:01:36 +05:30
# context - An instance of `Banzai::RenderContext`.
def initialize(context)
  # Keeps the given context for later use; it is exposed to instance
  # methods through the private `context` reader.
  @context = context
end
def project_for_node(node)
  # Delegates the lookup to the context this parser was created with.
  context.project_for_node(node)
end
# Returns all the nodes containing references that the user can refer to.
def nodes_user_can_reference(user, nodes)
  # Default implementation applies no filtering: the very same `nodes`
  # object is handed back and `user` is not consulted.
  nodes
end
# Returns all the nodes that are visible to the given user.
def nodes_visible_to_user(user, nodes)
  # Keeps every node without a `data-project` attribute, plus every node
  # with one for which `can_read_reference?` returns a truthy value.
  #
  # user  - Passed to `preload_associations` and `can_read_reference?`.
  # nodes - The HTML nodes to filter.
  #
  # Returns the result of `nodes.select`.
  projects = lazy { projects_for_nodes(nodes) }
  project_attr = 'data-project'

  # `projects` is the { node => project } Hash built by `projects_for_nodes`.
  preload_associations(projects, user)

  nodes.select do |node|
    if node.has_attribute?(project_attr)
      can_read_reference?(user, projects[node], node)
    else
      true
    end
  end
end
# Returns an Array of objects referenced by any of the given HTML nodes.
2021-09-30 23:02:18 +05:30
def referenced_by(nodes, options = {})
  # nodes   - HTML nodes whose `self.class.data_attribute` values are read.
  # options - Hash; when `:ids_only` is truthy (default: false) the unique
  #           attribute values are returned as-is and nothing is queried.
  #
  # Otherwise returns `references_relation` restricted to the collected
  # IDs, or `references_relation.none` when no IDs were found.
  ids = unique_attribute_values(nodes, self.class.data_attribute)

  return ids if options.fetch(:ids_only, false)

  if ids.empty?
    references_relation.none
  else
    references_relation.where(id: ids)
  end
end
# Returns the ActiveRecord::Relation to use for querying references in the
# DB.
def references_relation
  # Abstract hook: this base implementation always raises, naming the
  # receiving class and this method in the error message.
  message = "#{self.class} does not implement #{__method__}"

  raise NotImplementedError, message
end
# Returns a Hash containing attribute values per project ID.
#
# The returned Hash uses the following format:
#
# { project id => [value1, value2, ...] }
#
# nodes - An Array of HTML nodes to process.
# attribute - The name of the attribute (as a String) for which to gather
# values.
#
# Returns a Hash.
def gather_attributes_per_project(nodes, attribute)
  # Groups the values of `attribute` by the integer value of each node's
  # `data-project` attribute. A node without `data-project` is grouped
  # under 0 (nil.to_i); a node without `attribute` contributes nothing.
  grouped = Hash.new { |store, project_key| store[project_key] = Set.new }

  nodes.each_with_object(grouped) do |node, acc|
    project_key = node.attr('data-project').to_i
    value = node.attr(attribute)

    acc[project_key] << value if value
  end
end
2017-08-17 22:00:37 +05:30
# Returns a Hash containing objects for an attribute grouped per the
# nodes that reference them.
#
# The returned Hash uses the following format:
#
2017-08-17 22:00:37 +05:30
# { node => row }
#
# nodes - An Array of HTML nodes to process.
#
# collection - The model or ActiveRecord relation to use for retrieving
# rows from the database.
#
# attribute - The name of the attribute containing the primary key values
# for every row.
#
# Returns a Hash.
def grouped_objects_for_nodes(nodes, collection, attribute)
  # Maps each node to the row whose id equals the integer value of the
  # node's `attribute`. Nodes lacking the attribute, or whose id matched no
  # row, are left out of the result.
  #
  # Returns a Hash of { node => row }; an empty Hash when `nodes` is empty.
  return {} if nodes.empty?

  ids = unique_attribute_values(nodes, attribute)
  collection_objects = collection_objects_for_ids(collection, ids)
  objects_by_id = collection_objects.index_by(&:id)

  nodes.each_with_object({}) do |node, hash|
    if node.has_attribute?(attribute)
      obj = objects_by_id[node.attr(attribute).to_i]
      hash[node] = obj if obj
    end
  end
end
# Returns an Array containing all unique values of an attribute of the
# given nodes.
def unique_attribute_values(nodes, attribute)
  # Collects the value of `attribute` from every node that has it, dropping
  # duplicates while keeping first-seen order.
  seen = nodes.each_with_object(Set.new) do |node, acc|
    acc << node.attr(attribute) if node.has_attribute?(attribute)
  end

  seen.to_a
end
2016-08-24 12:49:21 +05:30
# Queries the collection for the objects with the given IDs.
#
# If the RequestStore module is enabled this method will only query any
# objects that have not yet been queried. For objects that have already
# been queried the object is returned from the cache.
def collection_objects_for_ids(collection, ids)
  # collection - Object responding to `where(id: ...)`.
  # ids        - The IDs to look up.
  #
  # When `Gitlab::SafeRequestStore` is active, returns an Array of rows in
  # the order of the de-duplicated integer IDs, skipping IDs with no row.
  # Otherwise returns `collection.where(id: ids)` unchanged.
  if Gitlab::SafeRequestStore.active?
    ids = ids.map(&:to_i).uniq

    cache = collection_cache[collection_cache_key(collection)]
    # Only IDs that are not cached yet are fetched from the collection.
    to_query = ids - cache.keys

    unless to_query.empty?
      collection.where(id: to_query).each { |row| cache[row.id] = row }
    end

    ids.each_with_object([]) do |id, array|
      row = cache[id]

      array << row if row
    end
  else
    collection.where(id: ids)
  end
end
# Returns the cache key to use for a collection.
def collection_cache_key(collection)
  # Objects that expose `model` are keyed by that model; anything else is
  # used as its own key.
  return collection.model if collection.respond_to?(:model)

  collection
end
# Processes the list of HTML documents and returns an Array containing all
# the references.
2021-09-30 23:02:18 +05:30
def process(documents, ids_only: false)
  # documents - The HTML documents to search.
  # ids_only  - Forwarded to `gather_references` (default: false).
  #
  # Selects all `a.gfm` elements whose `data-reference-type` equals this
  # class's reference type and returns what `gather_references` produces
  # for them.
  type = self.class.reference_type
  reference_options = self.class.reference_options

  nodes = documents.flat_map do |document|
    Querying.css(document, "a[data-reference-type='#{type}'].gfm", reference_options).to_a
  end

  gather_references(nodes, ids_only: ids_only)
end
2020-02-01 01:16:34 +05:30
# Gathers the references for the given HTML nodes. Returns a Hash with the
# visible references (:visible), the nodes the user can reference (:nodes)
# and the subset of those nodes that is visible to the user (:visible_nodes).
2021-09-30 23:02:18 +05:30
def gather_references(nodes, ids_only: false)
  # nodes    - The HTML nodes to inspect.
  # ids_only - Forwarded to `referenced_by` (default: false).
  #
  # Returns a Hash with three keys:
  #   :visible       - result of `referenced_by` for the visible nodes
  #   :nodes         - nodes the current user can reference
  #   :visible_nodes - subset of :nodes that is visible to the current user
  nodes = nodes_user_can_reference(current_user, nodes)
  visible = nodes_visible_to_user(current_user, nodes)

  { visible: referenced_by(visible, ids_only: ids_only), nodes: nodes, visible_nodes: visible }
end
# Returns a Hash containing the projects for a given list of HTML nodes.
#
# The returned Hash uses the following format:
#
2017-08-17 22:00:37 +05:30
# { node => project }
#
def projects_for_nodes(nodes)
  # Looks projects up by each node's `data-project` attribute, with
  # `project_feature` included in the query.
  #
  # NOTE: the result is memoized in @projects_for_nodes without regard to
  # the argument, so later calls on the same instance return the Hash built
  # for the first `nodes` passed in.
  @projects_for_nodes ||=
    grouped_objects_for_nodes(nodes, Project.includes(:project_feature), 'data-project')
end
2017-08-17 22:00:37 +05:30
def can?(user, permission, subject = :global)
  # Thin wrapper around `Ability.allowed?`; `subject` defaults to `:global`
  # when the caller does not supply one.
  Ability.allowed?(user, permission, subject)
end
def find_projects_for_hash_keys(hash)
  # Treats the keys of `hash` as project IDs and fetches the matching
  # Project rows through `collection_objects_for_ids`.
  collection_objects_for_ids(Project, hash.keys)
end
private

# The context object handed to the constructor.
attr_reader :context
def current_user
  # The user is taken from the context rather than stored on the parser.
  context.current_user
end
2016-11-24 13:41:30 +05:30
# When a feature is disabled or visible only to team members, users who
# are not team members must not be able to see references to it.
# Override this method in subclasses to check whether the user can read
# the referenced resource.
2017-08-17 22:00:37 +05:30
def can_read_reference?(user, ref_project, node)
  # Abstract hook called by `nodes_visible_to_user` for every node that has
  # a `data-project` attribute. This base implementation always raises.
  #
  # user        - The user whose access is being checked.
  # ref_project - The project looked up for `node` (may be nil when no
  #               project matched).
  # node        - The HTML node holding the reference.
  #
  # Raises NotImplementedError, using the same message format as
  # `references_relation`.
  raise NotImplementedError,
    "#{self.class} does not implement #{__method__}"
end
def lazy(&block)
  # Wraps the given block in a `Gitlab::Lazy` instance.
  Gitlab::Lazy.new(&block)
end
2016-08-24 12:49:21 +05:30
def collection_cache
  # Cache kept in `Gitlab::SafeRequestStore` under :banzai_collection_cache.
  # It is a Hash of { cache key => { id => row } }; the inner Hash for a
  # key is created the first time that key is accessed.
  Gitlab::SafeRequestStore[:banzai_collection_cache] ||= Hash.new do |hash, key|
    hash[key] = {}
  end
end
2023-01-13 00:05:48 +05:30
# For any preloading of project associations
# needed to avoid N+1s.
# Note: `projects` param is a hash of { node => project }.
# See #projects_for_nodes for more information.
def preload_associations(projects, user)
  # Only the Hash values (the projects) are handed to the preloader.
  preloader = ::Preloaders::ProjectPolicyPreloader.new(projects.values, user)
  preloader.execute
end
end
end
end