debian-mirror-gitlab/lib/banzai/querying.rb

69 lines
2 KiB
Ruby
Raw Normal View History

2018-12-13 13:39:08 +05:30
# frozen_string_literal: true
module Banzai
  # Helpers for running CSS queries against Nokogiri documents.
  module Querying
    module_function

    # Searches a Nokogiri document using a CSS query, optionally optimizing it
    # whenever possible.
    #
    # document          - A document/element to search.
    # query             - The CSS query to use. Only the first XPath expression
    #                     that Nokogiri compiles for it is used.
    # reference_options - A hash with nodes filter options. Only the :location
    #                     key is read; the value :beginning restricts the
    #                     search as described below.
    #
    # Returns an array of Nokogiri::XML::Element objects if location is
    # :beginning in reference_options. Otherwise it would be a
    # Nokogiri::XML::NodeSet.
    def css(document, query, reference_options = {})
      # When using "a.foo" Nokogiri compiles this to "//a[...]" but
      # "descendant::a[...]" is quite a bit faster and achieves the same result.
      # Anchor with \A (not ^) so that only the very start of the expression is
      # rewritten, never a "//" that follows a newline inside the expression.
      xpath = Nokogiri::CSS.xpath_for(query)[0].sub(%r{\A//}, 'descendant::')
      xpath = restrict_to_p_nodes_at_root(xpath) if filter_nodes_at_beginning?(reference_options)

      filter_nodes(document.xpath(xpath), reference_options)
    end

    # Rewrites an XPath expression so that every "descendant::" axis becomes
    # "./p/", i.e. the expression is evaluated against the children of the "p"
    # elements found directly under the context node.
    #
    # xpath - The XPath expression (String) to rewrite.
    #
    # Returns a new String; the argument is not modified.
    def restrict_to_p_nodes_at_root(xpath)
      xpath.gsub('descendant::', './p/')
    end

    # Applies the filtering requested through reference_options.
    #
    # nodes             - The nodes produced by the query.
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns the result of filter_nodes_at_beginning when the :beginning
    # location was requested, otherwise returns nodes untouched.
    def filter_nodes(nodes, reference_options)
      return nodes unless filter_nodes_at_beginning?(reference_options)

      filter_nodes_at_beginning(nodes)
    end

    # Tells whether only nodes at the beginning of their parent are wanted.
    #
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns a truthy value only when reference_options[:location] is
    # :beginning; a nil reference_options yields nil.
    def filter_nodes_at_beginning?(reference_options)
      reference_options && reference_options[:location] == :beginning
    end

    # Selects child nodes if they are present in the beginning among other siblings.
    #
    # The nodes are grouped by parent. For each parent, its children are walked
    # in order, ignoring children whose text is blank, and matched against that
    # parent's nodes one by one. The walk stops at the first non-blank child
    # that is not the next expected node, so only the leading run is kept.
    #
    # nodes - A Nokogiri::XML::NodeSet.
    #
    # Returns an array of Nokogiri::XML::Element objects.
    def filter_nodes_at_beginning(nodes)
      nodes.group_by(&:parent).each_with_object([]) do |(parent, matched), leading|
        position = 0

        parent.children.each do |child|
          next if child.text.blank?

          # matched[position] is nil once every matched node has been consumed,
          # which also ends the leading run.
          candidate = matched[position]
          break unless candidate == child

          leading << candidate
          position += 1
        end
      end
    end
  end
end