2016-01-14 18:37:52 +05:30
|
|
|
module Banzai
  # Helpers for querying Nokogiri documents with CSS selectors.
  module Querying
    module_function

    # Searches a Nokogiri document using a CSS query, optionally optimizing it
    # whenever possible.
    #
    # document          - A document/element to search.
    # query             - The CSS query to use.
    # reference_options - A hash with nodes filter options. When its :location
    #                     value is :beginning, the search is restricted to
    #                     "./p/..." relative to `document` and the result is
    #                     narrowed by filter_nodes_at_beginning.
    #
    # Returns an array of Nokogiri::XML::Element objects if
    # reference_options[:location] is :beginning. Otherwise it would be a
    # Nokogiri::XML::NodeSet.
    def css(document, query, reference_options = {})
      # When using "a.foo" Nokogiri compiles this to "//a[...]" but
      # "descendant::a[...]" is quite a bit faster and achieves the same result.
      # Only the first expression returned by xpath_for is used, and only its
      # leading "//" is rewritten (hence the \A anchor and `sub`).
      xpath = Nokogiri::CSS.xpath_for(query)[0].sub(%r{\A//}, 'descendant::')
      xpath = restrict_to_p_nodes_at_root(xpath) if filter_nodes_at_beginning?(reference_options)

      filter_nodes(document.xpath(xpath), reference_options)
    end

    # Rewrites the leading "descendant::" axis of an XPath expression to
    # "./p/", so that the expression only matches inside <p> children of the
    # context node.
    #
    # xpath - A String XPath expression, as built by `css`.
    #
    # Returns a new String. Axes nested deeper in the expression (e.g. inside
    # a predicate) are left untouched.
    def restrict_to_p_nodes_at_root(xpath)
      xpath.sub(/\Adescendant::/, './p/')
    end

    # Applies the filtering requested through reference_options.
    #
    # nodes             - The nodes found by the XPath search.
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns `nodes` unchanged unless reference_options[:location] is
    # :beginning, in which case the result of filter_nodes_at_beginning is
    # returned.
    def filter_nodes(nodes, reference_options)
      return nodes unless filter_nodes_at_beginning?(reference_options)

      filter_nodes_at_beginning(nodes)
    end

    # Tells whether only nodes at the beginning of their parent are wanted.
    #
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns a truthy value when reference_options[:location] is :beginning,
    # a falsy value (false or nil) otherwise.
    def filter_nodes_at_beginning?(reference_options)
      reference_options && reference_options[:location] == :beginning
    end

    # Selects child nodes if they are present in the beginning among other siblings.
    #
    # The nodes are grouped by parent. For every parent its children are walked
    # in order, ignoring children whose text is blank; a matched node is kept
    # for as long as each such child is the next matched node of that parent.
    # The walk of a parent stops at the first child that is not.
    #
    # nodes - A Nokogiri::XML::NodeSet.
    #
    # Returns an array of Nokogiri::XML::Element objects.
    def filter_nodes_at_beginning(nodes)
      nodes.group_by(&:parent).flat_map do |parent, matched_nodes|
        # Work on a copy: candidates are consumed from the front as we walk.
        pending = matched_nodes.to_a.dup
        leading = []

        parent.children.each do |child|
          next if child.text.blank?

          node = pending.shift
          break unless node == child

          leading << node
        end

        leading
      end
    end
  end
end
|