debian-mirror-gitlab/scripts/lib/glfm/update_specification.rb
2023-03-05 14:24:40 +05:30

332 lines
16 KiB
Ruby

# frozen_string_literal: true
require 'erb'
require 'fileutils'
require 'open-uri'
require 'pathname'
require 'tempfile'
require 'yaml'
require_relative 'constants'
require_relative 'shared'
# IMPORTANT NOTE: See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#update-specificationrb-script
# for details on the implementation and usage of this script. This developers guide
# contains diagrams and documentation of this script,
# including explanations and examples of all files it reads and writes.
#
# Also note that this script is intentionally written in a pure-functional (not OO) style,
# with no dependencies on Rails or the GitLab libraries. These choices are intended to make
# it faster and easier to test and debug.
module Glfm
class UpdateSpecification
include Constants
include Shared
# Regenerates the GLFM specification output files.
#
# Writes `spec.txt` and `snapshot_spec.md`, then - unless HTML generation is
# skipped - renders both to HTML, applies styling, and writes the HTML files.
#
# @param skip_spec_html_generation [Boolean] when true, return as soon as the
#   markdown files have been written, without generating any HTML.
def process(skip_spec_html_generation: false)
  output('Updating specification...')

  # read and optionally update `input/github_flavored_markdown/ghfm_spec_v_x.yy.md`
  ghfm_spec_lines = load_ghfm_spec

  # create `output_spec/spec.txt`
  glfm_spec_txt_header_lines = GLFM_SPEC_TXT_HEADER.split("\n").map { |line| "#{line}\n" }
  official_spec_lines = readlines_from_path!(GLFM_OFFICIAL_SPECIFICATION_MD_PATH)
  glfm_spec_txt_string = (glfm_spec_txt_header_lines + official_spec_lines).join('')
  write_glfm_spec_txt(glfm_spec_txt_string)

  # create `output_example_snapshots/snapshot_spec.md`
  snapshot_spec_md_header_lines = ES_SNAPSHOT_SPEC_MD_HEADER.split("\n").map { |line| "#{line}\n" }
  ghfm_spec_example_lines = extract_ghfm_spec_example_lines(ghfm_spec_lines)
  official_spec_example_lines =
    extract_glfm_spec_example_lines(official_spec_lines, GLFM_OFFICIAL_SPECIFICATION_MD_PATH)
  internal_extension_lines = readlines_from_path!(GLFM_INTERNAL_EXTENSIONS_MD_PATH)
  validate_internal_extensions_md(internal_extension_lines)
  internal_extension_example_lines =
    extract_glfm_spec_example_lines(internal_extension_lines, GLFM_INTERNAL_EXTENSIONS_MD_PATH)
  snapshot_spec_md_string = (
    snapshot_spec_md_header_lines +
    ghfm_spec_example_lines +
    official_spec_example_lines +
    ["\n"] +
    internal_extension_example_lines
  ).join('')
  write_snapshot_spec_md(snapshot_spec_md_string)

  # Some unit tests can skip HTML generation if they don't need it, so they run faster
  if skip_spec_html_generation
    output("Skipping GLFM spec.html and snapshot_spec.html generation...")
    return
  end

  # Use the backend markdown processing to render un-styled GLFM specification HTML files from the markdown.
  # The frontmatter headers are replaced with a `[TOC]` marker before rendering.
  #
  # NOTE: The non-mutating `gsub` is used on purpose. `gsub!` returns nil when the header is
  # not found (which would pass nil on to the renderer), and it would also modify, in place,
  # the very strings that were handed to the writers above.
  spec_html_unstyled_string, snapshot_spec_html_unstyled_string =
    generate_spec_html_files(
      glfm_spec_txt_string.gsub(GLFM_SPEC_TXT_HEADER, "[TOC]\n\n"),
      snapshot_spec_md_string.gsub(ES_SNAPSHOT_SPEC_MD_HEADER, "[TOC]\n\n"),
      ghfm_spec_example_lines.join('')
    )

  # Add styling to the rendered HTML files, to make them look like the CommonMark and
  # GitHub Flavored Markdown HTML-rendered specifications
  spec_html_styled_string = add_styling_to_specification_html(
    body: spec_html_unstyled_string,
    title: GLFM_SPEC_TXT_TITLE,
    version: GLFM_SPEC_VERSION
  )
  snapshot_spec_html_styled_string = add_styling_to_specification_html(
    body: snapshot_spec_html_unstyled_string,
    title: ES_SNAPSHOT_SPEC_TITLE,
    version: GLFM_SPEC_VERSION
  )

  # Write out the styled GLFM specification HTML files
  write_spec_html(spec_html_styled_string)
  write_snapshot_spec_html(snapshot_spec_html_styled_string)
end
private
# Returns the lines of the GitHub Flavored Markdown spec, either freshly
# downloaded or read from the existing local copy.
def load_ghfm_spec
  # We only re-download the GitHub Flavored Markdown specification if the
  # UPDATE_GHFM_SPEC_MD environment variable is set to true, which should only
  # ever be done manually and locally, never in CI. This provides some security
  # protection against a possible injection attack vector, if the GitHub-hosted
  # version of the spec is ever temporarily compromised with an injection attack.
  #
  # This also avoids doing external network access to download the file
  # in CI jobs, which can avoid potentially flaky builds if the GitHub-hosted
  # version of the file is temporarily unavailable.
  return update_ghfm_spec_md if ENV['UPDATE_GHFM_SPEC_MD'] == 'true'

  read_existing_ghfm_spec_md
end
# Reads the existing GHFM spec file at GHFM_SPEC_MD_PATH.
#
# @return [Array<String>] the lines of the file, with line endings preserved
def read_existing_ghfm_spec_md
  output("Reading existing #{GHFM_SPEC_MD_PATH}...")

  # Close the handle explicitly rather than leaving it open until garbage collection.
  ghfm_spec_md_io = File.open(GHFM_SPEC_MD_PATH)
  begin
    ghfm_spec_md_io.readlines
  ensure
    ghfm_spec_md_io.close
  end
end
# Downloads the GHFM spec from GHFM_SPEC_TXT_URI, checks its version line, and
# writes the downloaded content to GHFM_SPEC_MD_PATH.
#
# @return [Array<String>] the lines of the downloaded spec
# @raise [RuntimeError] if nothing could be read from the download
def update_ghfm_spec_md
  output("Downloading #{GHFM_SPEC_TXT_URI}...")
  # NOTE: We use `URI.parse` to avoid RuboCop warning "Security/Open",
  # even though we are using a trusted URI from a string literal constant.
  # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/98656#note_1138595002 for details.
  ghfm_spec_txt_uri_parsed = URI.parse(GHFM_SPEC_TXT_URI)
  ghfm_spec_txt_uri_io = ghfm_spec_txt_uri_parsed.open

  begin
    ghfm_spec_lines = readlines_from_io!(ghfm_spec_txt_uri_io, GHFM_SPEC_TXT_URI)

    # Make sure the GHFM spec version has not changed (the version is on the third line)
    validate_expected_spec_version!(ghfm_spec_lines[2])

    # Reset IO stream and re-read into a single string for easy writing
    # noinspection RubyNilAnalysis
    ghfm_spec_txt_uri_io.seek(0)
    ghfm_spec_string = ghfm_spec_txt_uri_io.read
  ensure
    # Release the download stream even when validation or reading fails.
    ghfm_spec_txt_uri_io.close
  end
  raise "Unable to read string from #{GHFM_SPEC_TXT_URI}" unless ghfm_spec_string

  output("Writing #{GHFM_SPEC_MD_PATH}...")
  GHFM_SPEC_MD_PATH.dirname.mkpath
  write_file(GHFM_SPEC_MD_PATH, ghfm_spec_string)

  ghfm_spec_lines
end
# Verifies that the given line is exactly `version: <GHFM_SPEC_VERSION>`,
# optionally followed by a single trailing newline.
#
# @param version_line [String, nil] the version line of the downloaded spec
# @raise [RuntimeError] if the line does not carry the expected version
def validate_expected_spec_version!(version_line)
  # The version is escaped so that its `.` only matches a literal dot; `\Z` still
  # tolerates the trailing newline that `readlines` leaves on the line.
  expected_version_line_regex = /\Aversion: #{Regexp.escape(GHFM_SPEC_VERSION.to_s)}\Z/o
  return if expected_version_line_regex.match?(version_line)

  raise "GitHub Flavored Markdown spec.txt version mismatch! " \
    "Expected 'version: #{GHFM_SPEC_VERSION}', got '#{version_line}'"
end
# Extracts the lines of the GHFM spec that contain examples.
#
# @param spec_lines [Array<String>] all lines of the GHFM spec
# @return [Array<String>] the lines from the first examples header up to, but
#   not including, the END TESTS comment line
# @raise [RuntimeError] if the first examples header cannot be found, or if no
#   END TESTS comment line follows it
def extract_ghfm_spec_example_lines(spec_lines)
  # In the GHFM spec.txt format, all we have to identify the headers containing examples
  # is the presence of a single initial H1 named "Introduction" before the first
  # header containing examples, and the <!-- END TESTS --> comment after the last header
  # containing examples.
  path = GHFM_SPEC_MD_PATH
  first_examples_header_index = spec_lines.index do |line|
    line.start_with?('# ') && !line.start_with?(INTRODUCTION_HEADER_LINE_TEXT)
  end
  raise "Unable to find first examples header in #{path}" unless first_examples_header_index

  # Only look for the END TESTS comment after the first examples header. Computing
  # `end_index - 1` on the whole file would yield -1 for a comment on the very first
  # line, which silently selects everything up to the end of the file.
  lines_from_first_header = spec_lines.drop(first_examples_header_index)
  example_lines_count = lines_from_first_header.index do |line|
    line.start_with?(END_TESTS_COMMENT_LINE_TEXT)
  end
  raise "Unable to locate 'END TESTS' comment line in #{path}" if example_lines_count.nil?

  lines_from_first_header.first(example_lines_count)
end
# Extracts the example lines from a GLFM input markdown file.
#
# @param spec_lines [Array<String>] all lines of the markdown file
# @param path [Object] location of the file, only used in error messages
# @return [Array<String>] the lines strictly between the BEGIN TESTS and
#   END TESTS comment lines
# @raise [RuntimeError] if the BEGIN TESTS comment line cannot be found, or if
#   no END TESTS comment line follows it
def extract_glfm_spec_example_lines(spec_lines, path)
  # In the GLFM input markdown files (unlike the GHFM spec.txt format), we have control over
  # the contents, so we can use explicit <!-- BEGIN TESTS --> and <!-- END TESTS -->
  # comments to delimit the lines containing examples.
  begin_tests_comment_line_index = spec_lines.index do |line|
    line.start_with?(BEGIN_TESTS_COMMENT_LINE_TEXT)
  end
  raise "Unable to locate 'BEGIN TESTS' comment line in #{path}" unless begin_tests_comment_line_index

  # Only look for the END TESTS comment after the BEGIN TESTS comment. Computing
  # `end_index - 1` on the whole file would yield -1 for a comment on the very first
  # line, which silently selects everything up to the end of the file.
  lines_after_begin_comment = spec_lines.drop(begin_tests_comment_line_index + 1)
  example_lines_count = lines_after_begin_comment.index do |line|
    line.start_with?(END_TESTS_COMMENT_LINE_TEXT)
  end
  raise "Unable to locate 'END TESTS' comment line in #{path}" if example_lines_count.nil?

  lines_after_begin_comment.first(example_lines_count)
end
# Ensures the internal extensions markdown starts with the BEGIN TESTS comment
# and ends with the END TESTS comment (ignoring surrounding whitespace on
# those two lines).
#
# @param internal_extension_lines [Array<String>] all lines of the file
# @raise [RuntimeError] if the first or the last line is anything else
def validate_internal_extensions_md(internal_extension_lines)
  opening_line = internal_extension_lines.first.strip
  closing_line = internal_extension_lines.last.strip

  starts_with_begin_comment = opening_line == BEGIN_TESTS_COMMENT_LINE_TEXT
  ends_with_end_comment = closing_line == END_TESTS_COMMENT_LINE_TEXT
  return if starts_with_begin_comment && ends_with_end_comment

  raise "Error: No content is allowed outside of the " \
    "'#{BEGIN_TESTS_COMMENT_LINE_TEXT}' and '#{END_TESTS_COMMENT_LINE_TEXT}' comments " \
    "in '#{GLFM_INTERNAL_EXTENSIONS_MD_PATH}'."
end
# Writes the given string to GLFM_SPEC_TXT_PATH, creating the parent
# directory first if it does not exist yet.
def write_glfm_spec_txt(glfm_spec_txt_string)
  destination = GLFM_SPEC_TXT_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  write_file(destination, glfm_spec_txt_string)
end
# Writes the given string to ES_SNAPSHOT_SPEC_MD_PATH, creating the parent
# directory first if it does not exist yet.
def write_snapshot_spec_md(snapshot_spec_md_string)
  destination = ES_SNAPSHOT_SPEC_MD_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  write_file(destination, snapshot_spec_md_string)
end
# Renders the spec.txt and snapshot_spec.md markdown to un-styled HTML by
# running `render_static_html.rb` in a separate RSpec subprocess.
#
# The markdown is handed to the subprocess through a temporary YAML file whose
# path is exported as INPUT_MARKDOWN_YML_PATH; the rendered HTML is read back
# from the temporary file named by OUTPUT_STATIC_HTML_TEMPFILE_PATH.
#
# @param spec_txt_string [String] markdown of spec.txt
# @param snapshot_spec_md_string [String] markdown of snapshot_spec.md
# @param ghfm_spec_examples_string [String] the GHFM example lines, only used
#   to count how many examples precede the official GLFM examples
# @return [Array] the rendered spec.txt HTML followed by the rendered
#   snapshot_spec.md HTML
def generate_spec_html_files(spec_txt_string, snapshot_spec_md_string, ghfm_spec_examples_string)
  output("Generating spec.html and snapshot_spec.html from spec.txt and snapshot_spec.md markdown...")

  # NOTE: spec.txt only contains official GLFM examples, but snapshot_spec.md contains ALL examples, with the
  # official GLFM examples coming _after_ the GHFM (which contains CommonMark + GHFM) examples, and the
  # internal extension examples coming last. In the snapshot_spec.md, the CommonMark and GHFM examples come
  # first, in order for the example numbers to match the numbers in those separate specifications [1]. But, we
  # also need for the numbering of the official examples in spec.txt to match the numbering of the official
  # examples in snapshot_spec.md. Here's the ordering:
  #
  # spec.txt:
  # 1. GLFM Official
  #
  # snapshot_spec.md:
  # 1. GHFM (contains CommonMark + GHFM)
  # 2. GLFM Official
  # 3. GLFM Internal
  #
  # [1] Note that the example numbering in the GLFM spec.html is currently out of sync with its corresponding
  # spec.txt because its rendering is out of date. This has been reported in the following issue:
  # https://github.com/github/cmark-gfm/issues/288
  ghfm_spec_examples_count = ghfm_spec_examples_string.scan(EXAMPLE_BEGIN_STRING).length
  spec_txt_string_split_examples =
    transform_examples_for_rendering(spec_txt_string, starting_example_number: ghfm_spec_examples_count + 1)
  snapshot_spec_md_string_split_examples = transform_examples_for_rendering(snapshot_spec_md_string)

  # Each markdown document is indented so that it forms the body of a YAML block scalar.
  input_markdown_yml_string = <<~MARKDOWN
    ---
    spec_txt: |
    #{spec_txt_string_split_examples.gsub(/^/, ' ')}
    snapshot_spec_md: |
    #{snapshot_spec_md_string_split_examples.gsub(/^/, ' ')}
  MARKDOWN

  # NOTE: We must copy the input YAML file used by the `render_static_html.rb`
  # to a separate temporary file in order for the script to read them, because it is run in
  # a separate subprocess, and during unit testing we are unable to substitute the mock
  # StringIO when reading the input files in the subprocess.
  ENV['INPUT_MARKDOWN_YML_PATH'] = Dir::Tmpname.create(MARKDOWN_TEMPFILE_BASENAME) do |path|
    write_file(path, input_markdown_yml_string)
  end

  # NOTE 1: We shell out to perform the conversion of markdown to static HTML by invoking a
  # separate subprocess. This allows us to avoid using the Rails API or environment in this
  # script, which makes developing and running the unit tests for this script much faster,
  # because they can use 'fast_spec_helper' which does not require the entire Rails environment.
  # NOTE 2: We run this as an RSpec process because that's the easiest way to ensure a reliable,
  # fully-configured environment in which to execute the markdown-processing logic.

  # Dir::Tmpname.create requires a block, but we only need the generated path that it
  # returns, so we pass an empty block.
  static_html_tempfile_path = Dir::Tmpname.create(STATIC_HTML_TEMPFILE_BASENAME) {}
  ENV['OUTPUT_STATIC_HTML_TEMPFILE_PATH'] = static_html_tempfile_path

  cmd = %(bin/rspec #{__dir__}/render_static_html.rb)
  run_external_cmd(cmd)

  output("Reading generated html from tempfile #{static_html_tempfile_path}...")
  # Close the handle explicitly rather than leaving it open until garbage collection.
  static_html_tempfile_io = File.open(static_html_tempfile_path)
  rendered_html_hash =
    begin
      YAML.safe_load(static_html_tempfile_io, symbolize_names: true)
    ensure
      static_html_tempfile_io.close
    end

  [rendered_html_hash.fetch(:spec_txt), rendered_html_hash.fetch(:snapshot_spec_md)]
end
# Renders the `specification_html_template.erb` template that sits next to this script.
# NOTE: body, title, and version are not referenced directly in this method; they are
# made available to the ERB template through the binding.
# noinspection RubyUnusedLocalVariable
def add_styling_to_specification_html(body:, title:, version:)
  erb_template_path = File.expand_path('specification_html_template.erb', __dir__)
  # noinspection RubyMismatchedArgumentType
  erb_renderer = ERB.new(File.read(erb_template_path))
  erb_renderer.result(binding)
end
# Rewrites every example in the given spec markdown so it can be rendered as HTML.
#
# For each example this method:
# 1. Splits the single example code block, which has a period between the markdown and HTML, into two code blocks
# 2. Adds a wrapper div for use in styling and as the target for the example number named anchor. This will get
#    the 'class="example" id="example-n"' attributes applied via javascript (since markdown rendering does not
#    preserve classes or IDs)
# 3. Adds a div which includes the example number named anchor and text. This will get the 'class="examplenum"'
#    attribute applied via javascript.
#
# NOTE: Even though they will get stripped during markdown rendering, we go ahead and add the class and id
# attributes here, for easier debugging and comparison to the source markdown.
#
# @param spec_md_string [String] the spec markdown containing the examples
# @param starting_example_number [Integer] the number given to the first example
# @return [String] a copy of the markdown with every example rewritten
def transform_examples_for_rendering(spec_md_string, starting_example_number: 1)
  example_replacement_regex = /(^#{EXAMPLE_BEGIN_STRING}.*?$(?:.|\n)*?)^\.$(\n(?:.|\n)*?^#{EXAMPLE_END_STRING}$)/mo
  next_example_number = starting_example_number

  spec_md_string.gsub(example_replacement_regex) do
    # First capture: everything before the separating period; second capture: everything after it.
    markdown_part, html_part = ::Regexp.last_match.captures
    anchor_name = "example-#{next_example_number}"
    number_div = %(<div class="examplenum"><a href="##{anchor_name}">Example #{next_example_number}</a></div>\n)
    next_example_number += 1

    # NOTE: We need blank lines before the markdown code blocks so they will be rendered properly
    [
      %(<div class="example" id="#{anchor_name}">\n),
      number_div,
      "\n",
      markdown_part,
      EXAMPLE_BACKTICKS_STRING,
      "\n\n",
      EXAMPLE_BACKTICKS_STRING,
      html_part,
      "\n",
      '</div>'
    ].join
  end
end
# Writes the given HTML, followed by a newline, to GLFM_SPEC_HTML_PATH,
# creating the parent directory first if it does not exist yet.
def write_spec_html(spec_html_string)
  destination = GLFM_SPEC_HTML_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  html_with_trailing_newline = "#{spec_html_string}\n"
  write_file(destination, html_with_trailing_newline)
end
# Writes the given HTML, followed by a newline, to ES_SNAPSHOT_SPEC_HTML_PATH,
# creating the parent directory first if it does not exist yet.
def write_snapshot_spec_html(snapshot_spec_html_string)
  destination = ES_SNAPSHOT_SPEC_HTML_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  html_with_trailing_newline = "#{snapshot_spec_html_string}\n"
  write_file(destination, html_with_trailing_newline)
end
# Reads all lines of the file at the given path.
#
# @param path [String, Pathname] location of the file to read
# @return [Array<String>] the lines of the file
# @raise [RuntimeError] if the file contains no lines (raised by `readlines_from_io!`)
def readlines_from_path!(path)
  io = File.open(path)
  begin
    readlines_from_io!(io, path)
  ensure
    # Close the handle explicitly rather than leaving it open until garbage collection,
    # including when the file turns out to be empty and an error is raised.
    io.close
  end
end
# Reads all remaining lines from the given IO.
#
# @param io [#readlines] the stream to read from
# @param uri_or_path [Object] where the stream came from, only used in the error message
# @return [Array<String>] the lines that were read
# @raise [RuntimeError] if no lines could be read
def readlines_from_io!(io, uri_or_path)
  io.readlines.tap do |lines|
    raise "Unable to read lines from #{uri_or_path}" if lines.empty?
  end
end
end
end