debian-mirror-gitlab/scripts/lib/glfm/update_specification.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

247 lines
11 KiB
Ruby
Raw Normal View History

2022-07-16 23:28:13 +05:30
# frozen_string_literal: true
require 'fileutils'
require 'open-uri'
require 'pathname'
2022-11-25 23:54:43 +05:30
require 'tempfile'
require 'yaml'
2022-07-16 23:28:13 +05:30
require_relative 'constants'
require_relative 'shared'
2022-11-25 23:54:43 +05:30
# IMPORTANT NOTE: See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#update-specificationrb-script
# for details on the implementation and usage of this script. This developers guide
# contains diagrams and documentation of this script,
# including explanations and examples of all files it reads and writes.
#
# Also note that this script is intentionally written in a pure-functional (not OO) style,
# with no dependencies on Rails or the GitLab libraries. These choices are intended to make
# it faster and easier to test and debug.
2022-07-16 23:28:13 +05:30
module Glfm
class UpdateSpecification
include Constants
include Shared
2022-11-25 23:54:43 +05:30
# Entry point: regenerates the GLFM specification output files.
#
# Steps, in order:
#   1. load the GitHub Flavored Markdown (GHFM) spec lines,
#   2. write `spec.txt` (GLFM header + official specification markdown),
#   3. write `snapshot_spec.md` (header + GHFM examples + official examples +
#      a blank line + internal extension examples),
#   4. unless skipped, render both documents to HTML and write the results.
#
# @param skip_spec_html_generation [Boolean] when true, stop after step 3
def process(skip_spec_html_generation: false)
  output('Updating specification...')

  # read and optionally update `input/github_flavored_markdown/ghfm_spec_v_x.yy.md`
  ghfm_lines = load_ghfm_spec

  # create `output_spec/spec.txt`
  header_lines = GLFM_SPEC_TXT_HEADER.split("\n").map { |header_line| "#{header_line}\n" }
  official_lines = readlines_from_path!(GLFM_OFFICIAL_SPECIFICATION_MD_PATH)
  glfm_spec_txt_string = (header_lines + official_lines).join('')
  write_glfm_spec_txt(glfm_spec_txt_string)

  # create `output_example_snapshots/snapshot_spec.md`
  ghfm_example_lines = extract_ghfm_spec_example_lines(ghfm_lines)
  official_example_lines =
    extract_glfm_spec_example_lines(official_lines, GLFM_OFFICIAL_SPECIFICATION_MD_PATH)
  internal_lines = readlines_from_path!(GLFM_INTERNAL_EXTENSIONS_MD_PATH)
  internal_example_lines =
    extract_glfm_spec_example_lines(internal_lines, GLFM_INTERNAL_EXTENSIONS_MD_PATH)
  snapshot_sections = [
    header_lines,
    ghfm_example_lines,
    official_example_lines,
    ["\n"],
    internal_example_lines
  ]
  snapshot_spec_md_string = snapshot_sections.reduce(:+).join('')
  write_snapshot_spec_md(snapshot_spec_md_string)

  if skip_spec_html_generation
    output("Skipping GLFM spec.html and snapshot_spec.html generation...")
    return
  end

  # create `output_spec/spec.html` and `output_snapshot_examples/snapshot_spec.html`
  spec_html, snapshot_spec_html = generate_spec_html_files(glfm_spec_txt_string, snapshot_spec_md_string)
  write_spec_html(spec_html)
  write_snapshot_spec_html(snapshot_spec_html)
end
private
2022-11-25 23:54:43 +05:30
# Returns the lines of the GitHub Flavored Markdown specification.
#
# The spec is only re-downloaded when the UPDATE_GHFM_SPEC_MD environment
# variable is exactly the string 'true'. That should only ever be done manually
# and locally, never in CI:
#
# * it gives some protection against an injection attack, should the
#   GitHub-hosted version of the spec ever be temporarily compromised;
# * it avoids external network access in CI jobs, which could otherwise make
#   builds flaky whenever the GitHub-hosted file is temporarily unavailable.
#
# In every other case the existing local copy is read.
def load_ghfm_spec
  return update_ghfm_spec_md if ENV['UPDATE_GHFM_SPEC_MD'] == 'true'

  read_existing_ghfm_spec_md
end
2022-11-25 23:54:43 +05:30
# Reads the existing local copy of the GHFM specification.
#
# @return [Array<String>] the lines of the file at GHFM_SPEC_MD_PATH
def read_existing_ghfm_spec_md
  output("Reading existing #{GHFM_SPEC_MD_PATH}...")

  # The non-block form of File.open is kept deliberately, and the handle is
  # closed explicitly so that it is not left open until garbage collection.
  spec_io = File.open(GHFM_SPEC_MD_PATH)
  begin
    spec_io.readlines
  ensure
    spec_io.close
  end
end
2022-11-25 23:54:43 +05:30
# Downloads the GHFM specification from GHFM_SPEC_TXT_URI, checks its version
# line, writes the downloaded text to GHFM_SPEC_MD_PATH and returns its lines.
#
# @return [Array<String>] the lines of the downloaded specification
# @raise [RuntimeError] if no lines or no string could be read, or if the
#   version line does not match the expected version
def update_ghfm_spec_md
  output("Downloading #{GHFM_SPEC_TXT_URI}...")

  # NOTE: We use `URI.parse` to avoid RuboCop warning "Security/Open",
  # even though we are using a trusted URI from a string literal constant.
  # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/98656#note_1138595002 for details.
  ghfm_spec_txt_uri_parsed = URI.parse(GHFM_SPEC_TXT_URI)
  ghfm_spec_txt_uri_io = ghfm_spec_txt_uri_parsed.open

  begin
    ghfm_spec_lines = readlines_from_io!(ghfm_spec_txt_uri_io, GHFM_SPEC_TXT_URI)

    # Make sure the GHFM spec version has not changed. The third line
    # (index 2) is the one handed to the version check.
    validate_expected_spec_version!(ghfm_spec_lines[2])

    # Reset IO stream and re-read into a single string for easy writing
    # noinspection RubyNilAnalysis
    ghfm_spec_txt_uri_io.seek(0)
    ghfm_spec_string = ghfm_spec_txt_uri_io.read
  ensure
    # Release the downloaded stream on every path, including validation failure.
    ghfm_spec_txt_uri_io.close
  end
  raise "Unable to read string from #{GHFM_SPEC_TXT_URI}" unless ghfm_spec_string

  output("Writing #{GHFM_SPEC_MD_PATH}...")
  GHFM_SPEC_MD_PATH.dirname.mkpath
  write_file(GHFM_SPEC_MD_PATH, ghfm_spec_string)

  ghfm_spec_lines
end
# Verifies that the given line declares the expected GHFM spec version.
#
# The line must be exactly `version: <GHFM_SPEC_VERSION>`, optionally followed
# by a single trailing newline (`\Z`). The version is escaped before being
# interpolated so that characters such as `.` are matched literally rather
# than as regex metacharacters.
#
# @param version_line [String, nil] the line expected to carry the version
# @return [nil] when the version matches
# @raise [RuntimeError] when the line does not match the expected version
def validate_expected_spec_version!(version_line)
  return if /\Aversion: #{Regexp.escape(GHFM_SPEC_VERSION.to_s)}\Z/o.match?(version_line)

  raise "GitHub Flavored Markdown spec.txt version mismatch! " \
        "Expected 'version: #{GHFM_SPEC_VERSION}', got '#{version_line}'"
end
2023-01-13 00:05:48 +05:30
# Extracts the lines of the GHFM spec that contain examples.
#
# In the GHFM spec.txt format, all we have to identify the headers containing
# examples is the presence of a single initial H1 named "Introduction" before
# the first header containing examples, and the <!-- END TESTS --> comment
# after the last header containing examples.
#
# The END TESTS comment is only searched for after the first examples header,
# so a misplaced earlier marker raises instead of silently yielding a wrong slice.
#
# @param spec_lines [Array<String>] all lines of the GHFM spec
# @return [Array<String>] lines from the first examples header up to, but not
#   including, the END TESTS comment line
# @raise [RuntimeError] if the first examples header or a subsequent END TESTS
#   comment cannot be found
def extract_ghfm_spec_example_lines(spec_lines)
  path = GHFM_SPEC_MD_PATH

  first_examples_header_index = spec_lines.index do |line|
    line.start_with?('# ') && !line.start_with?(INTRODUCTION_HEADER_LINE_TEXT)
  end
  raise "Unable to find first examples header in #{path}" unless first_examples_header_index

  lines_from_first_header = spec_lines.drop(first_examples_header_index)
  example_lines = lines_from_first_header.take_while do |line|
    !line.start_with?(END_TESTS_COMMENT_LINE_TEXT)
  end

  # If nothing stopped `take_while`, there is no END TESTS comment after the header.
  if example_lines.size == lines_from_first_header.size
    raise "Unable to locate 'END TESTS' comment line in #{path}"
  end

  example_lines
end
2023-01-13 00:05:48 +05:30
# Extracts the example lines from a GLFM input markdown file.
#
# In the GLFM input markdown files (unlike the GHFM spec.txt format), we have
# control over the contents, so the examples are delimited by explicit
# <!-- BEGIN TESTS --> and <!-- END TESTS --> comment lines.
#
# The END TESTS comment is only searched for after the BEGIN TESTS comment, so
# markers in the wrong order raise instead of silently yielding a wrong slice.
#
# @param spec_lines [Array<String>] all lines of the markdown file
# @param path [#to_s] path of the file, used only in error messages
# @return [Array<String>] the lines strictly between the two comment lines
# @raise [RuntimeError] if the BEGIN TESTS comment, or an END TESTS comment
#   following it, cannot be found
def extract_glfm_spec_example_lines(spec_lines, path)
  begin_tests_comment_line_index = spec_lines.index do |line|
    line.start_with?(BEGIN_TESTS_COMMENT_LINE_TEXT)
  end
  raise "Unable to locate 'BEGIN TESTS' comment line in #{path}" unless begin_tests_comment_line_index

  lines_after_begin = spec_lines.drop(begin_tests_comment_line_index + 1)
  example_lines = lines_after_begin.take_while do |line|
    !line.start_with?(END_TESTS_COMMENT_LINE_TEXT)
  end

  # If nothing stopped `take_while`, there is no END TESTS comment after BEGIN TESTS.
  if example_lines.size == lines_after_begin.size
    raise "Unable to locate 'END TESTS' comment line in #{path}"
  end

  example_lines
end
# Writes the given string to GLFM_SPEC_TXT_PATH, creating the parent
# directory first if necessary.
#
# @param glfm_spec_txt_string [String] full contents of the file
def write_glfm_spec_txt(glfm_spec_txt_string)
  destination = GLFM_SPEC_TXT_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  write_file(destination, glfm_spec_txt_string)
end
2022-11-25 23:54:43 +05:30
2023-01-13 00:05:48 +05:30
# Writes the given string to ES_SNAPSHOT_SPEC_MD_PATH, creating the parent
# directory first if necessary.
#
# @param snapshot_spec_md_string [String] full contents of the file
def write_snapshot_spec_md(snapshot_spec_md_string)
  destination = ES_SNAPSHOT_SPEC_MD_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  write_file(destination, snapshot_spec_md_string)
end
# Renders the spec.txt and snapshot_spec.md markdown to HTML by running
# `render_static_html.rb` in a subprocess.
#
# Input is handed to the subprocess as a YAML file whose path is exported in
# ENV['INPUT_MARKDOWN_YML_PATH']; the subprocess is told where to write its
# result via ENV['OUTPUT_STATIC_HTML_TEMPFILE_PATH'].
#
# @param spec_txt_string [String] contents of spec.txt
# @param snapshot_spec_md_string [String] contents of snapshot_spec.md
# @return [Array<String>] two-element array: the rendered `spec_txt` entry and
#   the rendered `snapshot_spec_md` entry read back from the output file
# @raise [KeyError] if either entry is missing from the generated output
def generate_spec_html_files(spec_txt_string, snapshot_spec_md_string)
  output("Generating spec.html and snapshot_spec.html from spec.txt and snapshot_spec.md markdown...")

  spec_txt_string_split_examples = split_examples_into_html_and_md(spec_txt_string)
  snapshot_spec_md_string_split_examples = split_examples_into_html_and_md(snapshot_spec_md_string)

  # Every line of each document is prefixed with `gsub(/^/, ...)` so that it
  # nests under its YAML block scalar (`|`) key.
  input_markdown_yml_string = <<~MARKDOWN
    ---
    spec_txt: |
    #{spec_txt_string_split_examples.gsub(/^/, ' ')}
    snapshot_spec_md: |
    #{snapshot_spec_md_string_split_examples.gsub(/^/, ' ')}
  MARKDOWN

  # NOTE: We must copy the input YAML file used by the `render_static_html.rb`
  # to a separate temporary file in order for the script to read them, because it is run in
  # a separate subprocess, and during unit testing we are unable to substitute the mock
  # StringIO when reading the input files in the subprocess.
  ENV['INPUT_MARKDOWN_YML_PATH'] = Dir::Tmpname.create(MARKDOWN_TEMPFILE_BASENAME) do |path|
    write_file(path, input_markdown_yml_string)
  end

  # NOTE 1: We shell out to perform the conversion of markdown to static HTML by invoking a
  # separate subprocess. This allows us to avoid using the Rails API or environment in this
  # script, which makes developing and running the unit tests for this script much faster,
  # because they can use 'fast_spec_helper' which does not require the entire Rails environment.

  # NOTE 2: We run this as an RSpec process because that's the easiest way to ensure a
  # reliable, fully-configured environment in which to execute the markdown-processing logic.

  # Dir::Tmpname.create requires a block. We only need the generated path, which we take
  # from the return value, so we pass an empty block to avoid an error.
  static_html_tempfile_path = Dir::Tmpname.create(STATIC_HTML_TEMPFILE_BASENAME) {}
  ENV['OUTPUT_STATIC_HTML_TEMPFILE_PATH'] = static_html_tempfile_path

  cmd = %(bin/rspec #{__dir__}/render_static_html.rb)
  run_external_cmd(cmd)

  output("Reading generated html from tempfile #{static_html_tempfile_path}...")

  # Close the handle explicitly once the YAML has been parsed, rather than
  # leaving it open until garbage collection.
  static_html_io = File.open(static_html_tempfile_path)
  rendered_html_hash =
    begin
      YAML.safe_load(static_html_io, symbolize_names: true)
    ensure
      static_html_io.close
    end

  [rendered_html_hash.fetch(:spec_txt), rendered_html_hash.fetch(:snapshot_spec_md)]
end
# Rewrites every example in the given spec markdown so that it becomes two
# separately fenced sections.
#
# An example starts at a line beginning with EXAMPLE_BEGIN_STRING and ends at
# a line equal to EXAMPLE_END_STRING. Inside it, the first line consisting of
# a single `.` is replaced by EXAMPLE_BACKTICKS_STRING, a blank line, and
# EXAMPLE_BACKTICKS_STRING again. Text outside of examples is left untouched.
#
# @param spec_md_string [String] spec markdown containing examples
# @return [String] a new string with the separator lines replaced
def split_examples_into_html_and_md(spec_md_string)
  # Group 1: the example's opening line and everything up to the `.` separator line.
  # Group 2: everything after the separator line through the example's closing line.
  example_pattern =
    /(^#{EXAMPLE_BEGIN_STRING}.*?$(?:.|\n)*?)^\.$(\n(?:.|\n)*?^#{EXAMPLE_END_STRING}$)/mo
  split_replacement = "\\1#{EXAMPLE_BACKTICKS_STRING}\n\n#{EXAMPLE_BACKTICKS_STRING}\\2"

  spec_md_string.gsub(example_pattern, split_replacement)
end
2023-01-13 00:05:48 +05:30
# Writes the rendered spec HTML, followed by a trailing newline, to
# GLFM_SPEC_HTML_PATH, creating the parent directory first if necessary.
#
# @param spec_html_string [String] rendered HTML for the specification
def write_spec_html(spec_html_string)
  # Log the path that is actually written (the HTML file, not spec.txt).
  output("Writing #{GLFM_SPEC_HTML_PATH}...")
  FileUtils.mkdir_p(Pathname.new(GLFM_SPEC_HTML_PATH).dirname)
  write_file(GLFM_SPEC_HTML_PATH, "#{spec_html_string}\n")
end
# Writes the rendered snapshot spec HTML, followed by a trailing newline, to
# ES_SNAPSHOT_SPEC_HTML_PATH, creating the parent directory first if necessary.
#
# @param snapshot_spec_html_string [String] rendered HTML for the snapshot spec
def write_snapshot_spec_html(snapshot_spec_html_string)
  destination = ES_SNAPSHOT_SPEC_HTML_PATH
  output("Writing #{destination}...")

  parent_directory = Pathname.new(destination).dirname
  FileUtils.mkdir_p(parent_directory)

  html_with_trailing_newline = "#{snapshot_spec_html_string}\n"
  write_file(destination, html_with_trailing_newline)
end
# Opens the file at the given path and returns its lines via
# `readlines_from_io!`.
#
# @param path [String, Pathname] path of the file to read
# @return [Array<String>] whatever `readlines_from_io!` returns for the file
def readlines_from_path!(path)
  # The non-block form of File.open is kept deliberately, and the handle is
  # closed explicitly so that it is released even if reading raises.
  io = File.open(path)
  begin
    readlines_from_io!(io, path)
  ensure
    io.close
  end
end
# Reads all lines from the given IO, failing loudly when there are none.
#
# @param io [#readlines] the stream to read from
# @param uri_or_path [#to_s] where the stream came from; used only in the error message
# @return [Array<String>] the lines that were read (never empty)
# @raise [RuntimeError] if the stream yields no lines
def readlines_from_io!(io, uri_or_path)
  io.readlines.tap do |read_lines|
    raise "Unable to read lines from #{uri_or_path}" if read_lines.empty?
  end
end
2022-07-16 23:28:13 +05:30
end
end