# frozen_string_literal: true require 'erb' require 'fileutils' require 'open-uri' require 'pathname' require 'tempfile' require 'yaml' require_relative 'constants' require_relative 'shared' # IMPORTANT NOTE: See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#update-specificationrb-script # for details on the implementation and usage of this script. This developers guide # contains diagrams and documentation of this script, # including explanations and examples of all files it reads and writes. # # Also note that this script is intentionally written in a pure-functional (not OO) style, # with no dependencies on Rails or the GitLab libraries. These choices are intended to make # it faster and easier to test and debug. module Glfm class UpdateSpecification include Constants include Shared def process(skip_spec_html_generation: false) output('Updating specification...') # read and optionally update `input/github_flavored_markdown/ghfm_spec_v_x.yy.md` ghfm_spec_lines = load_ghfm_spec # create `output_spec/spec.txt` glfm_spec_txt_header_lines = GLFM_SPEC_TXT_HEADER.split("\n").map { |line| "#{line}\n" } official_spec_lines = readlines_from_path!(GLFM_OFFICIAL_SPECIFICATION_MD_PATH) glfm_spec_txt_string = (glfm_spec_txt_header_lines + official_spec_lines).join('') write_glfm_spec_txt(glfm_spec_txt_string) # create `output_example_snapshots/snapshot_spec.md` snapshot_spec_md_header_lines = ES_SNAPSHOT_SPEC_MD_HEADER.split("\n").map { |line| "#{line}\n" } ghfm_spec_example_lines = extract_ghfm_spec_example_lines(ghfm_spec_lines) official_spec_example_lines = extract_glfm_spec_example_lines(official_spec_lines, GLFM_OFFICIAL_SPECIFICATION_MD_PATH) internal_extension_lines = readlines_from_path!(GLFM_INTERNAL_EXTENSIONS_MD_PATH) validate_internal_extensions_md(internal_extension_lines) internal_extension_example_lines = extract_glfm_spec_example_lines(internal_extension_lines, GLFM_INTERNAL_EXTENSIONS_MD_PATH) snapshot_spec_md_string = ( snapshot_spec_md_header_lines + ghfm_spec_example_lines + official_spec_example_lines + ["\n"] + internal_extension_example_lines ).join('') write_snapshot_spec_md(snapshot_spec_md_string) # Some unit tests can skip HTML generation if they don't need it, so they run faster if skip_spec_html_generation output("Skipping GLFM spec.html and snapshot_spec.html generation...") return end # Use the backend markdown processing to render un-styled GLFM specification HTML files from the markdown # We strip off the frontmatter headers before rendering. spec_html_unstyled_string, snapshot_spec_html_unstyled_string = generate_spec_html_files( glfm_spec_txt_string.gsub!(GLFM_SPEC_TXT_HEADER, "[TOC]\n\n"), snapshot_spec_md_string.gsub!(ES_SNAPSHOT_SPEC_MD_HEADER, "[TOC]\n\n"), ghfm_spec_example_lines.join('') ) # Add styling to the rendered HTML files, to make them look like the CommonMark and # GitHub Flavored Markdown HTML-rendered specifications spec_html_styled_string = add_styling_to_specification_html( body: spec_html_unstyled_string, title: GLFM_SPEC_TXT_TITLE, version: GLFM_SPEC_VERSION ) snapshot_spec_html_styled_string = add_styling_to_specification_html( body: snapshot_spec_html_unstyled_string, title: ES_SNAPSHOT_SPEC_TITLE, version: GLFM_SPEC_VERSION ) # Write out the styled HTML GLFM specification HTML files write_spec_html(spec_html_styled_string) write_snapshot_spec_html(snapshot_spec_html_styled_string) end private def load_ghfm_spec # We only re-download the GitHub Flavored Markdown specification if the # UPDATE_GHFM_SPEC_MD environment variable is set to true, which should only # ever be done manually and locally, never in CI. This provides some security # protection against a possible injection attack vector, if the GitHub-hosted # version of the spec is ever temporarily compromised with an injection attack. # # This also avoids doing external network access to download the file # in CI jobs, which can avoid potentially flaky builds if the GitHub-hosted # version of the file is temporarily unavailable. if ENV['UPDATE_GHFM_SPEC_MD'] == 'true' update_ghfm_spec_md else read_existing_ghfm_spec_md end end def read_existing_ghfm_spec_md output("Reading existing #{GHFM_SPEC_MD_PATH}...") File.open(GHFM_SPEC_MD_PATH).readlines end def update_ghfm_spec_md output("Downloading #{GHFM_SPEC_TXT_URI}...") # NOTE: We use `URI.parse` to avoid RuboCop warning "Security/Open", # even though we are using a trusted URI from a string literal constant. # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/98656#note_1138595002 for details. ghfm_spec_txt_uri_parsed = URI.parse(GHFM_SPEC_TXT_URI) ghfm_spec_txt_uri_io = ghfm_spec_txt_uri_parsed.open ghfm_spec_lines = readlines_from_io!(ghfm_spec_txt_uri_io, GHFM_SPEC_TXT_URI) # Make sure the GHFM spec version has not changed validate_expected_spec_version!(ghfm_spec_lines[2]) # Reset IO stream and re-read into a single string for easy writing # noinspection RubyNilAnalysis ghfm_spec_txt_uri_io.seek(0) ghfm_spec_string = ghfm_spec_txt_uri_io.read raise "Unable to read string from #{GHFM_SPEC_TXT_URI}" unless ghfm_spec_string output("Writing #{GHFM_SPEC_MD_PATH}...") GHFM_SPEC_MD_PATH.dirname.mkpath write_file(GHFM_SPEC_MD_PATH, ghfm_spec_string) ghfm_spec_lines end def validate_expected_spec_version!(version_line) return if version_line =~ /\Aversion: #{GHFM_SPEC_VERSION}\Z/o raise "GitHub Flavored Markdown spec.txt version mismatch! " \ "Expected 'version: #{GHFM_SPEC_VERSION}', got '#{version_line}'" end def extract_ghfm_spec_example_lines(spec_lines) # In the GHFM spec.txt format, all we have to identify the headers containing examples # is the presence of a single initial H1 named "Introduction" before the first # header containing examples, and the comment after the last header # containing examples. path = GHFM_SPEC_MD_PATH first_examples_header_index = spec_lines.index do |line| line.start_with?('# ') && !line.start_with?(INTRODUCTION_HEADER_LINE_TEXT) end raise "Unable to find first examples header in #{path}" unless first_examples_header_index end_tests_comment_index = spec_lines.index do |line| line.start_with?(END_TESTS_COMMENT_LINE_TEXT) end raise "Unable to locate 'END TESTS' comment line in #{path}" if end_tests_comment_index.nil? spec_lines[first_examples_header_index..(end_tests_comment_index - 1)] end def extract_glfm_spec_example_lines(spec_lines, path) # In the GLFM input markdown files (unlike the GLFM spec.txt format), we have control over # the contents, so we can use explicit and # is the presence of a single initial H1 named "Introduction" before the first # header containing examples, and the comment after the last header # containing examples. begin_tests_comment_line_index = spec_lines.index do |line| line.start_with?(BEGIN_TESTS_COMMENT_LINE_TEXT) end raise "Unable to locate 'BEGIN TESTS' comment line in #{path}" unless begin_tests_comment_line_index end_tests_comment_index = spec_lines.index do |line| line.start_with?(END_TESTS_COMMENT_LINE_TEXT) end raise "Unable to locate 'END TESTS' comment line in #{path}" if end_tests_comment_index.nil? spec_lines[(begin_tests_comment_line_index + 1)..(end_tests_comment_index - 1)] end def validate_internal_extensions_md(internal_extension_lines) first_line = internal_extension_lines[0].strip last_line = internal_extension_lines[-1].strip return unless first_line != BEGIN_TESTS_COMMENT_LINE_TEXT || last_line != END_TESTS_COMMENT_LINE_TEXT raise "Error: No content is allowed outside of the " \ "'#{BEGIN_TESTS_COMMENT_LINE_TEXT}' and '#{END_TESTS_COMMENT_LINE_TEXT}' comments " \ "in '#{GLFM_INTERNAL_EXTENSIONS_MD_PATH}'." end def write_glfm_spec_txt(glfm_spec_txt_string) output("Writing #{GLFM_SPEC_TXT_PATH}...") FileUtils.mkdir_p(Pathname.new(GLFM_SPEC_TXT_PATH).dirname) write_file(GLFM_SPEC_TXT_PATH, glfm_spec_txt_string) end def write_snapshot_spec_md(snapshot_spec_md_string) output("Writing #{ES_SNAPSHOT_SPEC_MD_PATH}...") FileUtils.mkdir_p(Pathname.new(ES_SNAPSHOT_SPEC_MD_PATH).dirname) write_file(ES_SNAPSHOT_SPEC_MD_PATH, snapshot_spec_md_string) end def generate_spec_html_files(spec_txt_string, snapshot_spec_md_string, ghfm_spec_examples_string) output("Generating spec.html and snapshot_spec.html from spec.txt and snapshot_spec.md markdown...") # NOTE: spec.txt only contains official GLFM examples, but snapshot_spec.md contains ALL examples, with the # official GLFM examples coming _after_ the GHFM (which contains CommonMark + GHFM) examples, and the # internal extension examples coming last. In the snapshot_spec.md, The CommonMark and GLFM examples come # first, in order for the example numbers to match tne numbers in those separate specifications [1]. But, we # also need for the numbering of the official examples in spec.txt to match the numbering of the official # examples in snapshot_spec.md. Here's the ordering: # # spec.txt: # 1. GLFM Official # # snapshot_spec.md: # 1. GHFM (contains CommonMark + GHFM) # 2. GLFM Official # 3. GLFM Internal # # [1] Note that the example numbering in the GLFM spec.html is currently out of sync with its corresponding # spec.txt because its rendering is out of date. This has been reported in the following issue: # https://github.com/github/cmark-gfm/issues/288 ghfm_spec_examples_count = ghfm_spec_examples_string.scan(EXAMPLE_BEGIN_STRING).length spec_txt_string_split_examples = transform_examples_for_rendering(spec_txt_string, starting_example_number: ghfm_spec_examples_count + 1) snapshot_spec_md_string_split_examples = transform_examples_for_rendering(snapshot_spec_md_string) input_markdown_yml_string = <<~MARKDOWN --- spec_txt: | #{spec_txt_string_split_examples.gsub(/^/, ' ')} snapshot_spec_md: | #{snapshot_spec_md_string_split_examples.gsub(/^/, ' ')} MARKDOWN # NOTE: We must copy the input YAML file used by the `render_static_html.rb` # to a separate temporary file in order for the script to read them, because it is run in # a separate subprocess, and during unit testing we are unable to substitute the mock # StringIO when reading the input files in the subprocess. ENV['INPUT_MARKDOWN_YML_PATH'] = Dir::Tmpname.create(MARKDOWN_TEMPFILE_BASENAME) do |path| write_file(path, input_markdown_yml_string) end # NOTE 1: We shell out to perform the conversion of markdown to static HTML by invoking a # separate subprocess. This allows us to avoid using the Rails API or environment in this # script, which makes developing and running the unit tests for this script much faster, # because they can use 'fast_spec_helper' which does not require the entire Rails environment. # NOTE 2: We run this as an RSpec process, for the same reasons we run via Jest process below: # because that's the easiest way to ensure a reliable, fully-configured environment in which # to execute the markdown-processing logic. Also, in the static/backend case. # Dir::Tmpname.create requires a block, but we are using the non-block form to get the path # via the return value, so we pass an empty block to avoid an error. static_html_tempfile_path = Dir::Tmpname.create(STATIC_HTML_TEMPFILE_BASENAME) {} ENV['OUTPUT_STATIC_HTML_TEMPFILE_PATH'] = static_html_tempfile_path cmd = %(bin/rspec #{__dir__}/render_static_html.rb) run_external_cmd(cmd) output("Reading generated html from tempfile #{static_html_tempfile_path}...") rendered_html_hash = YAML.safe_load(File.open(static_html_tempfile_path), symbolize_names: true) [rendered_html_hash.fetch(:spec_txt), rendered_html_hash.fetch(:snapshot_spec_md)] end # NOTE: body, title, and version are used by the ERB binding. # noinspection RubyUnusedLocalVariable def add_styling_to_specification_html(body:, title:, version:) # noinspection RubyMismatchedArgumentType ERB.new(File.read(File.expand_path('specification_html_template.erb', __dir__))).result(binding) end def transform_examples_for_rendering(spec_md_string, starting_example_number: 1) # This method: # 1. Splits the single example code block which has a period between the markdown and HTML into two code blocks # 2. Adds a wrapper div for use in styling and target for the example number named anchor. This will get the # 'class="example" id="example-n"' attributes applied via javascript (since markdown rendering does not # preserve classes or IDs) # 3. Adds a div which includes the example number named anchor and text. This will get the 'class="examplenum"' # attribute applied via javascript. # # NOTE: Even though they will get stripped durning markdown rendering, we will go ahead and add the class and id # attributes here, for easier debugging and comparison to the source markdown. example_replacement_regex = /(^#{EXAMPLE_BEGIN_STRING}.*?$(?:.|\n)*?)^\.$(\n(?:.|\n)*?^#{EXAMPLE_END_STRING}$)/mo example_num = starting_example_number spec_md_string.gsub(example_replacement_regex) do |_example_string| markdown_part = ::Regexp.last_match(1) html_part = ::Regexp.last_match(2) example_anchor_name = "example-#{example_num}" examplenum_div = %(
\n) example_num += 1 # NOTE: We need blank lines before the markdown code blocks so they will be rendered properly %(