# frozen_string_literal: true

require 'fileutils'
require 'open-uri'
require 'yaml'
require 'psych'
require 'tempfile'
require 'open3'
require 'active_support/core_ext/enumerable'

require_relative 'constants'
require_relative 'shared'
require_relative 'parse_examples'

# IMPORTANT NOTE: See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#update-example-snapshotsrb-script
# for details on the implementation and usage of this script. This developers guide
# contains diagrams and documentation of this script,
# including explanations and examples of all files it reads and writes.
#
# Also note that this script is intentionally written in a pure-functional (not OO) style,
# with no dependencies on Rails or the GitLab libraries. These choices are intended to make
# it faster and easier to test and debug.
#
# NOTE(review): files are opened throughout with the non-block form of `File.open` and are
# never explicitly closed. Comments below mention that specs substitute a mock StringIO for
# these files, so this is presumably deliberate - confirm before switching to block form.
module Glfm
  class UpdateExampleSnapshots
    include Constants
    include Shared
    include ParseExamples

    # Entry point: reads the GLFM `spec.txt`, parses its examples, names them, drops the
    # disabled ones, and writes the example snapshot files.
    #
    # skip_static_and_wysiwyg can be used to skip the backend/frontend html and prosemirror JSON
    # generation which depends on external calls. This allows for faster processing in unit tests
    # which do not require it.
    def process(skip_static_and_wysiwyg: false)
      output('Updating example snapshots...')

      output("Reading #{GLFM_SPEC_TXT_PATH}...")
      glfm_spec_txt_lines = File.open(GLFM_SPEC_TXT_PATH).readlines

      # Parse all the examples from `spec.txt`, using a Ruby port of the Python `get_tests`
      # function from the original CommonMark/GFM `spec_test.py` script.
      all_examples = parse_examples(glfm_spec_txt_lines)

      add_example_names(all_examples)

      reject_disabled_examples(all_examples)

      write_snapshot_example_files(all_examples, skip_static_and_wysiwyg: skip_static_and_wysiwyg)
    end

    private

    # Assigns a unique `:name` to every example hash in `all_examples` (mutating each hash).
    #
    # The name has the form `HH_HH_HH__<headers>__NNN`, where the `HH` parts are the zero-padded
    # H1/H2/H3 section counters, `<headers>` is the snake-cased heading path, and `NNN` is the
    # zero-padded position of the example within its innermost heading.
    def add_example_names(all_examples)
      # NOTE: This method and the parse_examples method assume:
      # 1. Section 2 is the first section which contains examples
      # 2. Examples are always nested in an H2 or an H3, never directly in an H1
      # 3. There may exist headings with no examples (e.g. "Motivation" in the GLFM spec.txt)
      # 4. The Appendix doesn't ever contain any examples, so it doesn't show up
      #    in the H1 header count. So, even though due to the concatenation it appears before the
      #    GitLab examples sections, it doesn't result in their header counts being off by +1.
      # 5. If an example contains the 'disabled' string extension, it is skipped (and will thus
      #    result in a skip in the `spec_txt_example_position`). This behavior is taken from the
      #    GFM `spec_test.py` script (but it's NOT in the original CommonMark `spec_test.py`).
      # 6. If a section contains ONLY disabled examples, the section numbering will still be
      #    incremented to match the rendered HTML specification section numbering.
      # 7. Every H2 or H3 must contain at least one example, but it is allowed that they are
      #    all disabled.

      h1_count = 1 # examples start in H1 section 2; section 1 is the overview with no examples.
      h2_count = 0
      h3_count = 0
      previous_h1 = ''
      previous_h2 = ''
      previous_h3 = ''
      index_within_current_heading = 0

      all_examples.each do |example|
        headers = example[:headers]

        h1_changed = headers[0] != previous_h1
        if h1_changed
          h1_count += 1
          h2_count = 0
          h3_count = 0
          previous_h1 = headers[0]
        end

        # A new H1 always starts a new H2, even when the H2 title happens to be identical to the
        # last H2 of the previous H1. Comparing titles alone would leave the counter at zero.
        if h1_changed || headers[1] != previous_h2
          h2_count += 1
          h3_count = 0
          previous_h2 = headers[1]
          # Forget the previous H3 so that an identically-titled H3 under this new H2 is
          # still counted as a new section.
          previous_h3 = ''
          index_within_current_heading = 0
        end

        if headers[2] && headers[2] != previous_h3
          h3_count += 1
          previous_h3 = headers[2]
          index_within_current_heading = 0
        end

        index_within_current_heading += 1

        # convert headers array to lowercase string with underscores, and double underscores between headers
        formatted_headers_text = headers.join('__').tr(' -', '_').downcase

        hierarchy_level =
          "#{h1_count.to_s.rjust(2, '0')}_" \
          "#{h2_count.to_s.rjust(2, '0')}_" \
          "#{h3_count.to_s.rjust(2, '0')}"
        position_within_section = index_within_current_heading.to_s.rjust(3, '0')
        name = "#{hierarchy_level}__#{formatted_headers_text}__#{position_within_section}"
        example[:name] = name.delete('()') # remove any parens from the name
      end
    end

    # Removes (in place) every example whose `:disabled` entry is truthy.
    def reject_disabled_examples(all_examples)
      all_examples.reject! { |example| example[:disabled] }
    end

    # Validates the manually-curated config files, then writes the examples index and markdown
    # snapshot files. Unless `skip_static_and_wysiwyg` is truthy, it also generates the
    # static/WYSIWYG HTML and prosemirror JSON and writes their snapshot files.
    def write_snapshot_example_files(all_examples, skip_static_and_wysiwyg:)
      output("Reading #{GLFM_EXAMPLE_STATUS_YML_PATH}...")
      # `aliases: true` matches how validate_glfm_config_file_example_names loads this same file,
      # so that entries used as YAML anchors do not make this first load fail.
      glfm_examples_statuses =
        YAML.safe_load(File.open(GLFM_EXAMPLE_STATUS_YML_PATH), symbolize_names: true, aliases: true) || {}
      validate_glfm_example_status_yml(glfm_examples_statuses)

      write_examples_index_yml(all_examples)

      validate_glfm_config_file_example_names(all_examples)

      write_markdown_yml(all_examples)

      if skip_static_and_wysiwyg
        output("Skipping static/WYSIWYG HTML and prosemirror JSON generation...")
        return
      end

      # NOTE: We pass the INPUT_MARKDOWN_YML_PATH and INPUT_METADATA_YML_PATH via
      # environment variables to the static/wysiwyg HTML generation scripts. This is because they
      # are implemented as subprocesses which invoke rspec/jest scripts, and rspec/jest do not make
      # it straightforward to pass arguments via the command line.
      ENV['INPUT_MARKDOWN_YML_PATH'], ENV['INPUT_METADATA_YML_PATH'] = copy_tempfiles_for_subprocesses
      static_html_hash = generate_static_html
      wysiwyg_html_and_json_hash = generate_wysiwyg_html_and_json

      write_html_yml(all_examples, static_html_hash, wysiwyg_html_and_json_hash, glfm_examples_statuses)

      write_prosemirror_json_yml(all_examples, wysiwyg_html_and_json_hash, glfm_examples_statuses)
    end

    # Raises if any entry sets `skip_update_example_snapshots` together with a truthy
    # `skip_update_example_snapshot_*` value; the two kinds of flags are mutually exclusive.
    def validate_glfm_example_status_yml(glfm_examples_statuses)
      glfm_examples_statuses.each do |example_name, statuses|
        next unless statuses &&
          statuses[:skip_update_example_snapshots] &&
          statuses.any? { |key, value| key.to_s.include?('skip_update_example_snapshot_') && value }

        raise "Error: '#{example_name}' must not have any 'skip_update_example_snapshot_*' values specified " \
              "if 'skip_update_example_snapshots' is truthy"
      end
    end

    # Raises if any of the manually-curated config files contains an entry whose name does not
    # match the name of one of `all_examples` (entries prefixed with `00_` are exempt).
    def validate_glfm_config_file_example_names(all_examples)
      valid_example_names = all_examples.pluck(:name).map(&:to_sym) # rubocop:disable CodeReuse/ActiveRecord

      # We are re-reading GLFM_EXAMPLE_STATUS_YML_PATH here, but that's OK, it's a small file, and rereading it
      # allows us to handle it in the same loop as the other manually-curated config files.
      [
        GLFM_EXAMPLE_STATUS_YML_PATH,
        GLFM_EXAMPLE_METADATA_YML_PATH,
        GLFM_EXAMPLE_NORMALIZATIONS_YML_PATH
      ].each do |path|
        output("Reading #{path}...")
        io = File.open(path)
        config_file_examples = YAML.safe_load(io, symbolize_names: true, aliases: true)

        # Skip validation if the config file is empty
        next unless config_file_examples

        config_file_example_names = config_file_examples.keys

        # Validate that all example names which exist in the config file refer to an existing example
        # in `examples_index.yml`, unless the name starts with the special prefix `00_`, which is
        # preserved for usage as YAML anchors.
        invalid_name = config_file_example_names.find do |name|
          !name.start_with?('00_') && valid_example_names.exclude?(name)
        end
        next unless invalid_name

        # NOTE: The extra spaces before punctuation in the error message allows for easier copy/pasting of the paths.
        err_msg =
          <<~TXT

            Error in input specification config file #{path} :

              Config file entry named #{invalid_name}
              does not have a corresponding example entry in
              #{ES_EXAMPLES_INDEX_YML_PATH} .

            Please delete or rename this config file entry.

            If this entry is being used as a YAML anchor, please rename it to start with '00_'.
          TXT
        raise err_msg
      end
    end

    # Writes ES_EXAMPLES_INDEX_YML_PATH, mapping each example name to its position in `spec.txt`
    # and the specification it originates from.
    def write_examples_index_yml(all_examples)
      generate_and_write_for_all_examples(
        all_examples, ES_EXAMPLES_INDEX_YML_PATH, literal_scalars: false
      ) do |example, hash|
        name = example.fetch(:name).to_sym
        hash[name] = {
          'spec_txt_example_position' => example.fetch(:example),
          'source_specification' => source_specification_for_extensions(example.fetch(:extensions))
        }
      end
    end

    # Maps an example's extensions to its source specification: 'commonmark' when there are no
    # extensions (other than `disabled`), 'gitlab' when the `gitlab` extension is present, and
    # 'github' otherwise.
    #
    # Raises if more than one extension remains after removing `disabled` and `gitlab`.
    def source_specification_for_extensions(extensions)
      unprocessed_extensions = extensions.map(&:to_sym)
      unprocessed_extensions.delete(:disabled)

      source_specification =
        if unprocessed_extensions.empty?
          'commonmark'
        elsif unprocessed_extensions.include?(:gitlab)
          unprocessed_extensions.delete(:gitlab)
          'gitlab'
        else
          'github'
        end

      # We should only be left with at most one extension, which is an optional name for the example
      raise "Error: Invalid extension(s) found: #{unprocessed_extensions.join(', ')}" if unprocessed_extensions.size > 1

      source_specification
    end

    # Writes ES_MARKDOWN_YML_PATH, mapping each example name to its markdown.
    def write_markdown_yml(all_examples)
      generate_and_write_for_all_examples(all_examples, ES_MARKDOWN_YML_PATH) do |example, hash|
        name = example.fetch(:name).to_sym
        hash[name] = example.fetch(:markdown)
      end
    end

    # Copies the markdown and metadata YAML files to temporary files and returns the two
    # temporary paths, in that order.
    def copy_tempfiles_for_subprocesses
      # NOTE: We must copy the input YAML files used by the `render_static_html.rb`
      # and `render_wysiwyg_html_and_json.js` scripts to a separate temporary file in order for
      # the scripts to read them, because the scripts are run in
      # separate subprocesses, and during unit testing we are unable to substitute the mock
      # StringIO when reading the input files in the subprocess.
      {
        ES_MARKDOWN_YML_PATH => MARKDOWN_TEMPFILE_BASENAME,
        GLFM_EXAMPLE_METADATA_YML_PATH => METADATA_TEMPFILE_BASENAME
      }.map do |original_file_path, tempfile_basename|
        Dir::Tmpname.create(tempfile_basename) do |path|
          io = File.open(original_file_path)
          io.seek(0) # rewind the file. This is necessary when testing with a mock StringIO
          contents = io.read
          write_file(path, contents)
        end
      end
    end

    # Runs `render_static_html.rb` via RSpec in a subprocess and returns the YAML it wrote to a
    # tempfile, parsed with symbolized keys.
    def generate_static_html
      output("Generating static HTML from markdown examples...")

      # NOTE 1: We shell out to perform the conversion of markdown to static HTML by invoking a
      # separate subprocess. This allows us to avoid using the Rails API or environment in this
      # script, which makes developing and running the unit tests for this script much faster,
      # because they can use 'fast_spec_helper' which does not require the entire Rails environment.

      # NOTE 2: We run this as an RSpec process, for the same reasons we run via Jest process below:
      # because that's the easiest way to ensure a reliable, fully-configured environment in which
      # to execute the markdown-processing logic. Also, in the static/backend case, Rspec
      # provides the easiest and most reliable way to generate example data via Factorybot
      # creation of stable model records. This ensures consistent snapshot values across
      # machines/environments.

      # Dir::Tmpname.create requires a block, but we are using the non-block form to get the path
      # via the return value, so we pass an empty block to avoid an error.
      static_html_tempfile_path = Dir::Tmpname.create(STATIC_HTML_TEMPFILE_BASENAME) {}
      ENV['OUTPUT_STATIC_HTML_TEMPFILE_PATH'] = static_html_tempfile_path

      cmd = %(bin/rspec #{__dir__}/render_static_html.rb)
      run_external_cmd(cmd)

      output("Reading generated static HTML from tempfile #{static_html_tempfile_path}...")
      YAML.safe_load(File.open(static_html_tempfile_path), symbolize_names: true)
    end

    # Runs `render_wysiwyg_html_and_json.js` via Jest in a subprocess and returns the YAML it
    # wrote to a tempfile, parsed with symbolized keys.
    def generate_wysiwyg_html_and_json
      output("Generating WYSIWYG HTML and prosemirror JSON from markdown examples...")

      # Dir::Tmpname.create requires a block, but we are using the non-block form to get the path
      # via the return value, so we pass an empty block to avoid an error.
      wysiwyg_html_and_json_tempfile_path = Dir::Tmpname.create(WYSIWYG_HTML_AND_JSON_TEMPFILE_BASENAME) {}
      ENV['OUTPUT_WYSIWYG_HTML_AND_JSON_TEMPFILE_PATH'] = wysiwyg_html_and_json_tempfile_path

      cmd = "yarn jest --testMatch '**/render_wysiwyg_html_and_json.js' #{__dir__}/render_wysiwyg_html_and_json.js"
      run_external_cmd(cmd)

      output("Reading generated WYSIWYG HTML and prosemirror JSON from tempfile " \
             "#{wysiwyg_html_and_json_tempfile_path}...")
      YAML.safe_load(File.open(wysiwyg_html_and_json_tempfile_path), symbolize_names: true)
    end

    # Writes ES_HTML_YML_PATH with the canonical, static and WYSIWYG HTML of each example.
    # For an example whose status entry sets one of the `skip_update_example_snapshot_html_*`
    # flags, the corresponding value is taken from the existing file instead of being regenerated.
    def write_html_yml(all_examples, static_html_hash, wysiwyg_html_and_json_hash, glfm_examples_statuses)
      generate_and_write_for_all_examples(
        all_examples, ES_HTML_YML_PATH, glfm_examples_statuses: glfm_examples_statuses
      ) do |example, hash, existing_hash|
        name = example.fetch(:name).to_sym
        example_statuses = glfm_examples_statuses[name] || {}

        static =
          if example_statuses[:skip_update_example_snapshot_html_static]
            existing_hash.dig(name, :static)
          else
            static_html_hash[name]
          end

        wysiwyg =
          if example_statuses[:skip_update_example_snapshot_html_wysiwyg]
            existing_hash.dig(name, :wysiwyg)
          else
            wysiwyg_html_and_json_hash.dig(name, :html)
          end

        hash[name] = {
          'canonical' => example.fetch(:html),
          'static' => static,
          'wysiwyg' => wysiwyg
        }.compact # Do not assign nil values
      end
    end

    # Writes ES_PROSEMIRROR_JSON_YML_PATH with the prosemirror JSON of each example. For an
    # example whose status entry sets `skip_update_example_snapshot_prosemirror_json`, the value
    # is taken from the existing file instead of being regenerated.
    def write_prosemirror_json_yml(all_examples, wysiwyg_html_and_json_hash, glfm_examples_statuses)
      generate_and_write_for_all_examples(
        all_examples, ES_PROSEMIRROR_JSON_YML_PATH, glfm_examples_statuses: glfm_examples_statuses
      ) do |example, hash, existing_hash|
        name = example.fetch(:name).to_sym

        json =
          if glfm_examples_statuses.dig(name, :skip_update_example_snapshot_prosemirror_json)
            existing_hash[name]
          else
            wysiwyg_html_and_json_hash.dig(name, :json)
          end

        # Do not assign nil values
        hash[name] = json if json
      end
    end

    # Builds a hash with one entry per example and writes it as YAML to `output_file_path`.
    #
    # The given block is called as `yield(example, hash, existing_hash)` and is expected to add
    # the example's entry to `hash`. When `glfm_examples_statuses` is non-empty, the current
    # contents of `output_file_path` are loaded first and passed to the block as `existing_hash`.
    # Examples whose status sets `skip_update_example_snapshots` are not yielded; their existing
    # entry (if any) is copied over unchanged.
    def generate_and_write_for_all_examples(
      all_examples, output_file_path, glfm_examples_statuses: {}, literal_scalars: true
    )
      preserve_existing = !glfm_examples_statuses.empty?
      output("#{preserve_existing ? 'Creating/Updating' : 'Creating/Overwriting'} #{output_file_path}...")

      existing_hash =
        if preserve_existing
          # An existing but empty file loads as a falsy value; treat it as having no entries.
          YAML.safe_load(File.open(output_file_path), symbolize_names: true) || {}
        else
          {}
        end

      output_hash = all_examples.each_with_object({}) do |example, hash|
        name = example.fetch(:name).to_sym
        if (reason = glfm_examples_statuses.dig(name, :skip_update_example_snapshots))
          # Output the reason for skipping the example, but only once, not multiple times for each file
          output("Skipping '#{name}'. Reason: #{reason}") unless glfm_examples_statuses.dig(name, :already_printed)
          # We just store the `:already_printed` flag in the hash entry itself. Then we
          # don't need an instance variable to keep the state, and this can remain a pure function ;)
          glfm_examples_statuses[name][:already_printed] = true

          # Copy over the existing example only if it exists and preserve_existing is true, otherwise omit this example
          # noinspection RubyScope
          hash[name] = existing_hash[name] if existing_hash[name]

          next
        end

        yield(example, hash, existing_hash)
      end

      yaml_string = dump_yaml_with_formatting(output_hash, literal_scalars: literal_scalars)
      write_file(output_file_path, yaml_string)
    end
  end
end