debian-mirror-gitlab/scripts/generate-rspec-foss-impact-pipeline

#!/usr/bin/env bash
set -euo pipefail

# Script to generate `rspec foss-impact` test child pipeline with dynamically parallelized jobs.

source scripts/utils.sh

rspec_matching_tests_foss_path="${1}"
pipeline_yml="${2}"

test_file_count=$(wc -w "${rspec_matching_tests_foss_path}" | awk '{ print $1 }')
echoinfo "test_file_count: ${test_file_count}"

if [[ "${test_file_count}" -eq 0 ]]; then
  skip_pipeline=".gitlab/ci/_skip.yml"

  echo "Using ${skip_pipeline} due to no impacted FOSS rspec tests to run"
  cp $skip_pipeline "$pipeline_yml"
  exit
fi

# As of 2022-09-01:
# $ find spec -type f | wc -l
#  12825
# and
# $ find ee/spec -type f | wc -l
#  5610
# which gives a total of 18435 test files (`number_of_tests_in_total_in_the_test_suite`).
#
# Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/) is 170183 seconds (`duration_of_the_test_suite_in_seconds`).
#
# This gives an approximate 170183 / 18435 = 9.2 seconds per test file (`average_test_file_duration_in_seconds`).
#
# If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`setup_duration_in_seconds`), then we need to give 7 minutes of testing to each test node (`optimal_test_runtime_duration_in_seconds`).
# (7 * 60) / 9.2 = 45.6
#
# So if we'd want to run the full test suites in 10 minutes (`optimal_test_job_duration_in_seconds`), we'd need to run at max 45 test file per nodes (`optimal_test_file_count_per_node`).
number_of_tests_in_total_in_the_test_suite=18435
duration_of_the_test_suite_in_seconds=170183
optimal_test_job_duration_in_seconds=600 # 10 minutes
setup_duration_in_seconds=180 # 3 minutes

optimal_test_runtime_duration_in_seconds=$(( optimal_test_job_duration_in_seconds - setup_duration_in_seconds ))
echoinfo "optimal_test_runtime_duration_in_seconds: ${optimal_test_runtime_duration_in_seconds}"

average_test_file_duration_in_seconds=$(( duration_of_the_test_suite_in_seconds / number_of_tests_in_total_in_the_test_suite ))
echoinfo "average_test_file_duration_in_seconds: ${average_test_file_duration_in_seconds}"

optimal_test_file_count_per_node=$(( optimal_test_runtime_duration_in_seconds / average_test_file_duration_in_seconds ))
echoinfo "optimal_test_file_count_per_node: ${optimal_test_file_count_per_node}"

node_count=$(( test_file_count / optimal_test_file_count_per_node ))
echoinfo "node_count: ${node_count}"

echoinfo "Optimal node count for 'rspec foss-impact' jobs is ${node_count}."

MAX_NODES_COUNT=50 # Maximum parallelization allowed by GitLab
if [[ "${node_count}" -gt "${MAX_NODES_COUNT}" ]]; then
  echoinfo "We don't want to parallelize 'rspec foss-impact' to more than ${MAX_NODES_COUNT} jobs for now! Decreasing the parallelization to ${MAX_NODES_COUNT}."
  node_count=${MAX_NODES_COUNT}
fi

ruby -rerb -e "puts ERB.new(File.read('.gitlab/ci/rails/rspec-foss-impact.gitlab-ci.yml.erb')).result_with_hash(parallel_value: ${node_count})" > "${pipeline_yml}"

echosuccess "Generated ${pipeline_yml} pipeline with following content:"
cat "${pipeline_yml}"