From 1df89cbc21fa598f4de38eeaea53ec0059e274e6 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Tue, 31 Mar 2026 19:10:34 -0700 Subject: [PATCH] Right-size CI runners to reduce waste and shuffle build order to avoid OOM libeigen/eigen!2367 Co-authored-by: Rasmus Munk Larsen --- ci/build.linux.gitlab-ci.yml | 8 +++- ci/scripts/build.linux.script.sh | 57 ++++++++++++++++++++++++++++- ci/scripts/build.windows.script.ps1 | 4 +- ci/scripts/test.linux.script.sh | 7 +++- ci/test.linux.gitlab-ci.yml | 12 ++++++ 5 files changed, 82 insertions(+), 6 deletions(-) diff --git a/ci/build.linux.gitlab-ci.yml b/ci/build.linux.gitlab-ci.yml index 3a5c651ba..4e5295a41 100644 --- a/ci/build.linux.gitlab-ci.yml +++ b/ci/build.linux.gitlab-ci.yml @@ -104,6 +104,8 @@ build:linux:docs: EIGEN_CI_C_COMPILER: clang EIGEN_CI_CXX_COMPILER: clang++ EIGEN_CI_BEFORE_SCRIPT: ". ci/scripts/build_and_install_doxygen.sh Release_1_13_2" + tags: + - saas-linux-medium-amd64 rules: - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen" - if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen" @@ -154,8 +156,10 @@ build:linux:x86-64:nvhpc-26.1:default: EIGEN_CI_INSTALL: "" EIGEN_CI_CROSS_INSTALL: "" # NVHPC's compiler frontend is very memory-hungry with template-heavy code. - # Limit parallelism to avoid OOM kills from the kernel. - EIGEN_CI_BUILD_JOBS: "8" + # The 2xlarge runner has 128 GB, so 16 jobs gives ~8 GB per process. + # The shuffled-batch build strategy spreads memory-hungry targets across + # batches, preventing simultaneous OOM-prone compilations. 
+ EIGEN_CI_BUILD_JOBS: "16" ######## CUDA ################################################################## diff --git a/ci/scripts/build.linux.script.sh b/ci/scripts/build.linux.script.sh index 38e43ab94..b1adb7d53 100755 --- a/ci/scripts/build.linux.script.sh +++ b/ci/scripts/build.linux.script.sh @@ -29,7 +29,62 @@ jobs="" if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then jobs="-j${EIGEN_CI_BUILD_JOBS}" fi -cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1 + +# For phony meta-targets (e.g. buildtests), shuffle the dependency list and +# build in batches so that memory-hungry compilations (like bdcsvd with +# nvc++) are spread out instead of all running at once. Ninja ignores the +# command-line target order and schedules by its dependency graph, so we +# must feed it small batches to actually influence scheduling. +# Falls back to the normal build if no build target is set, if ninja is +# not available, or if the query returns no inputs. +batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-48} +shuffled=false +if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then + # Suppress xtrace while extracting and shuffling the target list + # to avoid dumping ~1200 lines to the CI log. + { set +x; } 2>/dev/null + deps=$(ninja -t query "${EIGEN_CI_BUILD_TARGET}" 2>/dev/null \ + | awk '/^ input:/{found=1; next} /^ outputs:/{found=0} found && /^ /{print $1}') + # Deterministic shuffle: hash each target name and sort by hash. + # Stable across runs (helps ninja's .ninja_log and build caches), + # portable (no shuf dependency), and spreads same-family targets apart. + # Uses a multiplicative string hash (Knuth's golden-ratio constant + # 2654435761, reduced modulo 2147483647) so that similar names like + # bdcsvd_1..bdcsvd_51 get dispersed instead of clustering together. 
+ shuffled_deps=$(echo "$deps" | awk ' + BEGIN { for(i=0;i<128;i++) ord[sprintf("%c",i)]=i } + { h=0 + for(i=1;i<=length($0);i++) h=(h+ord[substr($0,i,1)])*2654435761%2147483647 + printf "%010d %s\n",h,$0 }' | sort | sed 's/^[^ ]* //') + if [[ -n "$shuffled_deps" ]]; then + ndeps=$(echo "$shuffled_deps" | wc -l) + echo "Building ${ndeps} targets in batches of ${batch_size}" + shuffled=true + # Build in batches: ninja parallelises within each batch, but batches + # run sequentially so memory-hungry targets from different families + # don't pile up simultaneously. Track failures so we can report the + # right exit code at the end. + # Note: xtrace stays off to avoid dumping the full target list. + # Use process substitution so the while loop runs in the current + # shell and build_failed propagates. + batch_num=0 + build_failed=false + while IFS= read -r batch; do + batch_num=$((batch_num + 1)) + echo "=== Batch ${batch_num} ===" + ninja -k0 ${jobs} ${batch} || ninja -k0 -j1 ${batch} || build_failed=true + done < <(echo "$shuffled_deps" | xargs -n "${batch_size}") + if [[ "$build_failed" == "true" ]]; then + echo "Some batches failed." + exit 1 + fi + fi + set -x +fi + +if [[ "$shuffled" != "true" ]]; then + cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1 +fi # Return to root directory. 
cd ${rootdir} diff --git a/ci/scripts/build.windows.script.ps1 b/ci/scripts/build.windows.script.ps1 index 2249d008d..8252d7782 100644 --- a/ci/scripts/build.windows.script.ps1 +++ b/ci/scripts/build.windows.script.ps1 @@ -5,8 +5,8 @@ $VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\v # http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" | foreach { - if ($_ -match "=") { - $v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])" + if ($_ -match "^([^=]+)=(.*)$") { + set-item -force -LiteralPath "ENV:\$($Matches[1])" -value "$($Matches[2])" } } diff --git a/ci/scripts/test.linux.script.sh b/ci/scripts/test.linux.script.sh index dcf9cb646..26cf32c01 100755 --- a/ci/scripts/test.linux.script.sh +++ b/ci/scripts/test.linux.script.sh @@ -13,11 +13,16 @@ elif [[ ${EIGEN_CI_CTEST_LABEL} ]]; then target="-L ${EIGEN_CI_CTEST_LABEL}" fi +exclude="" +if [[ -n "${EIGEN_CI_CTEST_EXCLUDE}" ]]; then + exclude="-E ${EIGEN_CI_CTEST_EXCLUDE}" +fi + set +x EIGEN_CI_CTEST_PARALLEL=${EIGEN_CI_CTEST_PARALLEL:-${NPROC}} EIGEN_CI_CTEST_REPEAT=${EIGEN_CI_CTEST_REPEAT:-3} -ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target}" +ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target} ${exclude}" echo "Running initial tests..." 
if ${ctest_cmd} -T test; then diff --git a/ci/test.linux.gitlab-ci.yml b/ci/test.linux.gitlab-ci.yml index 6e9a47032..3c5d4f8fd 100644 --- a/ci/test.linux.gitlab-ci.yml +++ b/ci/test.linux.gitlab-ci.yml @@ -266,6 +266,10 @@ test:linux:cuda-12.2:clang-12: EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf EIGEN_CI_CTEST_ARGS: --timeout 2000 EIGEN_CI_CTEST_PARALLEL: "4" + # Thread pool tests are too slow under qemu emulation. + EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool + tags: + - saas-linux-large-amd64 .test:linux:arm:gcc-10:default: extends: .test:linux:arm @@ -350,6 +354,10 @@ test:linux:aarch64:clang-14:default:unsupported: EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu EIGEN_CI_CTEST_ARGS: --timeout 2000 EIGEN_CI_CTEST_PARALLEL: "4" + # Thread pool tests are too slow under qemu emulation. + EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool + tags: + - saas-linux-large-amd64 .test:linux:ppc64le:gcc-14:default: extends: .test:linux:ppc64le @@ -393,6 +401,10 @@ test:linux:ppc64le:clang-16:default:unsupported: EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu qemu-user-static EIGEN_CI_CTEST_ARGS: --timeout 2000 EIGEN_CI_CTEST_PARALLEL: "4" + # Thread pool tests are too slow under qemu emulation. + EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool + tags: + - saas-linux-large-amd64 # GCC-14 (Ubuntu 24) .test:linux:loongarch64:gcc-14:default: