Right-size CI runners to reduce waste and shuffle build order to avoid OOM

libeigen/eigen!2367

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
Rasmus Munk Larsen
2026-03-31 19:10:34 -07:00
parent b54640df19
commit 1df89cbc21
5 changed files with 82 additions and 6 deletions

View File

@@ -104,6 +104,8 @@ build:linux:docs:
EIGEN_CI_C_COMPILER: clang
EIGEN_CI_CXX_COMPILER: clang++
EIGEN_CI_BEFORE_SCRIPT: ". ci/scripts/build_and_install_doxygen.sh Release_1_13_2"
tags:
- saas-linux-medium-amd64
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
@@ -154,8 +156,10 @@ build:linux:x86-64:nvhpc-26.1:default:
EIGEN_CI_INSTALL: ""
EIGEN_CI_CROSS_INSTALL: ""
# NVHPC's compiler frontend is very memory-hungry with template-heavy code.
# Limit parallelism to avoid OOM kills from the kernel.
EIGEN_CI_BUILD_JOBS: "8"
# The 2xlarge runner has 128 GB, so 16 jobs gives ~8 GB per process.
# The shuffled-batch build strategy spreads memory-hungry targets across
# batches, preventing simultaneous OOM-prone compilations.
EIGEN_CI_BUILD_JOBS: "16"
######## CUDA ##################################################################

View File

@@ -29,7 +29,62 @@ jobs=""
if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then
jobs="-j${EIGEN_CI_BUILD_JOBS}"
fi
cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1
# Shuffled batch build.
#
# For phony meta-targets (e.g. buildtests), reorder the dependency list and
# build it in batches so that memory-hungry compilations (like bdcsvd with
# nvc++) are spread out instead of all running at once. Ninja ignores the
# command-line target order and schedules by its dependency graph, so we
# must feed it small batches to actually influence scheduling.
#
# Falls back to the normal `cmake --build` when EIGEN_CI_BUILD_TARGET is
# unset, ninja is not on PATH, or the target is not produced by ninja's
# `phony` rule (building only the inputs of a real target would never
# produce the target itself).
#
# Reads:  EIGEN_CI_BUILD_TARGET, EIGEN_CI_BUILD_BATCH_SIZE (default 48),
#         ${jobs} and ${target} as set earlier in this script.
# Exits with status 1 if any batch still fails after its -j1 retry.

# Print the inputs of a phony target, one per line.
# stdin: output of `ninja -t query <target>`. Prints nothing when the rule
# that produces the target is anything other than `phony`.
eigen_ci_phony_inputs() {
  awk '
    # "input: <rule>" opens the input list; only collect for phony rules.
    /^[ \t]+input:[ \t]/ { collecting = (NF == 2 && $NF == "phony"); next }
    # Any following section header closes the input list.
    /^[ \t]+(outputs|validations):[ \t]*$/ { collecting = 0; next }
    # Implicit / order-only inputs are listed as "| path" / "|| path";
    # taking the last field drops that label.
    collecting && NF { print $NF }'
}

# Deterministically reorder target names (stdin, one per line).
# Each name is hashed and the list is sorted by hash: stable across runs
# (helps ninja's .ninja_log and build caches), needs no shuf, and spreads
# same-family targets apart. Uses Knuth's multiplicative hash constant
# 2654435761 so similar names like bdcsvd_1..bdcsvd_51 get dispersed
# instead of clustering together. awk computes in double precision, so the
# intermediate product may be rounded; the result is still deterministic.
eigen_ci_spread_targets() {
  awk '
    BEGIN { for (i = 0; i < 128; i++) ord[sprintf("%c", i)] = i }
    length($0) == 0 { next }
    {
      h = 0
      for (i = 1; i <= length($0); i++) h = (h + ord[substr($0, i, 1)]) * 2654435761 % 2147483647
      printf "%010d %s\n", h, $0
    }' | LC_ALL=C sort | sed 's/^[^ ]* //'
}

batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-48}
# A non-numeric or zero batch size would otherwise yield zero batches and a
# "successful" build that compiled nothing.
if ! [[ "${batch_size}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Ignoring invalid EIGEN_CI_BUILD_BATCH_SIZE='${batch_size}'; using 48." >&2
  batch_size=48
fi

shuffled=false
if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
  # Suppress xtrace while extracting, shuffling and building the target
  # list to avoid dumping ~1200 lines to the CI log. Remember whether it
  # was on so it is only re-enabled if it was active before.
  xtrace_was_on=false
  if [[ $- == *x* ]]; then
    xtrace_was_on=true
  fi
  { set +x; } 2>/dev/null

  # Process substitution keeps the loop in the current shell so the array
  # survives it.
  shuffled_deps=()
  while IFS= read -r dep; do
    shuffled_deps+=("${dep}")
  done < <(ninja -t query "${EIGEN_CI_BUILD_TARGET}" 2>/dev/null \
    | eigen_ci_phony_inputs | eigen_ci_spread_targets)

  ndeps=${#shuffled_deps[@]}
  if (( ndeps > 0 )); then
    echo "Building ${ndeps} targets in batches of ${batch_size}"
    shuffled=true
    # Ninja parallelises within each batch, but batches run sequentially so
    # memory-hungry targets from different families don't pile up
    # simultaneously. Keep going after a failed batch (matching -k0) and
    # report the failure once all batches have been attempted.
    batch_num=0
    build_failed=false
    for (( start = 0; start < ndeps; start += batch_size )); do
      batch=("${shuffled_deps[@]:start:batch_size}")
      batch_num=$((batch_num + 1))
      echo "=== Batch ${batch_num} ==="
      # ${jobs} is intentionally unquoted: it is either empty or "-jN".
      # On failure, retry the batch serially before giving up on it.
      ninja -k0 ${jobs} "${batch[@]}" || ninja -k0 -j1 "${batch[@]}" || build_failed=true
    done
    if [[ "$build_failed" == "true" ]]; then
      echo "Some batches failed."
      exit 1
    fi
  fi

  if [[ "${xtrace_was_on}" == "true" ]]; then
    set -x
  fi
fi

# Normal build path; a failed parallel build is retried once with -j1.
if [[ "$shuffled" != "true" ]]; then
  cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1
fi
# Return to root directory.
cd ${rootdir}

View File

@@ -5,8 +5,8 @@ $VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\v
# http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html
cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" |
foreach {
if ($_ -match "=") {
$v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])"
if ($_ -match "^([^=]+)=(.*)$") {
set-item -force -LiteralPath "ENV:\$($Matches[1])" -value "$($Matches[2])"
}
}

View File

@@ -13,11 +13,16 @@ elif [[ ${EIGEN_CI_CTEST_LABEL} ]]; then
target="-L ${EIGEN_CI_CTEST_LABEL}"
fi
exclude=""
if [[ -n "${EIGEN_CI_CTEST_EXCLUDE}" ]]; then
exclude="-E ${EIGEN_CI_CTEST_EXCLUDE}"
fi
set +x
EIGEN_CI_CTEST_PARALLEL=${EIGEN_CI_CTEST_PARALLEL:-${NPROC}}
EIGEN_CI_CTEST_REPEAT=${EIGEN_CI_CTEST_REPEAT:-3}
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target}"
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target} ${exclude}"
echo "Running initial tests..."
if ${ctest_cmd} -T test; then

View File

@@ -266,6 +266,10 @@ test:linux:cuda-12.2:clang-12:
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
EIGEN_CI_CTEST_ARGS: --timeout 2000
EIGEN_CI_CTEST_PARALLEL: "4"
# Thread pool tests are too slow under qemu emulation.
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
tags:
- saas-linux-large-amd64
.test:linux:arm:gcc-10:default:
extends: .test:linux:arm
@@ -350,6 +354,10 @@ test:linux:aarch64:clang-14:default:unsupported:
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
EIGEN_CI_CTEST_ARGS: --timeout 2000
EIGEN_CI_CTEST_PARALLEL: "4"
# Thread pool tests are too slow under qemu emulation.
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
tags:
- saas-linux-large-amd64
.test:linux:ppc64le:gcc-14:default:
extends: .test:linux:ppc64le
@@ -393,6 +401,10 @@ test:linux:ppc64le:clang-16:default:unsupported:
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu qemu-user-static
EIGEN_CI_CTEST_ARGS: --timeout 2000
EIGEN_CI_CTEST_PARALLEL: "4"
# Thread pool tests are too slow under qemu emulation.
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
tags:
- saas-linux-large-amd64
# GCC-14 (Ubuntu 24)
.test:linux:loongarch64:gcc-14:default: