mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Right-size CI runners to reduce waste and shuffle build order to avoid OOM
libeigen/eigen!2367 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -104,6 +104,8 @@ build:linux:docs:
|
||||
EIGEN_CI_C_COMPILER: clang
|
||||
EIGEN_CI_CXX_COMPILER: clang++
|
||||
EIGEN_CI_BEFORE_SCRIPT: ". ci/scripts/build_and_install_doxygen.sh Release_1_13_2"
|
||||
tags:
|
||||
- saas-linux-medium-amd64
|
||||
rules:
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
|
||||
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
|
||||
@@ -154,8 +156,10 @@ build:linux:x86-64:nvhpc-26.1:default:
|
||||
EIGEN_CI_INSTALL: ""
|
||||
EIGEN_CI_CROSS_INSTALL: ""
|
||||
# NVHPC's compiler frontend is very memory-hungry with template-heavy code.
|
||||
# Limit parallelism to avoid OOM kills from the kernel.
|
||||
EIGEN_CI_BUILD_JOBS: "8"
|
||||
# The 2xlarge runner has 128 GB, so 16 jobs gives ~8 GB per process.
|
||||
# The shuffled-batch build strategy spreads memory-hungry targets across
|
||||
# batches, preventing simultaneous OOM-prone compilations.
|
||||
EIGEN_CI_BUILD_JOBS: "16"
|
||||
|
||||
######## CUDA ##################################################################
|
||||
|
||||
|
||||
@@ -29,7 +29,62 @@ jobs=""
|
||||
if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then
|
||||
jobs="-j${EIGEN_CI_BUILD_JOBS}"
|
||||
fi
|
||||
cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1
|
||||
|
||||
# For phony meta-targets (e.g. buildtests), shuffle the dependency list and
|
||||
# build in batches so that memory-hungry compilations (like bdcsvd with
|
||||
# nvc++) are spread out instead of all running at once. Ninja ignores the
|
||||
# command-line target order and schedules by its dependency graph, so we
|
||||
# must feed it small batches to actually influence scheduling.
|
||||
# Falls back to the normal build if the target is not a phony or if
|
||||
# ninja is not available.
|
||||
batch_size=${EIGEN_CI_BUILD_BATCH_SIZE:-48}
|
||||
shuffled=false
|
||||
if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
|
||||
# Suppress xtrace while extracting and shuffling the target list
|
||||
# to avoid dumping ~1200 lines to the CI log.
|
||||
{ set +x; } 2>/dev/null
|
||||
deps=$(ninja -t query "${EIGEN_CI_BUILD_TARGET}" 2>/dev/null \
|
||||
| awk '/^ input:/{found=1; next} /^ outputs:/{found=0} found && /^ /{print $1}')
|
||||
# Deterministic shuffle: hash each target name and sort by hash.
|
||||
# Stable across runs (helps ninja's .ninja_log and build caches),
|
||||
# portable (no shuf dependency), and spreads same-family targets apart.
|
||||
# Uses Knuth's multiplicative hash (golden-ratio prime 2654435761) for
|
||||
# good avalanche — similar names like bdcsvd_1..bdcsvd_51 get widely
|
||||
# dispersed instead of clustering together.
|
||||
shuffled_deps=$(echo "$deps" | awk '
|
||||
BEGIN { for(i=0;i<128;i++) ord[sprintf("%c",i)]=i }
|
||||
{ h=0
|
||||
for(i=1;i<=length($0);i++) h=(h+ord[substr($0,i,1)])*2654435761%2147483647
|
||||
printf "%010d %s\n",h,$0 }' | sort | sed 's/^[^ ]* //')
|
||||
if [[ -n "$shuffled_deps" ]]; then
|
||||
ndeps=$(echo "$shuffled_deps" | wc -l)
|
||||
echo "Building ${ndeps} targets in batches of ${batch_size}"
|
||||
shuffled=true
|
||||
# Build in batches: ninja parallelises within each batch, but batches
|
||||
# run sequentially so memory-hungry targets from different families
|
||||
# don't pile up simultaneously. Track failures so we can report the
|
||||
# right exit code at the end.
|
||||
# Note: xtrace stays off to avoid dumping the full target list.
|
||||
# Use process substitution so the while loop runs in the current
|
||||
# shell and build_failed propagates.
|
||||
batch_num=0
|
||||
build_failed=false
|
||||
while IFS= read -r batch; do
|
||||
batch_num=$((batch_num + 1))
|
||||
echo "=== Batch ${batch_num} ==="
|
||||
ninja -k0 ${jobs} ${batch} || ninja -k0 -j1 ${batch} || build_failed=true
|
||||
done < <(echo "$shuffled_deps" | xargs -n "${batch_size}")
|
||||
if [[ "$build_failed" == "true" ]]; then
|
||||
echo "Some batches failed."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
set -x
|
||||
fi
|
||||
|
||||
if [[ "$shuffled" != "true" ]]; then
|
||||
cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1
|
||||
fi
|
||||
|
||||
# Return to root directory.
|
||||
cd ${rootdir}
|
||||
|
||||
@@ -5,8 +5,8 @@ $VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\v
|
||||
# http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html
|
||||
cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" |
|
||||
foreach {
|
||||
if ($_ -match "=") {
|
||||
$v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])"
|
||||
if ($_ -match "^([^=]+)=(.*)$") {
|
||||
set-item -force -LiteralPath "ENV:\$($Matches[1])" -value "$($Matches[2])"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,11 +13,16 @@ elif [[ ${EIGEN_CI_CTEST_LABEL} ]]; then
|
||||
target="-L ${EIGEN_CI_CTEST_LABEL}"
|
||||
fi
|
||||
|
||||
exclude=""
|
||||
if [[ -n "${EIGEN_CI_CTEST_EXCLUDE}" ]]; then
|
||||
exclude="-E ${EIGEN_CI_CTEST_EXCLUDE}"
|
||||
fi
|
||||
|
||||
set +x
|
||||
|
||||
EIGEN_CI_CTEST_PARALLEL=${EIGEN_CI_CTEST_PARALLEL:-${NPROC}}
|
||||
EIGEN_CI_CTEST_REPEAT=${EIGEN_CI_CTEST_REPEAT:-3}
|
||||
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target}"
|
||||
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target} ${exclude}"
|
||||
|
||||
echo "Running initial tests..."
|
||||
if ${ctest_cmd} -T test; then
|
||||
|
||||
@@ -266,6 +266,10 @@ test:linux:cuda-12.2:clang-12:
|
||||
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
|
||||
EIGEN_CI_CTEST_ARGS: --timeout 2000
|
||||
EIGEN_CI_CTEST_PARALLEL: "4"
|
||||
# Thread pool tests are too slow under qemu emulation.
|
||||
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
|
||||
tags:
|
||||
- saas-linux-large-amd64
|
||||
|
||||
.test:linux:arm:gcc-10:default:
|
||||
extends: .test:linux:arm
|
||||
@@ -350,6 +354,10 @@ test:linux:aarch64:clang-14:default:unsupported:
|
||||
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
|
||||
EIGEN_CI_CTEST_ARGS: --timeout 2000
|
||||
EIGEN_CI_CTEST_PARALLEL: "4"
|
||||
# Thread pool tests are too slow under qemu emulation.
|
||||
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
|
||||
tags:
|
||||
- saas-linux-large-amd64
|
||||
|
||||
.test:linux:ppc64le:gcc-14:default:
|
||||
extends: .test:linux:ppc64le
|
||||
@@ -393,6 +401,10 @@ test:linux:ppc64le:clang-16:default:unsupported:
|
||||
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu qemu-user-static
|
||||
EIGEN_CI_CTEST_ARGS: --timeout 2000
|
||||
EIGEN_CI_CTEST_PARALLEL: "4"
|
||||
# Thread pool tests are too slow under qemu emulation.
|
||||
EIGEN_CI_CTEST_EXCLUDE: tensor_thread_pool
|
||||
tags:
|
||||
- saas-linux-large-amd64
|
||||
|
||||
# GCC-14 (Ubuntu 24)
|
||||
.test:linux:loongarch64:gcc-14:default:
|
||||
|
||||
Reference in New Issue
Block a user