mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
CI: split NVHPC build and make fallback parallelism configurable
libeigen/eigen!2372 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -147,7 +147,9 @@ build:linux:cross:x86-64:clang-19:default:asan-ubsan:unsupported:
|
||||
# NVHPC (nvc++) uses NVIDIA's HPC SDK container image with the compilers
|
||||
# pre-installed. We override EIGEN_CI_INSTALL to avoid trying to apt-get
|
||||
# install the compiler.
|
||||
build:linux:x86-64:nvhpc-26.1:default:
|
||||
# Split into official/unsupported because nvc++ is so slow that the full
|
||||
# build exceeds the 3-hour GitLab SaaS shared-runner timeout.
|
||||
.build:linux:x86-64:nvhpc-26.1:
|
||||
extends: .build:linux:cross:x86-64
|
||||
image: nvcr.io/nvidia/nvhpc:26.1-devel-cuda13.1-ubuntu24.04
|
||||
variables:
|
||||
@@ -160,6 +162,17 @@ build:linux:x86-64:nvhpc-26.1:default:
|
||||
# The shuffled-batch build strategy spreads memory-hungry targets across
|
||||
# batches, preventing simultaneous OOM-prone compilations.
|
||||
EIGEN_CI_BUILD_JOBS: "16"
|
||||
EIGEN_CI_FALLBACK_JOBS: "4"
|
||||
|
||||
build:linux:x86-64:nvhpc-26.1:default:official:
|
||||
extends: .build:linux:x86-64:nvhpc-26.1
|
||||
variables:
|
||||
EIGEN_CI_BUILD_TARGET: BuildOfficial
|
||||
|
||||
build:linux:x86-64:nvhpc-26.1:default:unsupported:
|
||||
extends: .build:linux:x86-64:nvhpc-26.1
|
||||
variables:
|
||||
EIGEN_CI_BUILD_TARGET: BuildUnsupported
|
||||
|
||||
######## CUDA ##################################################################
|
||||
|
||||
|
||||
@@ -21,8 +21,7 @@ fi
|
||||
|
||||
# Builds (particularly gcc) sometimes get killed, potentially when running
|
||||
# out of resources. In that case, keep trying to build the remaining
|
||||
# targets (k0), then try to build again with a single thread (j1) to minimize
|
||||
# resource use.
|
||||
# targets (k0), then retry with reduced parallelism to minimize resource use.
|
||||
# EIGEN_CI_BUILD_JOBS can be set to limit parallelism for memory-hungry
|
||||
# compilers (e.g. NVHPC).
|
||||
jobs=""
|
||||
@@ -30,6 +29,9 @@ if [[ -n "${EIGEN_CI_BUILD_JOBS}" ]]; then
|
||||
jobs="-j${EIGEN_CI_BUILD_JOBS}"
|
||||
fi
|
||||
|
||||
# Fallback parallelism for retry builds after a failure (default: 2).
|
||||
fallback_jobs="-j${EIGEN_CI_FALLBACK_JOBS:-2}"
|
||||
|
||||
# For phony meta-targets (e.g. buildtests), shuffle the dependency list and
|
||||
# build in batches so that memory-hungry compilations (like bdcsvd with
|
||||
# nvc++) are spread out instead of all running at once. Ninja ignores the
|
||||
@@ -45,6 +47,16 @@ if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
|
||||
{ set +x; } 2>/dev/null
|
||||
deps=$(ninja -t query "${EIGEN_CI_BUILD_TARGET}" 2>/dev/null \
|
||||
| awk '/^ input:/{found=1; next} /^ outputs:/{found=0} found && /^ /{print $1}')
|
||||
# CMake custom targets like BuildOfficial have an intermediate phony
|
||||
# (e.g. test/BuildOfficial) that holds the real dependencies. If we
|
||||
# got exactly one dep, resolve it one more level.
|
||||
if [[ $(echo "$deps" | wc -l) -eq 1 ]] && [[ -n "$deps" ]]; then
|
||||
inner=$(ninja -t query "$deps" 2>/dev/null \
|
||||
| awk '/^ input:/{found=1; next} /^ outputs:/{found=0} found && /^ /{print $1}')
|
||||
if [[ -n "$inner" ]]; then
|
||||
deps="$inner"
|
||||
fi
|
||||
fi
|
||||
# Deterministic shuffle: hash each target name and sort by hash.
|
||||
# Stable across runs (helps ninja's .ninja_log and build caches),
|
||||
# portable (no shuf dependency), and spreads same-family targets apart.
|
||||
@@ -72,7 +84,7 @@ if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
|
||||
while IFS= read -r batch; do
|
||||
batch_num=$((batch_num + 1))
|
||||
echo "=== Batch ${batch_num} ==="
|
||||
ninja -k0 ${jobs} ${batch} || ninja -k0 -j1 ${batch} || build_failed=true
|
||||
ninja -k0 ${jobs} ${batch} || ninja -k0 ${fallback_jobs} ${batch} || build_failed=true
|
||||
done < <(echo "$shuffled_deps" | xargs -n "${batch_size}")
|
||||
if [[ "$build_failed" == "true" ]]; then
|
||||
echo "Some batches failed."
|
||||
@@ -83,7 +95,7 @@ if [[ -n "${EIGEN_CI_BUILD_TARGET}" ]] && command -v ninja >/dev/null 2>&1; then
|
||||
fi
|
||||
|
||||
if [[ "$shuffled" != "true" ]]; then
|
||||
cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 -j1
|
||||
cmake --build . ${target} -- -k0 ${jobs} || cmake --build . ${target} -- -k0 ${fallback_jobs}
|
||||
fi
|
||||
|
||||
# Return to root directory.
|
||||
|
||||
@@ -22,7 +22,7 @@ set +x
|
||||
|
||||
EIGEN_CI_CTEST_PARALLEL=${EIGEN_CI_CTEST_PARALLEL:-${NPROC}}
|
||||
EIGEN_CI_CTEST_REPEAT=${EIGEN_CI_CTEST_REPEAT:-3}
|
||||
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --no-tests=error --build-noclean ${target} ${exclude}"
|
||||
ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${EIGEN_CI_CTEST_PARALLEL} --output-on-failure --no-compress-output --build-noclean ${target} ${exclude}"
|
||||
|
||||
echo "Running initial tests..."
|
||||
if ${ctest_cmd} -T test; then
|
||||
|
||||
@@ -13,7 +13,7 @@ if (${EIGEN_CI_CTEST_REGEX}) {
|
||||
$target = "-L","${EIGEN_CI_CTEST_LABEL}"
|
||||
}
|
||||
|
||||
$ctest_cmd = { ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} --output-on-failure --no-compress-output --no-tests=error --build-noclean ${target} }
|
||||
$ctest_cmd = { ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} --output-on-failure --no-compress-output --build-noclean ${target} }
|
||||
|
||||
Write-Host "Running initial tests..."
|
||||
|
||||
|
||||
@@ -215,17 +215,18 @@ test:linux:x86-64:clang-19:default:asan-ubsan:unsupported:
|
||||
.test:linux:x86-64:nvhpc-26.1:default:
|
||||
extends: .test:linux:x86-64
|
||||
image: nvcr.io/nvidia/nvhpc:26.1-devel-cuda13.1-ubuntu24.04
|
||||
needs: [ build:linux:x86-64:nvhpc-26.1:default ]
|
||||
variables:
|
||||
EIGEN_CI_INSTALL: ""
|
||||
|
||||
test:linux:x86-64:nvhpc-26.1:default:official:
|
||||
extends: .test:linux:x86-64:nvhpc-26.1:default
|
||||
needs: [ build:linux:x86-64:nvhpc-26.1:default:official ]
|
||||
variables:
|
||||
EIGEN_CI_CTEST_LABEL: Official
|
||||
|
||||
test:linux:x86-64:nvhpc-26.1:default:unsupported:
|
||||
extends: .test:linux:x86-64:nvhpc-26.1:default
|
||||
needs: [ build:linux:x86-64:nvhpc-26.1:default:unsupported ]
|
||||
variables:
|
||||
EIGEN_CI_CTEST_LABEL: Unsupported
|
||||
|
||||
|
||||
Reference in New Issue
Block a user