From 9071c1cd0784a39b3668d950c0ec6d8a5e9719df Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Wed, 22 Oct 2025 04:51:51 +0000 Subject: [PATCH] CI enhancements: visual indication of flaky tests ### Reference issue ### What does this implement/fix? Currently, we run each test up to 3 times to account for flaky tests. Sometimes a test fails so quickly that the time-based random seed is the same for the subsequent attempt, which then fails in exactly the same way. This MR uses a nanosecond-resolution seed, which resolves the issue described above. Now, if a test does not pass on the first attempt but passes on a retry, the GitLab job status will be yellow but will still be treated as a pass in the CI/CD pipeline. Hopefully, this means we will get more passes, and that it will help us identify room for improvement. ### Additional information See merge request libeigen/eigen!2025 (cherry picked from commit 40da5b64ce8db35e09f90733f67932c278808e24) --- ci/build.linux.gitlab-ci.yml | 4 ++-- ci/scripts/test.linux.script.sh | 32 ++++++++++++++++++---------- ci/scripts/test.windows.script.ps1 | 34 +++++++++++++++++++++--------- ci/test.linux.gitlab-ci.yml | 6 ++++-- ci/test.windows.gitlab-ci.yml | 3 ++- cmake/EigenSmokeTestList.cmake | 2 +- test/main.h | 12 ++++++++--- 7 files changed, 63 insertions(+), 30 deletions(-) diff --git a/ci/build.linux.gitlab-ci.yml b/ci/build.linux.gitlab-ci.yml index 7aadd2551..45eaeda69 100644 --- a/ci/build.linux.gitlab-ci.yml +++ b/ci/build.linux.gitlab-ci.yml @@ -290,7 +290,7 @@ build:linux:cross:x86-64:gcc-10:default:smoketest: rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" tags: - - saas-linux-small-amd64 + - saas-linux-medium-amd64 build:linux:cross:x86-64:clang-12:default:smoketest: extends: build:linux:cross:x86-64:clang-12:default @@ -299,4 +299,4 @@ build:linux:cross:x86-64:clang-12:default:smoketest: rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" tags: - - saas-linux-small-amd64 + - saas-linux-medium-amd64 diff --git a/ci/scripts/test.linux.script.sh
b/ci/scripts/test.linux.script.sh index 0bd18a859..6e2e1c58c 100755 --- a/ci/scripts/test.linux.script.sh +++ b/ci/scripts/test.linux.script.sh @@ -13,19 +13,29 @@ elif [[ ${EIGEN_CI_CTEST_LABEL} ]]; then target="-L ${EIGEN_CI_CTEST_LABEL}" fi -# Repeat tests up to three times to ignore flakes. Do not re-run with -T test, -# otherwise we lose test results for those that passed. -# Note: starting with CMake 3.17, we can use --repeat until-pass:3, but we have -# no way of easily installing this on ppc64le. -ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \ - --output-on-failure --no-compress-output \ - --build-no-clean -T test ${target} || \ - ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \ - --output-on-failure --no-compress-output --rerun-failed || \ - ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} \ - --output-on-failure --no-compress-output --rerun-failed +set +x + +ctest_cmd="ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} --output-on-failure --no-compress-output --build-noclean ${target}" + +echo "Running initial tests..." +if ${ctest_cmd} -T test; then + echo "Tests passed on the first attempt." + exit_code=$? +else + echo "Initial tests failed with exit code $?. Retrying up to ${EIGEN_CI_CTEST_REPEAT} times..." + if ${ctest_cmd} --rerun-failed --repeat until-pass:${EIGEN_CI_CTEST_REPEAT}; then + echo "Tests passed on retry." + exit_code=42 + else + exit_code=$? + fi +fi + +set -x # Return to root directory. cd ${rootdir} set +x + +exit $exit_code diff --git a/ci/scripts/test.windows.script.ps1 b/ci/scripts/test.windows.script.ps1 index 4ea1e30f5..ba46ef1cc 100644 --- a/ci/scripts/test.windows.script.ps1 +++ b/ci/scripts/test.windows.script.ps1 @@ -13,18 +13,32 @@ if (${EIGEN_CI_CTEST_REGEX}) { $target = "-L","${EIGEN_CI_CTEST_LABEL}" } -# Repeat tests up to three times to ignore flakes. Do not re-run with -T test, -# otherwise we lose test results for those that passed. 
-# Note: starting with CMake 3.17, we can use --repeat until-pass:3, but we have -# no way of easily installing this on ppc64le. -ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --build-no-clean -T test $target || ` - ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --rerun-failed || ` - ctest $EIGEN_CI_CTEST_ARGS -j$NPROC --output-on-failure --no-compress-output --rerun-failed +$ctest_cmd = { ctest ${EIGEN_CI_CTEST_ARGS} --parallel ${NPROC} --output-on-failure --no-compress-output --build-noclean ${target} } -$success = $LASTEXITCODE +Write-Host "Running initial tests..." + +& $ctest_cmd "-T test" +$exit_code = $LASTEXITCODE + +if ($exit_code -eq 0) { + Write-Host "Tests passed on the first attempt." +} +else { + Write-Host "Initial tests failed with exit code $exit_code. Retrying up to $EIGEN_CI_CTEST_REPEAT times..." + # TODO: figure out how to use --repeat until-pass + for ($i = 1; $i -le $EIGEN_CI_CTEST_REPEAT; $i++) { + & $ctest_cmd "--rerun-failed" + $exit_code = $LASTEXITCODE + + if ($exit_code -eq 0) { + Write-Host "Tests passed on retry." + $exit_code = 42 + break + } + } +} # Return to root directory. cd ${rootdir} -# Explicitly propagate exit code to indicate pass/failure of test command. 
-if($success -ne 0) { Exit $success } +Exit $exit_code diff --git a/ci/test.linux.gitlab-ci.yml b/ci/test.linux.gitlab-ci.yml index b3599f38f..17c7ee0e2 100644 --- a/ci/test.linux.gitlab-ci.yml +++ b/ci/test.linux.gitlab-ci.yml @@ -11,6 +11,8 @@ - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_NAMESPACE == "libeigen" && $CI_MERGE_REQUEST_LABELS =~ "/all-tests/" tags: - saas-linux-2xlarge-amd64 + allow_failure: + exit_codes: 42 ##### x86-64 ################################################################### .test:linux:x86-64: @@ -384,7 +386,7 @@ test:linux:x86-64:gcc-10:default:smoketest: rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" tags: - - saas-linux-small-amd64 + - saas-linux-medium-amd64 test:linux:x86-64:clang-12:default:smoketest: extends: .test:linux:x86-64:clang-12:default @@ -394,4 +396,4 @@ test:linux:x86-64:clang-12:default:smoketest: rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" tags: - - saas-linux-small-amd64 + - saas-linux-medium-amd64 diff --git a/ci/test.windows.gitlab-ci.yml b/ci/test.windows.gitlab-ci.yml index 3c398a4ec..34b1d7424 100644 --- a/ci/test.windows.gitlab-ci.yml +++ b/ci/test.windows.gitlab-ci.yml @@ -9,11 +9,12 @@ - if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen" - if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen" - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_NAMESPACE == "libeigen" && $CI_MERGE_REQUEST_LABELS =~ "/all-tests/" - tags: - eigen-runner - windows - x86-64 + allow_failure: + exit_codes: 42 ##### MSVC ##################################################################### diff --git a/cmake/EigenSmokeTestList.cmake b/cmake/EigenSmokeTestList.cmake index db7d3ff5e..247b2c4a2 100644 --- a/cmake/EigenSmokeTestList.cmake +++ b/cmake/EigenSmokeTestList.cmake @@ -91,7 +91,7 @@ set(ei_smoke_test_list qr_1 qr_colpivoting_7 qr_fullpivoting_4 - rand + rand_1 real_qz_1 redux_1 ref_1 diff --git a/test/main.h b/test/main.h 
index db4d484fb..fa7053e96 100644 --- a/test/main.h +++ b/test/main.h @@ -186,7 +186,7 @@ inline void on_temporary_creation(long int size, int) { namespace Eigen { static std::vector<std::string> g_test_stack; -// level == 0 <=> abort if test fail +// level == 0 <=> return 1 if test fail // level >= 1 <=> warning message to std::cerr if test fail static int g_test_level = 0; static int g_repeat = 1; @@ -356,7 +356,7 @@ inline void verify_impl(bool condition, const char* testname, const char* file, const int test_stack_size = static_cast<int>(Eigen::g_test_stack.size()); for (int i = test_stack_size - 1; i >= 0; --i) std::cerr << " - " << Eigen::g_test_stack[i] << "\n"; std::cerr << "\n"; - if (Eigen::g_test_level == 0) abort(); + if (Eigen::g_test_level == 0) exit(1); } } @@ -858,6 +858,12 @@ inline void set_seed_from_string(const char* str) { g_has_set_seed = true; } +inline void set_seed_from_time() { + using namespace std::chrono; + long long ns = duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count(); + g_seed = static_cast<unsigned int>(ns); +} + int main(int argc, char* argv[]) { g_has_set_repeat = false; g_has_set_seed = false; @@ -896,7 +902,7 @@ int main(int argc, char* argv[]) { char* env_EIGEN_SEED = getenv("EIGEN_SEED"); if (!g_has_set_seed && env_EIGEN_SEED) set_seed_from_string(env_EIGEN_SEED); - if (!g_has_set_seed) g_seed = (unsigned int)time(NULL); + if (!g_has_set_seed) set_seed_from_time(); if (!g_has_set_repeat) g_repeat = DEFAULT_REPEAT; std::cout << "Initializing random number generator with seed " << g_seed << std::endl;