Add internal ctz/clz implementation.

(cherry picked from commit 75e273afcc)
This commit is contained in:
Antonio Sánchez
2023-12-11 21:03:09 +00:00
committed by Antonio Sanchez
parent 4be2870267
commit b8f894947a
3 changed files with 234 additions and 19 deletions

View File

@@ -48,7 +48,7 @@ if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK)
set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
@@ -61,7 +61,7 @@ if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
@@ -74,7 +74,7 @@ if(KLU_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
@@ -87,7 +87,7 @@ if(SuperLU_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ")
ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ")
@@ -160,6 +160,7 @@ endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
ei_add_test(clz)
ei_add_test(rand)
ei_add_test(meta)
ei_add_test(numext)
@@ -383,7 +384,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
@@ -401,11 +402,11 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_basic)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
@@ -418,8 +419,8 @@ if (EIGEN_TEST_HIP)
set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")
if (EXISTS ${HIP_PATH})
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
find_package(HIP REQUIRED)
if (HIP_FOUND)
@@ -433,12 +434,12 @@ if (EIGEN_TEST_HIP)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_basic)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
endif()
else ()
message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")

74
test/clz.cpp Normal file
View File

@@ -0,0 +1,74 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2023 The Eigen Authors
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
// Reference count-leading-zeros, used as the oracle for Eigen::internal::clz.
//
// Walks the value from its most significant bit downwards, one bit at a time,
// and returns how many zero bits precede the first set bit. A value with no
// set bits yields the full bit width of T.
template <typename T>
int ref_clz(T val) {
  const int bit_width = static_cast<int>(sizeof(T) * CHAR_BIT);
  const T top_bit = static_cast<T>(T(1) << (bit_width - 1));
  int leading = 0;
  while (leading < bit_width) {
    if ((val & top_bit) != 0) {
      break;  // Found the highest set bit.
    }
    // Discard the bit just inspected and bring the next one into the top slot.
    val <<= 1;
    ++leading;
  }
  return leading;
}
// Reference count-trailing-zeros, used as the oracle for Eigen::internal::ctz.
//
// Walks the value from its least significant bit upwards, one bit at a time,
// and returns how many zero bits sit below the first set bit. A value with no
// set bits yields the full bit width of T.
template <typename T>
int ref_ctz(T val) {
  const int bit_width = static_cast<int>(sizeof(T) * CHAR_BIT);
  const T low_bit = T(1);
  int trailing = 0;
  while (trailing < bit_width) {
    if ((val & low_bit) != 0) {
      break;  // Found the lowest set bit.
    }
    // Discard the bit just inspected and bring the next one into the low slot.
    val >>= 1;
    ++trailing;
  }
  return trailing;
}
// Deterministic sweep: checks Eigen::internal::clz and Eigen::internal::ctz
// against the bit-by-bit reference implementations over evenly spaced values
// of T.
//
// For types of at most two bytes the stride is 1, so every value from 0 up to
// and including NumTraits<T>::highest() is visited. For wider types the stride
// is highest() / 2^16, which yields roughly 2^16 samples spread over the range.
template <typename T>
void test_clz_ctz() {
  const T highest = Eigen::NumTraits<T>::highest();
  const T step = sizeof(T) <= 2 ? T(1) : T(highest / (T(1) << 16));
  // Index of the last sample. Because this is an integer quotient,
  // last * step <= highest, so the multiplication below cannot overflow T.
  const uint64_t last = static_cast<uint64_t>(highest / step);
  // The bound is inclusive so that the final sample is tested as well (for
  // 8/16-bit types that is the all-ones value). The counter is 64 bits wide on
  // purpose: with a counter of type T, `i <= last` could never become false
  // when last == highest (e.g. uint8_t), and the loop would not terminate.
  for (uint64_t i = 0; i <= last; ++i) {
    T val = static_cast<T>(i * step);
    int expected_clz = ref_clz(val);
    int actual_clz = Eigen::internal::clz(val);
    VERIFY(expected_clz == actual_clz);
    int expected_ctz = ref_ctz(val);
    int actual_ctz = Eigen::internal::ctz(val);
    VERIFY(expected_ctz == actual_ctz);
  }
}
// Randomized check: draws 1024 * 1024 values of type T from
// Eigen::internal::random<T>() and compares Eigen::internal::clz / ctz with
// the reference implementations for each of them.
template <typename T>
void test_clz_ctz_random() {
  const int kNumSamples = 1024 * 1024;
  for (int sample = 0; sample != kNumSamples; ++sample) {
    T val = Eigen::internal::random<T>();

    // Leading zeros.
    const int expected_clz = ref_clz(val);
    const int actual_clz = Eigen::internal::clz(val);
    VERIFY(expected_clz == actual_clz);

    // Trailing zeros.
    const int expected_ctz = ref_ctz(val);
    const int actual_ctz = Eigen::internal::ctz(val);
    VERIFY(expected_ctz == actual_ctz);
  }
}
// Test entry point for the clz/ctz helpers.
EIGEN_DECLARE_TEST(clz) {
  // Deterministic sweeps, one subtest per unsigned integer width.
  CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
  CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
  CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
  CALL_SUBTEST_4(test_clz_ctz<uint64_t>());

  // Randomized checks for the 32- and 64-bit types, run g_repeat times.
  // These calls are not wrapped in CALL_SUBTEST_N.
  int round = 0;
  while (round < g_repeat) {
    test_clz_ctz_random<uint32_t>();
    test_clz_ctz_random<uint64_t>();
    ++round;
  }
}