diff --git a/CMakeLists.txt b/CMakeLists.txt index ca6914cc5..a34a2af5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,9 +74,6 @@ if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK) endif() endif() -option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) -option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF) -option(EIGEN_BUILD_AOCL_BENCH "Build AOCL benchmark" OFF) # Avoid building docs if included from another project. # Building documentation requires creating and running executables on the host # platform. We shouldn't do this if cross-compiling. @@ -93,7 +90,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) endif() option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL}) -if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS) +if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS) set(EIGEN_IS_BUILDING_ ON) endif() @@ -764,66 +761,6 @@ if(EIGEN_BUILD_DOC) add_subdirectory(doc EXCLUDE_FROM_ALL) endif() -# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"? -if(EIGEN_BUILD_BTL) - add_subdirectory(bench/btl EXCLUDE_FROM_ALL) -endif() - -if(NOT WIN32 AND EIGEN_BUILD_SPBENCH) - add_subdirectory(bench/spbench EXCLUDE_FROM_ALL) -endif() -#--------------------------------------------------------------------------------------# -# AOCL BENCHMARK BUILD SECTION # -#--------------------------------------------------------------------------------------# -if(EIGEN_BUILD_AOCL_BENCH) - # Allow users to override the default architecture - set(EIGEN_AOCL_BENCH_ARCH "znver5" CACHE STRING "Target architecture for AOCL benchmark") - add_executable(benchmark_aocl EXCLUDE_FROM_ALL bench/benchmark_aocl.cpp) - include(CheckCXXCompilerFlag) - check_cxx_compiler_flag("-march=${EIGEN_AOCL_BENCH_ARCH}" COMPILER_SUPPORTS_AOCL_ARCH) - if(COMPILER_SUPPORTS_AOCL_ARCH) - target_compile_options(benchmark_aocl PRIVATE -O3 -Wno-shadow -march=${EIGEN_AOCL_BENCH_ARCH}) - else() - message(WARNING "${EIGEN_AOCL_BENCH_ARCH} architecture not supported by compiler") - target_compile_options(benchmark_aocl PRIVATE -O3) - endif() - - # Add custom flags if provided - if(EIGEN_AOCL_BENCH_FLAGS) - separate_arguments(CUSTOM_FLAGS NATIVE_COMMAND "${EIGEN_AOCL_BENCH_FLAGS}") - target_compile_options(benchmark_aocl PRIVATE ${CUSTOM_FLAGS}) - # Check if OpenMP is requested in custom flags and link it - string(FIND "${EIGEN_AOCL_BENCH_FLAGS}" "-fopenmp" OPENMP_REQUESTED) - if(NOT OPENMP_REQUESTED EQUAL -1) - find_package(OpenMP) - if(OpenMP_CXX_FOUND) - target_link_libraries(benchmark_aocl OpenMP::OpenMP_CXX) - else() - # Generic fallback: let compiler handle OpenMP linking - if(MSVC) - target_compile_options(benchmark_aocl PRIVATE "/openmp") - else() - target_compile_options(benchmark_aocl PRIVATE "-fopenmp") - target_link_options(benchmark_aocl PRIVATE "-fopenmp") - endif() - message(STATUS "Using compiler OpenMP flags as fallback") - endif() - endif() - endif() - - target_include_directories(benchmark_aocl PRIVATE ${INCLUDE_INSTALL_DIR}) - if(EIGEN_AOCL_BENCH_USE_MT) - target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_MT) - else() - target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_ALL) - endif() - target_link_libraries(benchmark_aocl Eigen3::Eigen) - if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) - target_link_libraries(benchmark_aocl ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) - endif() -endif() -#----------------------------------------------------------------------------------------# - if (EIGEN_BUILD_DEMOS) add_subdirectory(demos EXCLUDE_FROM_ALL) endif() @@ -872,9 +809,6 @@ if(PROJECT_IS_TOP_LEVEL) if (EIGEN_BUILD_LAPACK) message(STATUS "lapack | Build LAPACK subset library (not the same thing as Eigen)") endif() - if(EIGEN_BUILD_AOCL_BENCH) - message(STATUS "benchmark_aocl | Build AOCL benchmark executable") - endif() message(STATUS "------------+--------------------------------------------------------------") message(STATUS "") endif() diff --git a/COPYING.README b/COPYING.README index 52c077f81..ab0ca4201 100644 --- a/COPYING.README +++ b/COPYING.README @@ -6,6 +6,6 @@ Some files contain third-party code under BSD, LGPL, Apache, or other MPL2-compatible licenses, hence the other COPYING.* files here. Note that some optional external dependencies (e.g. FFTW, MPFR C++) -and some bundled benchmark code (bench/btl/) are distributed under -different licenses, including the GPL. Refer to the individual source -files and their respective COPYING files for details. +are distributed under different licenses, including the GPL. Refer to +the individual source files and their respective COPYING files for +details. diff --git a/bench/BenchSparseUtil.h b/bench/BenchSparseUtil.h deleted file mode 100644 index 663cd480b..000000000 --- a/bench/BenchSparseUtil.h +++ /dev/null @@ -1,129 +0,0 @@ - -#include -#include -#include - -using namespace std; -using namespace Eigen; -using namespace Eigen; - -#ifndef SIZE -#define SIZE 1024 -#endif - -#ifndef DENSITY -#define DENSITY 0.01 -#endif - -#ifndef SCALAR -#define SCALAR double -#endif - -typedef SCALAR Scalar; -typedef Matrix DenseMatrix; -typedef Matrix DenseVector; -typedef SparseMatrix EigenSparseMatrix; - -void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) { - dst.reserve(double(rows) * cols * density); - for (int j = 0; j < cols; j++) { - for (int i = 0; i < rows; i++) { - Scalar v = (internal::random(0, 1) < density) ? internal::random() : 0; - if (v != 0) dst.insert(i, j) = v; - } - } - dst.finalize(); -} - -void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) { - // std::cout << "alloc " << nnzPerCol*cols << "\n"; - dst.reserve(nnzPerCol * cols); - for (int j = 0; j < cols; j++) { - std::set aux; - for (int i = 0; i < nnzPerCol; i++) { - int k = internal::random(0, rows - 1); - while (aux.find(k) != aux.end()) k = internal::random(0, rows - 1); - aux.insert(k); - - dst.insert(k, j) = internal::random(); - } - } - dst.finalize(); -} - -void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) { - dst.setZero(); - for (int j = 0; j < src.cols(); ++j) - for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value(); -} - -#ifndef NOGMM -#include "gmm/gmm.h" -typedef gmm::csc_matrix GmmSparse; -typedef gmm::col_matrix > GmmDynSparse; -void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) { - GmmDynSparse tmp(src.rows(), src.cols()); - for (int j = 0; j < src.cols(); ++j) - for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value(); - gmm::copy(tmp, dst); -} -#endif - -#ifndef NOMTL -#include -typedef mtl::compressed2D > MtlSparse; -typedef mtl::compressed2D > MtlSparseRowMajor; -void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) { - mtl::matrix::inserter ins(dst); - for (int j = 0; j < src.cols(); ++j) - for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value(); -} -#endif - -#ifdef CSPARSE -extern "C" { -#include "cs.h" -} -void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) { - cs* aux = cs_spalloc(0, 0, 1, 1, 1); - for (int j = 0; j < src.cols(); ++j) - for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) - if (!cs_entry(aux, it.index(), j, it.value())) { - std::cout << "cs_entry error\n"; - exit(2); - } - dst = cs_compress(aux); - // cs_spfree(aux); -} -#endif // CSPARSE - -#ifndef NOUBLAS -#include -#include -#include -#include -#include -#include -#include -#include - -typedef boost::numeric::ublas::compressed_matrix UBlasSparse; - -void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) { - dst.resize(src.rows(), src.cols(), false); - for (int j = 0; j < src.cols(); ++j) - for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value(); -} - -template -void eiToUblasVec(const EigenType& src, UblasType& dst) { - dst.resize(src.size()); - for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j); -} -#endif - -#ifdef OSKI -extern "C" { -#include -} -#endif diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h deleted file mode 100644 index b3a26fbd6..000000000 --- a/bench/BenchTimer.h +++ /dev/null @@ -1,176 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BENCH_TIMERR_H -#define EIGEN_BENCH_TIMERR_H - -#if defined(_WIN32) || defined(__CYGWIN__) -#ifndef NOMINMAX -#define NOMINMAX -#define EIGEN_BT_UNDEF_NOMINMAX -#endif -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN -#endif -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -static void escape(void *p) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG - asm volatile("" : : "g"(p) : "memory"); -#endif -} - -static void clobber() { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG - asm volatile("" : : : "memory"); -#endif -} - -#include - -namespace Eigen { - -enum { CPU_TIMER = 0, REAL_TIMER = 1 }; - -/** Elapsed time timer keeping the best try. - * - * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID. - * On Windows we use QueryPerformanceCounter - * - * Important: on linux, you must link with -lrt - */ -class BenchTimer { - public: - BenchTimer() { -#if defined(_WIN32) || defined(__CYGWIN__) - LARGE_INTEGER freq; - QueryPerformanceFrequency(&freq); - m_frequency = (double)freq.QuadPart; -#endif - reset(); - } - - ~BenchTimer() {} - - inline void reset() { - m_bests.fill(1e9); - m_worsts.fill(0); - m_totals.setZero(); - } - inline void start() { - m_starts[CPU_TIMER] = getCpuTime(); - m_starts[REAL_TIMER] = getRealTime(); - } - inline void stop() { - m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER]; - m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER]; -#if EIGEN_VERSION_AT_LEAST(2, 90, 0) - m_bests = m_bests.cwiseMin(m_times); - m_worsts = m_worsts.cwiseMax(m_times); -#else - m_bests(0) = std::min(m_bests(0), m_times(0)); - m_bests(1) = std::min(m_bests(1), m_times(1)); - m_worsts(0) = std::max(m_worsts(0), m_times(0)); - m_worsts(1) = std::max(m_worsts(1), m_times(1)); -#endif - m_totals += m_times; - } - - /** Return the elapsed time in seconds between the last start/stop pair - */ - inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; } - - /** Return the best elapsed time in seconds - */ - inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; } - - /** Return the worst elapsed time in seconds - */ - inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; } - - /** Return the total elapsed time in seconds. - */ - inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; } - - inline double getCpuTime() const { -#ifdef _WIN32 - LARGE_INTEGER query_ticks; - QueryPerformanceCounter(&query_ticks); - return query_ticks.QuadPart / m_frequency; -#elif __APPLE__ - return double(mach_absolute_time()) * 1e-9; -#else - timespec ts; - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); - return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); -#endif - } - - inline double getRealTime() const { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds; -#elif __APPLE__ - return double(mach_absolute_time()) * 1e-9; -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); -#endif - } - - protected: -#if defined(_WIN32) || defined(__CYGWIN__) - double m_frequency; -#endif - Vector2d m_starts; - Vector2d m_times; - Vector2d m_bests; - Vector2d m_worsts; - Vector2d m_totals; - - public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW -}; - -#define BENCH(TIMER, TRIES, REP, CODE) \ - { \ - TIMER.reset(); \ - for (int bench_tries_ = 0; bench_tries_ < TRIES; ++bench_tries_) { \ - TIMER.start(); \ - for (int bench_reps_ = 0; bench_reps_ < REP; ++bench_reps_) { \ - CODE; \ - } \ - TIMER.stop(); \ - clobber(); \ - } \ - } - -} // namespace Eigen - -// clean #defined tokens -#ifdef EIGEN_BT_UNDEF_NOMINMAX -#undef EIGEN_BT_UNDEF_NOMINMAX -#undef NOMINMAX -#endif - -#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN -#undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN -#undef WIN32_LEAN_AND_MEAN -#endif - -#endif // EIGEN_BENCH_TIMERR_H diff --git a/bench/BenchUtil.h b/bench/BenchUtil.h deleted file mode 100644 index 4adf0ac34..000000000 --- a/bench/BenchUtil.h +++ /dev/null @@ -1,86 +0,0 @@ - -#ifndef EIGEN_BENCH_UTIL_H -#define EIGEN_BENCH_UTIL_H - -#include -#include "BenchTimer.h" - -using namespace std; -using namespace Eigen; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -template -void initMatrix_random(MatrixType& mat) __attribute__((noinline)); -template -void initMatrix_random(MatrixType& mat) { - mat.setRandom(); // = MatrixType::random(mat.rows(), mat.cols()); -} - -template -void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); -template -void initMatrix_identity(MatrixType& mat) { - mat.setIdentity(); -} - -#ifndef __INTEL_COMPILER -#define DISABLE_SSE_EXCEPTIONS() \ - { \ - int aux; \ - asm("stmxcsr %[aux] \n\t" \ - "orl $32832, %[aux] \n\t" \ - "ldmxcsr %[aux] \n\t" \ - : \ - : [aux] "m"(aux)); \ - } -#else -#define DISABLE_SSE_EXCEPTIONS() -#endif - -#ifdef BENCH_GMM -#include -template -void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) { - dst.resize(src.rows(), src.cols()); - for (int j = 0; j < src.cols(); ++j) - for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j); -} -#endif - -#ifdef BENCH_GSL -#include -#include -#include -template -void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) { - for (int j = 0; j < src.cols(); ++j) - for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j)); -} -#endif - -#ifdef BENCH_UBLAS -#include -#include -template -void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) { - dst.resize(src.rows(), src.cols()); - for (int j = 0; j < src.cols(); ++j) - for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j); -} -template -void eiToUblasVec(const EigenType& src, UblasType& dst) { - dst.resize(src.size()); - for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j); -} -#endif - -#endif // EIGEN_BENCH_UTIL_H diff --git a/bench/README.txt b/bench/README.txt deleted file mode 100644 index 047f22422..000000000 --- a/bench/README.txt +++ /dev/null @@ -1,108 +0,0 @@ - -This folder contains a couple of benchmark utities and Eigen benchmarks. - -**************************** -* bench_multi_compilers.sh * -**************************** - -This script allows to run a benchmark on a set of different compilers/compiler options. -It takes two arguments: - - a file defining the list of the compilers with their options - - the .cpp file of the benchmark - -Examples: - -$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp - - g++-4.1 -O3 -DNDEBUG -finline-limit=10000 - 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / - 0.271102 0.131416 0.422322 0.198633 - 0.201658 0.102436 0.397566 0.207282 - - g++-4.2 -O3 -DNDEBUG -finline-limit=10000 - 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / - 0.107805 0.0890579 0.30265 0.161843 - 0.127157 0.0712581 0.278341 0.191029 - - g++-4.3 -O3 -DNDEBUG -finline-limit=10000 - 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / - 0.134318 0.105291 0.3704 0.180966 - 0.137703 0.0732472 0.31225 0.202204 - - icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size - 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / - 0.226145 0.0941319 0.371873 0.159433 - 0.109302 0.0837538 0.328102 0.173891 - - -$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp - - g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp - double, fixed-size 4x4: 0.00165105s 0.0778739s - double, 32x32: 0.0654769s 0.075289s => x0.869674 (2) - double, 128x128: 0.054148s 0.0419669s => x1.29025 (2) - double, 512x512: 0.913799s 0.428533s => x2.13239 (2) - double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2) - - icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp - double, fixed-size 4x4: 0.000589848s 0.019949s - double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2) - double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2) - double, 512x512: 0.829436s 0.424438s => x1.9542 (2) - double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2) - - - -************************ -* benchmark_aocl * -************************ - -This benchmark exercises Eigen operations using AMD Optimized Libraries -(AOCL). It is disabled by default and can be enabled when configuring the -build: - - cmake .. -DEIGEN_BUILD_AOCL_BENCH=ON - -The resulting `benchmark_aocl` target is compiled with `-O3` and, if the -compiler supports it, `-march=znver5` for optimal performance on AMD -processors. - -The benchmark also links to `libblis-mt.so` and `libflame.so` so BLAS and -LAPACK operations run with multithreaded AOCL when available. - -By default the CMake build defines `EIGEN_USE_AOCL_MT` via the option -`EIGEN_AOCL_BENCH_USE_MT` (enabled). Set this option to `OFF` if you want -to build the benchmark using the single-threaded AOCL libraries instead, -in which case `EIGEN_USE_AOCL_ALL` is defined. - - - -Alternatively you can build the same benchmark using the -`Makefile` in this directory. This allows experimenting with -different compiler flags without reconfiguring CMake: - -``` -cd bench && make # builds with -O3 -march=znver5 by default -make clean && make CXX="clang++" ## For different compiler apart from g++ -make clean && make MARCH="" CXXFLAGS="-O2" # example of custom flags -make AOCL_ROOT=/opt/aocl # use AOCL from a custom location - -This Makefile links against `libblis-mt.so` and `libflame.so` so the -matrix multiplication benchmark exercises multithreaded BLIS when -`EIGEN_USE_AOCL_MT` is defined (enabled by default in the Makefile). - -If you prefer to compile manually, ensure that the Eigen include path -points to the directory where `AOCL_Support.h` resides. For example: - - -clang++ -O3 -std=c++14 -I../build/install/include \ - -march=znver5 -DEIGEN_USE_AOCL_MT \ - benchmark_aocl.cpp -o benchmark_aocl \ - -lblis-mt -lflame -lamdlibm -lpthread -lm -``` -Replace `../install/include` with your actual Eigen install path. - -When invoking `make`, you can point `AOCL_ROOT` to your AOCL -installation directory so the Makefile links against `$(AOCL_ROOT)/lib`. - - diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp deleted file mode 100644 index c436739af..000000000 --- a/bench/analyze-blocking-sizes.cpp +++ /dev/null @@ -1,772 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -using namespace std; - -const int default_precision = 4; - -// see --only-cubic-sizes -bool only_cubic_sizes = false; - -// see --dump-tables -bool dump_tables = false; - -uint8_t log2_pot(size_t x) { - size_t l = 0; - while (x >>= 1) l++; - return l; -} - -uint16_t compact_size_triple(size_t k, size_t m, size_t n) { - return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); -} - -// just a helper to store a triple of K,M,N sizes for matrix product -struct size_triple_t { - uint16_t k, m, n; - size_triple_t() : k(0), m(0), n(0) {} - size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} - size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} - size_triple_t(uint16_t compact) { - k = 1 << ((compact & 0xf00) >> 8); - m = 1 << ((compact & 0x0f0) >> 4); - n = 1 << ((compact & 0x00f) >> 0); - } - bool is_cubic() const { return k == m && m == n; } -}; - -ostream& operator<<(ostream& s, const size_triple_t& t) { return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; } - -struct inputfile_entry_t { - uint16_t product_size; - uint16_t pot_block_size; - size_triple_t nonpot_block_size; - float gflops; -}; - -struct inputfile_t { - enum class type_t { unknown, all_pot_sizes, default_sizes }; - - string filename; - vector entries; - type_t type; - - inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) { - ifstream stream(filename); - if (!stream.is_open()) { - cerr << "couldn't open input file: " << filename << endl; - exit(1); - } - string line; - while (getline(stream, line)) { - if (line.empty()) continue; - if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) { - if (type != type_t::unknown) { - cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines"; - exit(1); - } - type = type_t::all_pot_sizes; - continue; - } - if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) { - if (type != type_t::unknown) { - cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines"; - exit(1); - } - type = type_t::default_sizes; - continue; - } - - if (type == type_t::unknown) { - continue; - } - switch (type) { - case type_t::all_pot_sizes: { - unsigned int product_size, block_size; - float gflops; - int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops); - if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff || - !isfinite(gflops)) { - cerr << "ill-formed input file: " << filename << endl; - cerr << "offending line:" << endl << line << endl; - exit(1); - } - if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) { - continue; - } - inputfile_entry_t entry; - entry.product_size = uint16_t(product_size); - entry.pot_block_size = uint16_t(block_size); - entry.gflops = gflops; - entries.push_back(entry); - break; - } - case type_t::default_sizes: { - unsigned int product_size; - float gflops; - int bk, bm, bn; - int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops); - if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) { - cerr << "ill-formed input file: " << filename << endl; - cerr << "offending line:" << endl << line << endl; - exit(1); - } - if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) { - continue; - } - inputfile_entry_t entry; - entry.product_size = uint16_t(product_size); - entry.pot_block_size = 0; - entry.nonpot_block_size = size_triple_t(bk, bm, bn); - entry.gflops = gflops; - entries.push_back(entry); - break; - } - - default: - break; - } - } - stream.close(); - if (type == type_t::unknown) { - cerr << "Unrecognized input file " << filename << endl; - exit(1); - } - if (entries.empty()) { - cerr << "didn't find any measurements in input file: " << filename << endl; - exit(1); - } - } -}; - -struct preprocessed_inputfile_entry_t { - uint16_t product_size; - uint16_t block_size; - - float efficiency; -}; - -bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) { - return e1.efficiency < e2.efficiency; -} - -struct preprocessed_inputfile_t { - string filename; - vector entries; - - preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) { - if (inputfile.type != inputfile_t::type_t::all_pot_sizes) { - abort(); - } - auto it = inputfile.entries.begin(); - auto it_first_with_given_product_size = it; - while (it != inputfile.entries.end()) { - ++it; - if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) { - import_input_file_range_one_product_size(it_first_with_given_product_size, it); - it_first_with_given_product_size = it; - } - } - } - - private: - void import_input_file_range_one_product_size(const vector::const_iterator& begin, - const vector::const_iterator& end) { - uint16_t product_size = begin->product_size; - float max_gflops = 0.0f; - for (auto it = begin; it != end; ++it) { - if (it->product_size != product_size) { - cerr << "Unexpected ordering of entries in " << filename << endl; - cerr << "(Expected all entries for product size " << hex << product_size << dec << " to be grouped)" << endl; - exit(1); - } - max_gflops = max(max_gflops, it->gflops); - } - for (auto it = begin; it != end; ++it) { - preprocessed_inputfile_entry_t entry; - entry.product_size = it->product_size; - entry.block_size = it->pot_block_size; - entry.efficiency = it->gflops / max_gflops; - entries.push_back(entry); - } - } -}; - -void check_all_files_in_same_exact_order(const vector& preprocessed_inputfiles) { - if (preprocessed_inputfiles.empty()) { - return; - } - - const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[0]; - const size_t num_entries = first_file.entries.size(); - - for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) { - if (preprocessed_inputfiles[i].entries.size() != num_entries) { - cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and " - << first_file.filename << endl; - exit(1); - } - } - - for (size_t entry_index = 0; entry_index < num_entries; entry_index++) { - const uint16_t entry_product_size = first_file.entries[entry_index].product_size; - const uint16_t entry_block_size = first_file.entries[entry_index].block_size; - for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) { - const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index]; - if (cur_file.entries[entry_index].product_size != entry_product_size || - cur_file.entries[entry_index].block_size != entry_block_size) { - cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename - << endl; - exit(1); - } - } - } -} - -float efficiency_of_subset(const vector& preprocessed_inputfiles, - const vector& subset) { - if (subset.size() <= 1) { - return 1.0f; - } - const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]]; - const size_t num_entries = first_file.entries.size(); - float efficiency = 1.0f; - size_t entry_index = 0; - size_t first_entry_index_with_this_product_size = 0; - uint16_t product_size = first_file.entries[0].product_size; - while (entry_index < num_entries) { - ++entry_index; - if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) { - float efficiency_this_product_size = 0.0f; - for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { - float efficiency_this_entry = 1.0f; - for (auto i = subset.begin(); i != subset.end(); ++i) { - efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency); - } - efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry); - } - efficiency = min(efficiency, efficiency_this_product_size); - if (entry_index < num_entries) { - first_entry_index_with_this_product_size = entry_index; - product_size = first_file.entries[entry_index].product_size; - } - } - } - - return efficiency; -} - -void dump_table_for_subset(const vector& preprocessed_inputfiles, - const vector& subset) { - const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]]; - const size_t num_entries = first_file.entries.size(); - size_t entry_index = 0; - size_t first_entry_index_with_this_product_size = 0; - uint16_t product_size = first_file.entries[0].product_size; - size_t i = 0; - size_triple_t min_product_size(first_file.entries.front().product_size); - size_triple_t max_product_size(first_file.entries.back().product_size); - if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) { - abort(); - } - if (only_cubic_sizes) { - cerr << "Can't generate tables with --only-cubic-sizes." << endl; - abort(); - } - cout << "struct LookupTable {" << endl; - cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl; - const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1; - const size_t TableSize = NumSizes * NumSizes * NumSizes; - cout << " static const size_t NumSizes = " << NumSizes << ";" << endl; - cout << " static const unsigned short* Data() {" << endl; - cout << " static const unsigned short data[" << TableSize << "] = {"; - while (entry_index < num_entries) { - ++entry_index; - if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) { - float best_efficiency_this_product_size = 0.0f; - uint16_t best_block_size_this_product_size = 0; - for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) { - float efficiency_this_entry = 1.0f; - for (auto i = subset.begin(); i != subset.end(); ++i) { - efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency); - } - if (efficiency_this_entry > best_efficiency_this_product_size) { - best_efficiency_this_product_size = efficiency_this_entry; - best_block_size_this_product_size = first_file.entries[e].block_size; - } - } - if ((i++) % NumSizes) { - cout << " "; - } else { - cout << endl << " "; - } - cout << "0x" << hex << best_block_size_this_product_size << dec; - if (entry_index < num_entries) { - cout << ","; - first_entry_index_with_this_product_size = entry_index; - product_size = first_file.entries[entry_index].product_size; - } - } - } - if (i != TableSize) { - cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl; - abort(); - } - cout << endl << " };" << endl; - cout << " return data;" << endl; - cout << " }" << endl; - cout << "};" << endl; -} - -float efficiency_of_partition(const vector& preprocessed_inputfiles, - const vector>& partition) { - float efficiency = 1.0f; - for (auto s = partition.begin(); s != partition.end(); ++s) { - efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s)); - } - return efficiency; -} - -void make_first_subset(size_t subset_size, vector& out_subset, size_t set_size) { - assert(subset_size >= 1 && subset_size <= set_size); - out_subset.resize(subset_size); - for (size_t i = 0; i < subset_size; i++) { - out_subset[i] = i; - } -} - -bool is_last_subset(const vector& subset, size_t set_size) { return subset[0] == set_size - subset.size(); } - -void next_subset(vector& inout_subset, size_t set_size) { - if (is_last_subset(inout_subset, set_size)) { - cerr << "iterating past the last subset" << endl; - abort(); - } - size_t i = 1; - while (inout_subset[inout_subset.size() - i] == set_size - i) { - i++; - assert(i <= inout_subset.size()); - } - size_t first_index_to_change = inout_subset.size() - i; - inout_subset[first_index_to_change]++; - size_t p = inout_subset[first_index_to_change]; - for (size_t j = first_index_to_change + 1; j < inout_subset.size(); j++) { - inout_subset[j] = ++p; - } -} - -const size_t number_of_subsets_limit = 100; -const size_t always_search_subsets_of_size_at_least = 2; - -bool is_number_of_subsets_feasible(size_t n, size_t p) { - assert(n > 0 && p > 0 && p <= n); - uint64_t numerator = 1, denominator = 1; - for (size_t i = 0; i < p; i++) { - numerator *= n - i; - denominator *= i + 1; - if (numerator > denominator * number_of_subsets_limit) { - return false; - } - } - return true; -} - -size_t max_feasible_subset_size(size_t n) { - assert(n > 0); - const size_t minresult = min(n - 1, always_search_subsets_of_size_at_least); - for (size_t p = 1; p <= n - 1; p++) { - if (!is_number_of_subsets_feasible(n, p + 1)) { - return max(p, minresult); - } - } - return n - 1; -} - -void find_subset_with_efficiency_higher_than(const vector& preprocessed_inputfiles, - float required_efficiency_to_beat, vector& inout_remainder, - vector& out_subset) { - out_subset.resize(0); - - if (required_efficiency_to_beat >= 1.0f) { - cerr << "can't beat efficiency 1." << endl; - abort(); - } - - while (!inout_remainder.empty()) { - vector candidate_indices(inout_remainder.size()); - for (size_t i = 0; i < candidate_indices.size(); i++) { - candidate_indices[i] = i; - } - - size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size()); - while (candidate_indices_subset_size >= 1) { - vector candidate_indices_subset; - make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size()); - - vector best_candidate_indices_subset; - float best_efficiency = 0.0f; - vector trial_subset = out_subset; - trial_subset.resize(out_subset.size() + candidate_indices_subset_size); - while (true) { - for (size_t i = 0; i < candidate_indices_subset_size; i++) { - trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]]; - } - - float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset); - if (trial_efficiency > best_efficiency) { - best_efficiency = trial_efficiency; - best_candidate_indices_subset = candidate_indices_subset; - } - if (is_last_subset(candidate_indices_subset, candidate_indices.size())) { - break; - } - next_subset(candidate_indices_subset, candidate_indices.size()); - } - - if (best_efficiency > required_efficiency_to_beat) { - for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) { - candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]]; - } - candidate_indices.resize(best_candidate_indices_subset.size()); - } - candidate_indices_subset_size--; - } - - size_t candidate_index = candidate_indices[0]; - auto candidate_iterator = inout_remainder.begin() + candidate_index; - vector trial_subset = out_subset; - - trial_subset.push_back(*candidate_iterator); - float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset); - if (trial_efficiency > required_efficiency_to_beat) { - out_subset.push_back(*candidate_iterator); - inout_remainder.erase(candidate_iterator); - } else { - break; - } - } -} - -void find_partition_with_efficiency_higher_than(const vector& preprocessed_inputfiles, - float required_efficiency_to_beat, - vector>& out_partition) { - out_partition.resize(0); - - vector remainder; - for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) { - remainder.push_back(i); - } - - while (!remainder.empty()) { - vector new_subset; - find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder, - new_subset); - out_partition.push_back(new_subset); - } -} - -void print_partition(const vector& preprocessed_inputfiles, - const vector>& partition) { - float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl; - for (auto subset = partition.begin(); subset != partition.end(); ++subset) { - cout << " Subset " << (subset - partition.begin()) << ", efficiency " - << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl; - for (auto file = subset->begin(); file != subset->end(); ++file) { - cout << " " << preprocessed_inputfiles[*file].filename << endl; - } - if (dump_tables) { - cout << " Table:" << endl; - dump_table_for_subset(preprocessed_inputfiles, *subset); - } - } - cout << endl; -} - -struct action_t { - virtual const char* invokation_name() const { - abort(); - return nullptr; - } - virtual void run(const vector&) const { abort(); } - virtual ~action_t() {} -}; - -struct partition_action_t : action_t { - virtual const char* invokation_name() const override { return "partition"; } - virtual void run(const vector& input_filenames) const override { - vector preprocessed_inputfiles; - - if (input_filenames.empty()) { - cerr << "The " << invokation_name() << " action needs a list of input files." << endl; - exit(1); - } - - for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) { - inputfile_t inputfile(*it); - switch (inputfile.type) { - case inputfile_t::type_t::all_pot_sizes: - preprocessed_inputfiles.emplace_back(inputfile); - break; - case inputfile_t::type_t::default_sizes: - cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and " - << "has no use for " << *it << " which contains measurements for default sizes." << endl; - exit(1); - break; - default: - cerr << "Unrecognized input file: " << *it << endl; - exit(1); - } - } - - check_all_files_in_same_exact_order(preprocessed_inputfiles); - - float required_efficiency_to_beat = 0.0f; - vector>> partitions; - cerr << "searching for partitions...\r" << flush; - while (true) { - vector> partition; - find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition); - float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for " - << 100.0f * actual_efficiency << " % efficiency" - << " \r" << flush; - partitions.push_back(partition); - if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { - break; - } - required_efficiency_to_beat = actual_efficiency; - } - cerr << " " << endl; - while (true) { - bool repeat = false; - for (size_t i = 0; i < partitions.size() - 1; i++) { - if (partitions[i].size() >= partitions[i + 1].size()) { - partitions.erase(partitions.begin() + i); - repeat = true; - break; - } - } - if (!repeat) { - break; - } - } - for (auto it = partitions.begin(); it != partitions.end(); ++it) { - print_partition(preprocessed_inputfiles, *it); - } - } -}; - -struct evaluate_defaults_action_t : action_t { - struct results_entry_t { - uint16_t product_size; - size_triple_t default_block_size; - uint16_t best_pot_block_size; - float default_gflops; - float best_pot_gflops; - float default_efficiency; - }; - friend ostream& operator<<(ostream& s, const results_entry_t& entry) { - return s << "Product size " << size_triple_t(entry.product_size) << ": default block size " - << entry.default_block_size << " -> " << entry.default_gflops - << " GFlop/s = " << entry.default_efficiency * 100.0f << " %" - << " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> " - << entry.best_pot_gflops << " GFlop/s" << dec; - } - static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) { - return e1.default_efficiency < e2.default_efficiency; - } - virtual const char* invokation_name() const override { return "evaluate-defaults"; } - void show_usage_and_exit() const { - cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl; - cerr << "checks how well the performance with default sizes compares to the best " - << "performance measured over all POT sizes." << endl; - exit(1); - } - virtual void run(const vector& input_filenames) const override { - if (input_filenames.size() != 2) { - show_usage_and_exit(); - } - inputfile_t inputfile_default_sizes(input_filenames[0]); - inputfile_t inputfile_all_pot_sizes(input_filenames[1]); - if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) { - cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl; - show_usage_and_exit(); - } - if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) { - cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl; - show_usage_and_exit(); - } - vector results; - vector cubic_results; - - uint16_t product_size = 0; - auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin(); - for (auto it_default_sizes = inputfile_default_sizes.entries.begin(); - it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) { - if (it_default_sizes->product_size == product_size) { - continue; - } - product_size = it_default_sizes->product_size; - while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() && - it_all_pot_sizes->product_size != product_size) { - ++it_all_pot_sizes; - } - if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) { - break; - } - uint16_t best_pot_block_size = 0; - float best_pot_gflops = 0; - for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size; - ++it) { - if (it->gflops > best_pot_gflops) { - best_pot_gflops = it->gflops; - best_pot_block_size = it->pot_block_size; - } - } - results_entry_t entry; - entry.product_size = product_size; - entry.default_block_size = it_default_sizes->nonpot_block_size; - entry.best_pot_block_size = best_pot_block_size; - entry.default_gflops = it_default_sizes->gflops; - entry.best_pot_gflops = best_pot_gflops; - entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops; - results.push_back(entry); - - size_triple_t t(product_size); - if (t.k == t.m && t.m == t.n) { - cubic_results.push_back(entry); - } - } - - cout << "All results:" << endl; - for (auto it = results.begin(); it != results.end(); ++it) { - cout << *it << endl; - } - cout << endl; - - sort(results.begin(), results.end(), lower_efficiency); - - const size_t n = min(20, results.size()); - cout << n << " worst results:" << endl; - for (size_t i = 0; i < n; i++) { - cout << results[i] << endl; - } - cout << endl; - - cout << "cubic results:" << endl; - for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) { - cout << *it << endl; - } - cout << endl; - - sort(cubic_results.begin(), cubic_results.end(), lower_efficiency); - - cout.precision(2); - vector a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f}; - for (auto it = a.begin(); it != a.end(); ++it) { - size_t n = min(results.size() - 1, size_t(*it * results.size())); - cout << (100.0f * n / (results.size() - 1)) - << " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %" - << endl; - } - cout.precision(default_precision); - } -}; - -void show_usage_and_exit(int argc, char* argv[], const vector>& available_actions) { - cerr << "usage: " << argv[0] << " [options...] " << endl; - cerr << "available actions:" << endl; - for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { - cerr << " " << (*it)->invokation_name() << endl; - } - cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; - exit(1); -} - -int main(int argc, char* argv[]) { - cout.precision(default_precision); - cerr.precision(default_precision); - - vector> available_actions; - available_actions.emplace_back(new partition_action_t); - available_actions.emplace_back(new evaluate_defaults_action_t); - - vector input_filenames; - - action_t* action = nullptr; - - if (argc < 2) { - show_usage_and_exit(argc, argv, available_actions); - } - for (int i = 1; i < argc; i++) { - bool arg_handled = false; - // Step 1. Try to match action invocation names. - for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { - if (!strcmp(argv[i], (*it)->invokation_name())) { - if (!action) { - action = it->get(); - arg_handled = true; - break; - } else { - cerr << "can't specify more than one action!" << endl; - show_usage_and_exit(argc, argv, available_actions); - } - } - } - if (arg_handled) { - continue; - } - // Step 2. Try to match option names. - if (argv[i][0] == '-') { - if (!strcmp(argv[i], "--only-cubic-sizes")) { - only_cubic_sizes = true; - arg_handled = true; - } - if (!strcmp(argv[i], "--dump-tables")) { - dump_tables = true; - arg_handled = true; - } - if (!arg_handled) { - cerr << "Unrecognized option: " << argv[i] << endl; - show_usage_and_exit(argc, argv, available_actions); - } - } - if (arg_handled) { - continue; - } - // Step 3. Default to interpreting args as input filenames. - input_filenames.emplace_back(argv[i]); - } - - if (dump_tables && only_cubic_sizes) { - cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl; - show_usage_and_exit(argc, argv, available_actions); - } - - if (!action) { - show_usage_and_exit(argc, argv, available_actions); - } - - action->run(input_filenames); -} diff --git a/bench/basicbench.cxxlist b/bench/basicbench.cxxlist deleted file mode 100755 index a8ab34e0d..000000000 --- a/bench/basicbench.cxxlist +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG" -# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000" - -# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG" -#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000" - -# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG" -#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000" -# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate" -# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use" - -# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG" -#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000" -# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate" -# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use" - -# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx" -# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use" - -#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt" -CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" -CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt" -CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" -CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt" -CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" -CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt" diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp deleted file mode 100644 index dceb9fa38..000000000 --- a/bench/basicbenchmark.cpp +++ /dev/null @@ -1,34 +0,0 @@ - -#include -#include "BenchUtil.h" -#include "basicbenchmark.h" - -int main(int argc, char *argv[]) { - DISABLE_SSE_EXCEPTIONS(); - -// this is the list of matrix type and size we want to bench: -// ((suffix) (matrix size) (number of iterations)) -#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000)) - // #define MODES ((Xd)(20)(10000)) - -#define _GENERATE_HEADER(R, ARG, EL) \ - << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \ - << "-" \ - << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " - - std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl; - - const int tries = 10; - -#define _RUN_BENCH(R, ARG, EL) \ - std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \ - BOOST_PP_SEQ_ELEM(2, EL), tries) \ - << " "; - - BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES); - std::cout << endl; - BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES); - std::cout << endl; - - return 0; -} diff --git a/bench/basicbenchmark.h b/bench/basicbenchmark.h deleted file mode 100644 index 61ad15336..000000000 --- a/bench/basicbenchmark.h +++ /dev/null @@ -1,54 +0,0 @@ - -#ifndef EIGEN_BENCH_BASICBENCH_H -#define EIGEN_BENCH_BASICBENCH_H - -enum { LazyEval, EarlyEval, OmpEval }; - -template -void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); - -template -void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) { - for (int a = 0; a < iterations; a++) { - if (Mode == LazyEval) { - asm("#begin_bench_loop LazyEval"); - if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); - m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); - } else if (Mode == OmpEval) { - asm("#begin_bench_loop OmpEval"); - if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); - m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); - } else { - asm("#begin_bench_loop EarlyEval"); - if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize"); - m = I + 0.00005 * (m + m * m); - } - asm("#end_bench_loop"); - } -} - -template -double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); - -template -double benchBasic(const MatrixType& mat, int iterations, int tries) { - const int rows = mat.rows(); - const int cols = mat.cols(); - - MatrixType I(rows, cols); - MatrixType m(rows, cols); - - initMatrix_identity(I); - - Eigen::BenchTimer timer; - for (uint t = 0; t < tries; ++t) { - initMatrix_random(m); - timer.start(); - benchBasic_loop(I, m, iterations); - timer.stop(); - cerr << m; - } - return timer.value(); -}; - -#endif // EIGEN_BENCH_BASICBENCH_H diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp deleted file mode 100644 index a57966e8c..000000000 --- a/bench/benchBlasGemm.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas -// possible options: -// -DEIGEN_DONT_VECTORIZE -// -msse2 - -// #define EIGEN_DEFAULT_TO_ROW_MAJOR -#define _FLOAT - -#include - -#include -#include "BenchTimer.h" - -// include the BLAS headers -extern "C" { -#include -} -#include - -#ifdef _FLOAT -typedef float Scalar; -#define CBLAS_GEMM cblas_sgemm -#else -typedef double Scalar; -#define CBLAS_GEMM cblas_dgemm -#endif - -typedef Eigen::Matrix MyMatrix; -void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); -void check_product(int M, int N, int K); -void check_product(void); - -int main(int argc, char* argv[]) { -// disable SSE exceptions -#ifdef __GNUC__ - { - int aux; - asm("stmxcsr %[aux] \n\t" - "orl $32832, %[aux] \n\t" - "ldmxcsr %[aux] \n\t" - : - : [aux] "m"(aux)); - } -#endif - - int nbtries = 1, nbloops = 1, M, N, K; - - if (argc == 2) { - if (std::string(argv[1]) == "check") - check_product(); - else - M = N = K = atoi(argv[1]); - } else if ((argc == 3) && (std::string(argv[1]) == "auto")) { - M = N = K = atoi(argv[2]); - nbloops = 1000000000 / (M * M * M); - if (nbloops < 1) nbloops = 1; - nbtries = 6; - } else if (argc == 4) { - M = N = K = atoi(argv[1]); - nbloops = atoi(argv[2]); - nbtries = atoi(argv[3]); - } else if (argc == 6) { - M = atoi(argv[1]); - N = atoi(argv[2]); - K = atoi(argv[3]); - nbloops = atoi(argv[4]); - nbtries = atoi(argv[5]); - } else { - std::cout << "Usage: " << argv[0] << " size \n"; - std::cout << "Usage: " << argv[0] << " auto size\n"; - std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; - std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; - std::cout << "Usage: " << argv[0] << " check\n"; - std::cout << "Options:\n"; - std::cout << " size unique size of the 2 matrices (integer)\n"; - std::cout << " auto automatically set the number of repetitions and tries\n"; - std::cout << " nbloops number of times the GEMM routines is executed\n"; - std::cout << " nbtries number of times the loop is benched (return the best try)\n"; - std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; - std::cout << " check check eigen product using cblas as a reference\n"; - exit(1); - } - - double nbmad = double(M) * double(N) * double(K) * double(nbloops); - - if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n"; - - Scalar alpha, beta; - MyMatrix ma(M, K), mb(K, N), mc(M, N); - ma = MyMatrix::Random(M, K); - mb = MyMatrix::Random(K, N); - mc = MyMatrix::Random(M, N); - - Eigen::BenchTimer timer; - - // we simply compute c += a*b, so: - alpha = 1; - beta = 1; - - // bench cblas - // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); - if (!(std::string(argv[1]) == "auto")) { - timer.reset(); - for (uint k = 0; k < nbtries; ++k) { - timer.start(); - for (uint j = 0; j < nbloops; ++j) -#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR - CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, - mc.data(), N); -#else - CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, - mc.data(), M); -#endif - timer.stop(); - } - if (!(std::string(argv[1]) == "auto")) - std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; - else - std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; - } - - // clear - ma = MyMatrix::Random(M, K); - mb = MyMatrix::Random(K, N); - mc = MyMatrix::Random(M, N); - - // eigen - // if (!(std::string(argv[1])=="auto")) - { - timer.reset(); - for (uint k = 0; k < nbtries; ++k) { - timer.start(); - bench_eigengemm(mc, ma, mb, nbloops); - timer.stop(); - } - if (!(std::string(argv[1]) == "auto")) - std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; - else - std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; - } - - std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; - std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; - - return 0; -} - -using namespace Eigen; - -void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) { - for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb; -} - -#define MYVERIFY(A, M) \ - if (!(A)) { \ - std::cout << "FAIL: " << M << "\n"; \ - } -void check_product(int M, int N, int K) { - MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N); - ma = MyMatrix::Random(M, K); - mb = MyMatrix::Random(K, N); - maT = ma.transpose(); - mbT = mb.transpose(); - mc = MyMatrix::Random(M, N); - - MyMatrix::Scalar eps = 1e-4; - - meigen = mref = mc; - CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); - meigen += ma * mb; - MYVERIFY(meigen.isApprox(mref, eps), ". * ."); - - meigen = mref = mc; - CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); - meigen += maT.transpose() * mb; - MYVERIFY(meigen.isApprox(mref, eps), "T * ."); - - meigen = mref = mc; - CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); - meigen += (maT.transpose()) * (mbT.transpose()); - MYVERIFY(meigen.isApprox(mref, eps), "T * T"); - - meigen = mref = mc; - CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); - meigen += ma * mbT.transpose(); - MYVERIFY(meigen.isApprox(mref, eps), ". * T"); -} - -void check_product(void) { - int M, N, K; - for (uint i = 0; i < 1000; ++i) { - M = internal::random(1, 64); - N = internal::random(1, 768); - K = internal::random(1, 768); - M = (0 + M) * 1; - std::cout << M << " x " << N << " x " << K << "\n"; - check_product(M, N, K); - } -} diff --git a/bench/benchCholesky.cpp b/bench/benchCholesky.cpp deleted file mode 100644 index 3d6655b53..000000000 --- a/bench/benchCholesky.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky -// options: -// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 -// -DEIGEN_DONT_VECTORIZE -// -msse2 -// -DREPEAT=100 -// -DTRIES=10 -// -DSCALAR=double - -#include - -#include -#include -#include -using namespace Eigen; - -#ifndef REPEAT -#define REPEAT 10000 -#endif - -#ifndef TRIES -#define TRIES 10 -#endif - -typedef float Scalar; - -template -__attribute__((noinline)) void benchLLT(const MatrixType& m) { - int rows = m.rows(); - int cols = m.cols(); - - double cost = 0; - for (int j = 0; j < rows; ++j) { - int r = std::max(rows - j - 1, 0); - cost += 2 * (r * j + r + j); - } - - int repeats = (REPEAT * 1000) / (rows * rows); - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; - - MatrixType a = MatrixType::Random(rows, cols); - SquareMatrixType covMat = a * a.adjoint(); - - BenchTimer timerNoSqrt, timerSqrt; - - Scalar acc = 0; - int r = internal::random(0, covMat.rows() - 1); - int c = internal::random(0, covMat.cols() - 1); - for (int t = 0; t < TRIES; ++t) { - timerNoSqrt.start(); - for (int k = 0; k < repeats; ++k) { - LDLT cholnosqrt(covMat); - acc += cholnosqrt.matrixL().coeff(r, c); - } - timerNoSqrt.stop(); - } - - for (int t = 0; t < TRIES; ++t) { - timerSqrt.start(); - for (int k = 0; k < repeats; ++k) { - LLT chol(covMat); - acc += chol.matrixL().coeff(r, c); - } - timerSqrt.stop(); - } - - if (MatrixType::RowsAtCompileTime == Dynamic) - std::cout << "dyn "; - else - std::cout << "fixed "; - std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s " - << "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s " - << "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n"; - -#ifdef BENCH_GSL - if (MatrixType::RowsAtCompileTime == Dynamic) { - timerSqrt.reset(); - - gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols()); - gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols()); - - eiToGsl(covMat, &gslCovMat); - for (int t = 0; t < TRIES; ++t) { - timerSqrt.start(); - for (int k = 0; k < repeats; ++k) { - gsl_matrix_memcpy(gslCopy, gslCovMat); - gsl_linalg_cholesky_decomp(gslCopy); - acc += gsl_matrix_get(gslCopy, r, c); - } - timerSqrt.stop(); - } - - std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s"; - - gsl_matrix_free(gslCovMat); - } -#endif - std::cout << "\n"; - // make sure the compiler does not optimize too much - if (acc == 123) std::cout << acc; -} - -int main(int argc, char* argv[]) { - const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0}; - std::cout << "size LDLT LLT"; - // #ifdef BENCH_GSL - // std::cout << " GSL (standard + double + ATLAS) "; - // #endif - std::cout << "\n"; - for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix(dynsizes[i], dynsizes[i])); - - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - benchLLT(Matrix()); - return 0; -} diff --git a/bench/benchEigenSolver.cpp b/bench/benchEigenSolver.cpp deleted file mode 100644 index 839877729..000000000 --- a/bench/benchEigenSolver.cpp +++ /dev/null @@ -1,192 +0,0 @@ - -// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver -// options: -// -DBENCH_GMM -// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 -// -DEIGEN_DONT_VECTORIZE -// -msse2 -// -DREPEAT=100 -// -DTRIES=10 -// -DSCALAR=double - -#include - -#include -#include -#include -using namespace Eigen; - -#ifndef REPEAT -#define REPEAT 1000 -#endif - -#ifndef TRIES -#define TRIES 4 -#endif - -#ifndef SCALAR -#define SCALAR float -#endif - -typedef SCALAR Scalar; - -template -__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) { - int rows = m.rows(); - int cols = m.cols(); - - int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows)))); - int saRepeats = stdRepeats * 4; - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; - - MatrixType a = MatrixType::Random(rows, cols); - SquareMatrixType covMat = a * a.adjoint(); - - BenchTimer timerSa, timerStd; - - Scalar acc = 0; - int r = internal::random(0, covMat.rows() - 1); - int c = internal::random(0, covMat.cols() - 1); - { - SelfAdjointEigenSolver ei(covMat); - for (int t = 0; t < TRIES; ++t) { - timerSa.start(); - for (int k = 0; k < saRepeats; ++k) { - ei.compute(covMat); - acc += ei.eigenvectors().coeff(r, c); - } - timerSa.stop(); - } - } - - { - EigenSolver ei(covMat); - for (int t = 0; t < TRIES; ++t) { - timerStd.start(); - for (int k = 0; k < stdRepeats; ++k) { - ei.compute(covMat); - acc += ei.eigenvectors().coeff(r, c); - } - timerStd.stop(); - } - } - - if (MatrixType::RowsAtCompileTime == Dynamic) - std::cout << "dyn "; - else - std::cout << "fixed "; - std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t" - << timerStd.value() * REPEAT / stdRepeats << "s"; - -#ifdef BENCH_GMM - if (MatrixType::RowsAtCompileTime == Dynamic) { - timerSa.reset(); - timerStd.reset(); - - gmm::dense_matrix gmmCovMat(covMat.rows(), covMat.cols()); - gmm::dense_matrix eigvect(covMat.rows(), covMat.cols()); - std::vector eigval(covMat.rows()); - eiToGmm(covMat, gmmCovMat); - for (int t = 0; t < TRIES; ++t) { - timerSa.start(); - for (int k = 0; k < saRepeats; ++k) { - gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect); - acc += eigvect(r, c); - } - timerSa.stop(); - } - // the non-selfadjoint solver does not compute the eigen vectors - // for (int t=0; t 0; ++i) benchEigenSolver(Matrix(dynsizes[i], dynsizes[i])); - - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - benchEigenSolver(Matrix()); - return 0; -} diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp deleted file mode 100644 index 3c33e77ae..000000000 --- a/bench/benchFFT.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Mark Borgerding mark a borgerding net -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include - -#include -#include -#include -#include - -#include - -using namespace Eigen; -using namespace std; - -template -string nameof(); - -template <> -string nameof() { - return "float"; -} -template <> -string nameof() { - return "double"; -} -template <> -string nameof() { - return "long double"; -} - -#ifndef TYPE -#define TYPE float -#endif - -#ifndef NFFT -#define NFFT 1024 -#endif -#ifndef NDATA -#define NDATA 1000000 -#endif - -using namespace Eigen; - -template -void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) { - typedef typename NumTraits::Real Scalar; - typedef typename std::complex Complex; - int nits = NDATA / nfft; - vector inbuf(nfft); - vector outbuf(nfft); - FFT fft; - - if (unscaled) { - fft.SetFlag(fft.Unscaled); - cout << "unscaled "; - } - if (halfspec) { - fft.SetFlag(fft.HalfSpectrum); - cout << "halfspec "; - } - - std::fill(inbuf.begin(), inbuf.end(), 0); - fft.fwd(outbuf, inbuf); - - BenchTimer timer; - timer.reset(); - for (int k = 0; k < 8; ++k) { - timer.start(); - if (fwd) - for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf); - else - for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf); - timer.stop(); - } - - cout << nameof() << " "; - double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits); - if (NumTraits::IsComplex) { - cout << "complex"; - } else { - cout << "real "; - mflops /= 2; - } - - if (fwd) - cout << " fwd"; - else - cout << " inv"; - - cout << " NFFT=" << nfft << " " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s " << mflops << "MFLOPS\n"; -} - -int main(int argc, char** argv) { - bench >(NFFT, true); - bench >(NFFT, false); - bench(NFFT, true); - bench(NFFT, false); - bench(NFFT, false, true); - bench(NFFT, false, true, true); - - bench >(NFFT, true); - bench >(NFFT, false); - bench(NFFT, true); - bench(NFFT, false); - bench >(NFFT, true); - bench >(NFFT, false); - bench(NFFT, true); - bench(NFFT, false); - return 0; -} diff --git a/bench/benchGeometry.cpp b/bench/benchGeometry.cpp deleted file mode 100644 index 67c16a992..000000000 --- a/bench/benchGeometry.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include - -using namespace Eigen; -using namespace std; - -#ifndef REPEAT -#define REPEAT 1000000 -#endif - -enum func_opt { - TV, - TMATV, - TMATVMAT, -}; - -template -struct func; - -template -struct func { - static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { - asm(""); - return a1 * a2; - } -}; - -template -struct func { - static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { - asm(""); - return a1.matrix() * a2; - } -}; - -template -struct func { - static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) { - asm(""); - return res(a1.matrix() * a2.matrix()); - } -}; - -template -struct test_transform { - static void run() { - arg1 a1; - a1.setIdentity(); - arg2 a2; - a2.setIdentity(); - - BenchTimer timer; - timer.reset(); - for (int k = 0; k < 10; ++k) { - timer.start(); - for (int k = 0; k < REPEAT; ++k) a2 = func::run(a1, a2); - timer.stop(); - } - cout << setprecision(4) << fixed << timer.value() << "s " << endl; - ; - } -}; - -#define run_vec(op, scalar, mode, option, vsize) \ - std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \ - { \ - typedef Transform Trans; \ - typedef Matrix Vec; \ - typedef func Func; \ - test_transform::run(); \ - } - -#define run_trans(op, scalar, mode, option) \ - std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \ - { \ - typedef Transform Trans; \ - typedef func Func; \ - test_transform::run(); \ - } - -int main(int argc, char* argv[]) { - cout << "vec = trans * vec" << endl; - run_vec(TV, float, Isometry, AutoAlign, 3); - run_vec(TV, float, Isometry, DontAlign, 3); - run_vec(TV, float, Isometry, AutoAlign, 4); - run_vec(TV, float, Isometry, DontAlign, 4); - run_vec(TV, float, Projective, AutoAlign, 4); - run_vec(TV, float, Projective, DontAlign, 4); - run_vec(TV, double, Isometry, AutoAlign, 3); - run_vec(TV, double, Isometry, DontAlign, 3); - run_vec(TV, double, Isometry, AutoAlign, 4); - run_vec(TV, double, Isometry, DontAlign, 4); - run_vec(TV, double, Projective, AutoAlign, 4); - run_vec(TV, double, Projective, DontAlign, 4); - - cout << "vec = trans.matrix() * vec" << endl; - run_vec(TMATV, float, Isometry, AutoAlign, 4); - run_vec(TMATV, float, Isometry, DontAlign, 4); - run_vec(TMATV, double, Isometry, AutoAlign, 4); - run_vec(TMATV, double, Isometry, DontAlign, 4); - - cout << "trans = trans1 * trans" << endl; - run_trans(TV, float, Isometry, AutoAlign); - run_trans(TV, float, Isometry, DontAlign); - run_trans(TV, double, Isometry, AutoAlign); - run_trans(TV, double, Isometry, DontAlign); - run_trans(TV, float, Projective, AutoAlign); - run_trans(TV, float, Projective, DontAlign); - run_trans(TV, double, Projective, AutoAlign); - run_trans(TV, double, Projective, DontAlign); - - cout << "trans = trans1.matrix() * trans.matrix()" << endl; - run_trans(TMATVMAT, float, Isometry, AutoAlign); - run_trans(TMATVMAT, float, Isometry, DontAlign); - run_trans(TMATVMAT, double, Isometry, AutoAlign); - run_trans(TMATVMAT, double, Isometry, DontAlign); -} diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp deleted file mode 100644 index 509c64227..000000000 --- a/bench/benchVecAdd.cpp +++ /dev/null @@ -1,131 +0,0 @@ - -#include -#include -#include -using namespace Eigen; - -#ifndef SIZE -#define SIZE 50 -#endif - -#ifndef REPEAT -#define REPEAT 10000 -#endif - -typedef float Scalar; - -__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); -__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); -__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); - -int main(int argc, char* argv[]) { - int size = SIZE * 8; - int size2 = size * size; - Scalar* a = internal::aligned_new(size2); - Scalar* b = internal::aligned_new(size2 + 4) + 1; - Scalar* c = internal::aligned_new(size2); - - for (int i = 0; i < size; ++i) { - a[i] = b[i] = c[i] = 0; - } - - BenchTimer timer; - - timer.reset(); - for (int k = 0; k < 10; ++k) { - timer.start(); - benchVec(a, b, c, size2); - timer.stop(); - } - std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) - << " GFlops\n"; - return 0; - for (int innersize = size; innersize > 2; --innersize) { - if (size2 % innersize == 0) { - int outersize = size2 / innersize; - MatrixXf ma = Map(a, innersize, outersize); - MatrixXf mb = Map(b, innersize, outersize); - MatrixXf mc = Map(c, innersize, outersize); - timer.reset(); - for (int k = 0; k < 3; ++k) { - timer.start(); - benchVec(ma, mb, mc); - timer.stop(); - } - std::cout << innersize << " x " << outersize << " " << timer.value() << "s " - << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n"; - } - } - - VectorXf va = Map(a, size2); - VectorXf vb = Map(b, size2); - VectorXf vc = Map(c, size2); - timer.reset(); - for (int k = 0; k < 3; ++k) { - timer.start(); - benchVec(va, vb, vc); - timer.stop(); - } - std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) - << " GFlops\n"; - - return 0; -} - -void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) { - for (int k = 0; k < REPEAT; ++k) a = a + b; -} - -void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) { - for (int k = 0; k < REPEAT; ++k) a = a + b; -} - -void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) { - typedef internal::packet_traits::type PacketScalar; - const int PacketSize = internal::packet_traits::size; - PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; - for (int k = 0; k < REPEAT; ++k) - for (int i = 0; i < size; i += PacketSize * 8) { - // a0 = internal::pload(&a[i]); - // b0 = internal::pload(&b[i]); - // a1 = internal::pload(&a[i+1*PacketSize]); - // b1 = internal::pload(&b[i+1*PacketSize]); - // a2 = internal::pload(&a[i+2*PacketSize]); - // b2 = internal::pload(&b[i+2*PacketSize]); - // a3 = internal::pload(&a[i+3*PacketSize]); - // b3 = internal::pload(&b[i+3*PacketSize]); - // internal::pstore(&a[i], internal::padd(a0, b0)); - // a0 = internal::pload(&a[i+4*PacketSize]); - // b0 = internal::pload(&b[i+4*PacketSize]); - // - // internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1)); - // a1 = internal::pload(&a[i+5*PacketSize]); - // b1 = internal::pload(&b[i+5*PacketSize]); - // - // internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2)); - // a2 = internal::pload(&a[i+6*PacketSize]); - // b2 = internal::pload(&b[i+6*PacketSize]); - // - // internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3)); - // a3 = internal::pload(&a[i+7*PacketSize]); - // b3 = internal::pload(&b[i+7*PacketSize]); - // - // internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0)); - // internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1)); - // internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2)); - // internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3)); - - internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]), - internal::ploadu(&b[i + 2 * PacketSize]))); - internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]), - internal::ploadu(&b[i + 3 * PacketSize]))); - internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]), - internal::ploadu(&b[i + 4 * PacketSize]))); - internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]), - internal::ploadu(&b[i + 5 * PacketSize]))); - internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]), - internal::ploadu(&b[i + 6 * PacketSize]))); - internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]), - internal::ploadu(&b[i + 7 * PacketSize]))); - } -} diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp deleted file mode 100644 index e01bdb8cf..000000000 --- a/bench/bench_gemm.cpp +++ /dev/null @@ -1,393 +0,0 @@ - -// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out -// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out - -// Compilation options: -// -// -DSCALAR=std::complex -// -DSCALARA=double or -DSCALARB=double -// -DHAVE_BLAS -// -DDECOUPLED -// - -#include -#include -#include - -using namespace std; -using namespace Eigen; - -#ifndef SCALAR -// #define SCALAR std::complex -#define SCALAR float -#endif - -#ifndef SCALARA -#define SCALARA SCALAR -#endif - -#ifndef SCALARB -#define SCALARB SCALAR -#endif - -#ifdef ROWMAJ_A -const int opt_A = RowMajor; -#else -const int opt_A = ColMajor; -#endif - -#ifdef ROWMAJ_B -const int opt_B = RowMajor; -#else -const int opt_B = ColMajor; -#endif - -typedef SCALAR Scalar; -typedef NumTraits::Real RealScalar; -typedef Matrix A; -typedef Matrix B; -typedef Matrix C; -typedef Matrix M; - -#ifdef HAVE_BLAS - -extern "C" { -#include -} - -static float fone = 1; -static float fzero = 0; -static double done = 1; -static double szero = 0; -static std::complex cfone = 1; -static std::complex cfzero = 0; -static std::complex cdone = 1; -static std::complex cdzero = 0; -static char notrans = 'N'; -static char trans = 'T'; -static char nonunit = 'N'; -static char lower = 'L'; -static char right = 'R'; -static int intone = 1; - -#ifdef ROWMAJ_A -const char transA = trans; -#else -const char transA = notrans; -#endif - -#ifdef ROWMAJ_B -const char transB = trans; -#else -const char transB = notrans; -#endif - -template -void blas_gemm(const A& a, const B& b, MatrixXf& c) { - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - int lda = a.outerStride(); - int ldb = b.outerStride(); - int ldc = c.rows(); - - sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast(a.data()), &lda, const_cast(b.data()), &ldb, - &fone, c.data(), &ldc); -} - -template -void blas_gemm(const A& a, const B& b, MatrixXd& c) { - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - int lda = a.outerStride(); - int ldb = b.outerStride(); - int ldc = c.rows(); - - dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast(a.data()), &lda, const_cast(b.data()), &ldb, - &done, c.data(), &ldc); -} - -template -void blas_gemm(const A& a, const B& b, MatrixXcf& c) { - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - int lda = a.outerStride(); - int ldb = b.outerStride(); - int ldc = c.rows(); - - cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast((const float*)a.data()), &lda, - const_cast((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc); -} - -template -void blas_gemm(const A& a, const B& b, MatrixXcd& c) { - int M = c.rows(); - int N = c.cols(); - int K = a.cols(); - int lda = a.outerStride(); - int ldb = b.outerStride(); - int ldc = c.rows(); - - zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast((const double*)a.data()), &lda, - const_cast((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc); -} - -#endif - -void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) { - cr.noalias() += ar * br; - cr.noalias() -= ai * bi; - ci.noalias() += ar * bi; - ci.noalias() += ai * br; - // [cr ci] += [ar ai] * br + [-ai ar] * bi -} - -void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) { - cr.noalias() += a * br; - ci.noalias() += a * bi; -} - -void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) { - cr.noalias() += ar * b; - ci.noalias() += ai * b; -} - -template -EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) { - c.noalias() += a * b; -} - -int main(int argc, char** argv) { - std::ptrdiff_t l1 = internal::queryL1CacheSize(); - std::ptrdiff_t l2 = internal::queryTopLevelCacheSize(); - std::cout << "L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n"; - std::cout << "L2/L3 cache size = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n"; - typedef internal::gebp_traits Traits; - std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n"; - - int rep = 1; // number of repetitions per try - int tries = 2; // number of tries, we keep the best - - int s = 2048; - int m = s; - int n = s; - int p = s; - int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0; - - bool need_help = false; - for (int i = 1; i < argc;) { - if (argv[i][0] == '-') { - if (argv[i][1] == 's') { - ++i; - s = atoi(argv[i++]); - m = n = p = s; - if (argv[i][0] != '-') { - n = atoi(argv[i++]); - p = atoi(argv[i++]); - } - } else if (argv[i][1] == 'c') { - ++i; - cache_size1 = atoi(argv[i++]); - if (argv[i][0] != '-') { - cache_size2 = atoi(argv[i++]); - if (argv[i][0] != '-') cache_size3 = atoi(argv[i++]); - } - } else if (argv[i][1] == 't') { - tries = atoi(argv[++i]); - ++i; - } else if (argv[i][1] == 'p') { - ++i; - rep = atoi(argv[i++]); - } - } else { - need_help = true; - break; - } - } - - if (need_help) { - std::cout << argv[0] << " -s -c -t -p \n"; - std::cout << " : size\n"; - std::cout << " : rows columns depth\n"; - return 1; - } - -#if EIGEN_VERSION_AT_LEAST(3, 2, 90) - if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3); -#endif - - A a(m, p); - a.setRandom(); - B b(p, n); - b.setRandom(); - C c(m, n); - c.setOnes(); - C rc = c; - - std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n"; - std::ptrdiff_t mc(m), nc(n), kc(p); - internal::computeProductBlockingSizes(kc, mc, nc); - std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n"; - - C r = c; - -// check the parallel product is correct -#if defined EIGEN_HAS_OPENMP - Eigen::initParallel(); - int procs = omp_get_max_threads(); - if (procs > 1) { -#ifdef HAVE_BLAS - blas_gemm(a, b, r); -#else - omp_set_num_threads(1); - r.noalias() += a * b; - omp_set_num_threads(procs); -#endif - c.noalias() += a * b; - if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; - } -#elif defined HAVE_BLAS - blas_gemm(a, b, r); - c.noalias() += a * b; - if (!r.isApprox(c)) { - std::cout << (r - c).norm() / r.norm() << "\n"; - std::cerr << "Warning, your product is crap!\n\n"; - } -#else - if (1. * m * n * p < 2000. * 2000 * 2000) { - gemm(a, b, c); - r.noalias() += a.cast().lazyProduct(b.cast()); - if (!r.isApprox(c)) { - std::cout << (r - c).norm() / r.norm() << "\n"; - std::cerr << "Warning, your product is crap!\n\n"; - } - } -#endif - -#ifdef HAVE_BLAS - BenchTimer tblas; - c = rc; - BENCH(tblas, tries, rep, blas_gemm(a, b, c)); - std::cout << "blas cpu " << tblas.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) - << "s)\n"; - std::cout << "blas real " << tblas.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) - << "s)\n"; -#endif - - // warm start - if (b.norm() + a.norm() == 123.554) std::cout << "\n"; - - BenchTimer tmt; - c = rc; - BENCH(tmt, tries, rep, gemm(a, b, c)); - std::cout << "eigen cpu " << tmt.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) - << "s)\n"; - std::cout << "eigen real " << tmt.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) - << "s)\n"; - -#ifdef EIGEN_HAS_OPENMP - if (procs > 1) { - BenchTimer tmono; - omp_set_num_threads(1); - Eigen::setNbThreads(1); - c = rc; - BENCH(tmono, tries, rep, gemm(a, b, c)); - std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) - << "s)\n"; - std::cout << "eigen mono real " << tmono.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" - << tmono.total(REAL_TIMER) << "s)\n"; - std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " - << (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n"; - } -#endif - - if (1. * m * n * p < 30 * 30 * 30) { - BenchTimer tmt; - c = rc; - BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b)); - std::cout << "lazy cpu " << tmt.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) - << "s)\n"; - std::cout << "lazy real " << tmt.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) - << "s)\n"; - } - -#ifdef DECOUPLED - if ((NumTraits::IsComplex) && (NumTraits::IsComplex)) { - M ar(m, p); - ar.setRandom(); - M ai(m, p); - ai.setRandom(); - M br(p, n); - br.setRandom(); - M bi(p, n); - bi.setRandom(); - M cr(m, n); - cr.setRandom(); - M ci(m, n); - ci.setRandom(); - - BenchTimer t; - BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) - << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) - << "s)\n"; - } - if ((!NumTraits::IsComplex) && (NumTraits::IsComplex)) { - M a(m, p); - a.setRandom(); - M br(p, n); - br.setRandom(); - M bi(p, n); - bi.setRandom(); - M cr(m, n); - cr.setRandom(); - M ci(m, n); - ci.setRandom(); - - BenchTimer t; - BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) - << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) - << "s)\n"; - } - if ((NumTraits::IsComplex) && (!NumTraits::IsComplex)) { - M ar(m, p); - ar.setRandom(); - M ai(m, p); - ai.setRandom(); - M b(p, n); - b.setRandom(); - M cr(m, n); - cr.setRandom(); - M ci(m, n); - ci.setRandom(); - - BenchTimer t; - BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci)); - std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) - << "s)\n"; - std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t" - << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) - << "s)\n"; - } -#endif - - return 0; -} diff --git a/bench/bench_move_semantics.cpp b/bench/bench_move_semantics.cpp deleted file mode 100644 index 8b7341ab8..000000000 --- a/bench/bench_move_semantics.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2020 Sebastien Boisvert -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "BenchTimer.h" -#include "../test/MovableScalar.h" - -#include - -#include -#include - -template -void copy_matrix(MatrixType& m) { - MatrixType tmp(m); - m = tmp; -} - -template -void move_matrix(MatrixType&& m) { - MatrixType tmp(std::move(m)); - m = std::move(tmp); -} - -template -void bench(const std::string& label) { - using MatrixType = Eigen::Matrix, 1, 10>; - Eigen::BenchTimer t; - - int tries = 10; - int rep = 1000000; - - MatrixType data = MatrixType::Random().eval(); - MatrixType dest; - - BENCH(t, tries, rep, copy_matrix(data)); - std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl; - - BENCH(t, tries, rep, move_matrix(std::move(data))); - std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl; -} - -int main() { - bench("float"); - bench("double"); - return 0; -} diff --git a/bench/bench_multi_compilers.sh b/bench/bench_multi_compilers.sh deleted file mode 100755 index 27e91f1d5..000000000 --- a/bench/bench_multi_compilers.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -if (($# < 2)); then - echo "Usage: $0 compilerlist.txt benchfile.cpp" -else - -compilerlist=$1 -benchfile=$2 - -g=0 -source $compilerlist - -# for each compiler, compile benchfile and run the benchmark -for (( i=0 ; i /dev/null - echo "" - else - echo "compiler not found: $compiler" - fi -done - -fi diff --git a/bench/bench_norm.cpp b/bench/bench_norm.cpp deleted file mode 100644 index fb53b85af..000000000 --- a/bench/bench_norm.cpp +++ /dev/null @@ -1,342 +0,0 @@ -#include -#include -#include -#include "BenchTimer.h" -using namespace Eigen; -using namespace std; - -template -EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) { - return v.norm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) { - return v.stableNorm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) { - return v.hypotNorm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) { - return v.blueNorm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) { - typedef typename T::Scalar Scalar; - int n = v.size(); - Scalar scale = 0; - Scalar ssq = 1; - for (int i = 0; i < n; ++i) { - Scalar ax = std::abs(v.coeff(i)); - if (scale >= ax) { - ssq += numext::abs2(ax / scale); - } else { - ssq = Scalar(1) + ssq * numext::abs2(scale / ax); - scale = ax; - } - } - return scale * std::sqrt(ssq); -} - -template -EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) { - typedef typename T::Scalar Scalar; - Scalar s = v.array().abs().maxCoeff(); - return s * (v / s).norm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) { - return v.stableNorm(); -} - -template -EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) { - int n = v.size() / 2; - for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1); - n = n / 2; - while (n > 0) { - for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1); - n = n / 2; - } - return std::sqrt(v(0)); -} - -namespace Eigen { -namespace internal { -#ifdef EIGEN_VECTORIZE -Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a, b); } -Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a, b); } - -Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); } -Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); } -#endif -} // namespace internal -} // namespace Eigen - -template -EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) { -#ifndef EIGEN_VECTORIZE - return v.blueNorm(); -#else - typedef typename T::Scalar Scalar; - - static int nmax = 0; - static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; - int n; - - if (nmax <= 0) { - int nbig, ibeta, it, iemin, iemax, iexp; - Scalar abig, eps; - - nbig = NumTraits::highest(); // largest integer - ibeta = std::numeric_limits::radix; // NumTraits::Base; // base for - // floating-point numbers - it = NumTraits::digits(); // NumTraits::Mantissa; // number of base-beta digits in - // mantissa - iemin = NumTraits::min_exponent(); // minimum exponent - iemax = NumTraits::max_exponent(); // maximum exponent - rbig = NumTraits::highest(); // largest floating-point number - - // Check the basic machine-dependent constants. - if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) { - eigen_assert(false && "the algorithm cannot be guaranteed on this computer"); - } - iexp = -((1 - iemin) / 2); - b1 = std::pow(ibeta, iexp); // lower boundary of midrange - iexp = (iemax + 1 - it) / 2; - b2 = std::pow(ibeta, iexp); // upper boundary of midrange - - iexp = (2 - iemin) / 2; - s1m = std::pow(ibeta, iexp); // scaling factor for lower range - iexp = -((iemax + it) / 2); - s2m = std::pow(ibeta, iexp); // scaling factor for upper range - - overfl = rbig * s2m; // overflow boundary for abig - eps = std::pow(ibeta, 1 - it); - relerr = std::sqrt(eps); // tolerance for neglecting asml - abig = 1.0 / eps - 1.0; - if (Scalar(nbig) > abig) - nmax = abig; // largest safe n - else - nmax = nbig; - } - - typedef typename internal::packet_traits::type Packet; - const int ps = internal::packet_traits::size; - Packet pasml = internal::pset1(Scalar(0)); - Packet pamed = internal::pset1(Scalar(0)); - Packet pabig = internal::pset1(Scalar(0)); - Packet ps2m = internal::pset1(s2m); - Packet ps1m = internal::pset1(s1m); - Packet pb2 = internal::pset1(b2); - Packet pb1 = internal::pset1(b1); - for (int j = 0; j < v.size(); j += ps) { - Packet ax = internal::pabs(v.template packet(j)); - Packet ax_s2m = internal::pmul(ax, ps2m); - Packet ax_s1m = internal::pmul(ax, ps1m); - Packet maskBig = internal::plt(pb2, ax); - Packet maskSml = internal::plt(ax, pb1); - - // Packet maskMed = internal::pand(maskSml,maskBig); - // Packet scale = internal::pset1(Scalar(0)); - // scale = internal::por(scale, internal::pand(maskBig,ps2m)); - // scale = internal::por(scale, internal::pand(maskSml,ps1m)); - // scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed)); - // ax = internal::pmul(ax,scale); - // ax = internal::pmul(ax,ax); - // pabig = internal::padd(pabig, internal::pand(maskBig, ax)); - // pasml = internal::padd(pasml, internal::pand(maskSml, ax)); - // pamed = internal::padd(pamed, internal::pandnot(ax,maskMed)); - - pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m))); - pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m))); - pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig))); - } - Scalar abig = internal::predux(pabig); - Scalar asml = internal::predux(pasml); - Scalar amed = internal::predux(pamed); - if (abig > Scalar(0)) { - abig = std::sqrt(abig); - if (abig > overfl) { - eigen_assert(false && "overflow"); - return rbig; - } - if (amed > Scalar(0)) { - abig = abig / s2m; - amed = std::sqrt(amed); - } else { - return abig / s2m; - } - - } else if (asml > Scalar(0)) { - if (amed > Scalar(0)) { - abig = std::sqrt(amed); - amed = std::sqrt(asml) / s1m; - } else { - return std::sqrt(asml) / s1m; - } - } else { - return std::sqrt(amed); - } - asml = std::min(abig, amed); - abig = std::max(abig, amed); - if (asml <= abig * relerr) - return abig; - else - return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig)); -#endif -} - -#define BENCH_PERF(NRM) \ - { \ - float af = 0; \ - double ad = 0; \ - std::complex ac = 0; \ - Eigen::BenchTimer tf, td, tcf; \ - tf.reset(); \ - td.reset(); \ - tcf.reset(); \ - for (int k = 0; k < tries; ++k) { \ - tf.start(); \ - for (int i = 0; i < iters; ++i) { \ - af += NRM(vf); \ - } \ - tf.stop(); \ - } \ - for (int k = 0; k < tries; ++k) { \ - td.start(); \ - for (int i = 0; i < iters; ++i) { \ - ad += NRM(vd); \ - } \ - td.stop(); \ - } \ - /*for (int k=0; k()); - double yd = based * std::abs(internal::random()); - VectorXf vf = VectorXf::Ones(s) * yf; - VectorXd vd = VectorXd::Ones(s) * yd; - - std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n"; - std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; - std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; - std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; - std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n"; - std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; - std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n"; - std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n"; -} - -void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) { - VectorXf vf(s); - VectorXd vd(s); - for (int i = 0; i < s; ++i) { - vf[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ef0, ef1)); - vd[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ed0, ed1)); - } - - // std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; - std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) - << "\t" << sqsumNorm(vd.cast()) << "\n"; - std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) - << "\t" << hypotNorm(vd.cast()) << "\n"; - std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" - << blueNorm(vd.cast()) << "\n"; - std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) - << "\t" << blueNorm(vd.cast()) << "\n"; - std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) - << "\t" << lapackNorm(vd.cast()) << "\n"; - std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" - << twopassNorm(vf.cast()) << "\t" << twopassNorm(vd.cast()) << "\n"; - // std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast()) << "\t" << bl2passNorm(vd.cast()) << "\n"; -} - -int main(int argc, char** argv) { - int tries = 10; - int iters = 100000; - double y = 1.1345743233455785456788e12 * internal::random(); - VectorXf v = VectorXf::Ones(1024) * y; - - // return 0; - int s = 10000; - double basef_ok = 1.1345743233455785456788e15; - double based_ok = 1.1345743233455785456788e95; - - double basef_under = 1.1345743233455785456788e-27; - double based_under = 1.1345743233455785456788e-303; - - double basef_over = 1.1345743233455785456788e+27; - double based_over = 1.1345743233455785456788e+302; - - std::cout.precision(20); - - std::cerr << "\nNo under/overflow:\n"; - check_accuracy(basef_ok, based_ok, s); - - std::cerr << "\nUnderflow:\n"; - check_accuracy(basef_under, based_under, s); - - std::cerr << "\nOverflow:\n"; - check_accuracy(basef_over, based_over, s); - - std::cerr << "\nVarying (over):\n"; - for (int k = 0; k < 1; ++k) { - check_accuracy_var(20, 27, 190, 302, s); - std::cout << "\n"; - } - - std::cerr << "\nVarying (under):\n"; - for (int k = 0; k < 1; ++k) { - check_accuracy_var(-27, 20, -302, -190, s); - std::cout << "\n"; - } - - y = 1; - std::cout.precision(4); - int s1 = 1024 * 1024 * 32; - std::cerr << "Performance (out of cache, " << s1 << "):\n"; - { - int iters = 1; - VectorXf vf = VectorXf::Random(s1) * y; - VectorXd vd = VectorXd::Random(s1) * y; - VectorXcf vcf = VectorXcf::Random(s1) * y; - BENCH_PERF(sqsumNorm); - BENCH_PERF(stableNorm); - BENCH_PERF(blueNorm); - BENCH_PERF(pblueNorm); - BENCH_PERF(lapackNorm); - BENCH_PERF(hypotNorm); - BENCH_PERF(twopassNorm); - BENCH_PERF(bl2passNorm); - } - - std::cerr << "\nPerformance (in cache, " << 512 << "):\n"; - { - int iters = 100000; - VectorXf vf = VectorXf::Random(512) * y; - VectorXd vd = VectorXd::Random(512) * y; - VectorXcf vcf = VectorXcf::Random(512) * y; - BENCH_PERF(sqsumNorm); - BENCH_PERF(stableNorm); - BENCH_PERF(blueNorm); - BENCH_PERF(pblueNorm); - BENCH_PERF(lapackNorm); - BENCH_PERF(hypotNorm); - BENCH_PERF(twopassNorm); - BENCH_PERF(bl2passNorm); - } -} diff --git a/bench/bench_reverse.cpp b/bench/bench_reverse.cpp deleted file mode 100644 index bf24982f3..000000000 --- a/bench/bench_reverse.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include -#include -#include -using namespace Eigen; - -#ifndef REPEAT -#define REPEAT 100000 -#endif - -#ifndef TRIES -#define TRIES 20 -#endif - -typedef double Scalar; - -template -__attribute__((noinline)) void bench_reverse(const MatrixType& m) { - int rows = m.rows(); - int cols = m.cols(); - int size = m.size(); - - int repeats = (REPEAT * 1000) / size; - MatrixType a = MatrixType::Random(rows, cols); - MatrixType b = MatrixType::Random(rows, cols); - - BenchTimer timerB, timerH, timerV; - - Scalar acc = 0; - int r = internal::random(0, rows - 1); - int c = internal::random(0, cols - 1); - for (int t = 0; t < TRIES; ++t) { - timerB.start(); - for (int k = 0; k < repeats; ++k) { - asm("#begin foo"); - b = a.reverse(); - asm("#end foo"); - acc += b.coeff(r, c); - } - timerB.stop(); - } - - if (MatrixType::RowsAtCompileTime == Dynamic) - std::cout << "dyn "; - else - std::cout << "fixed "; - std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s " - << "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t"; - - std::cout << "\n"; - // make sure the compiler does not optimize too much - if (acc == 123) std::cout << acc; -} - -int main(int argc, char* argv[]) { - const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0}; - std::cout << "size no sqrt standard"; - // #ifdef BENCH_GSL - // std::cout << " GSL (standard + double + ATLAS) "; - // #endif - std::cout << "\n"; - for (uint i = 0; dynsizes[i] > 0; ++i) { - bench_reverse(Matrix(dynsizes[i], dynsizes[i])); - bench_reverse(Matrix(dynsizes[i] * dynsizes[i])); - } - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - // bench_reverse(Matrix()); - return 0; -} diff --git a/bench/bench_sum.cpp b/bench/bench_sum.cpp deleted file mode 100644 index a5390b2a9..000000000 --- a/bench/bench_sum.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include -using namespace Eigen; -using namespace std; - -int main() { - typedef Matrix Vec; - Vec v(SIZE); - v.setZero(); - v[0] = 1; - v[1] = 2; - for (int i = 0; i < 1000000; i++) { - v.coeffRef(0) += v.sum() * SCALAR(1e-20); - } - cout << v.sum() << endl; -} diff --git a/bench/bench_unrolling b/bench/bench_unrolling deleted file mode 100755 index 826443845..000000000 --- a/bench/bench_unrolling +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000" -# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions" -CXX=${CXX-g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value - -for ((i=1; i<16; ++i)); do - echo "Matrix size: $i x $i :" - $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null - $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null - echo " " -done diff --git a/bench/benchmark-blocking-sizes.cpp b/bench/benchmark-blocking-sizes.cpp deleted file mode 100644 index 61c0aedc7..000000000 --- a/bench/benchmark-blocking-sizes.cpp +++ /dev/null @@ -1,617 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include -#include -#include -#include -#include -#include -#include - -bool eigen_use_specific_block_size; -int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n; -#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size -#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k -#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m -#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n -#include - -#include - -using namespace Eigen; -using namespace std; - -static BenchTimer timer; - -// how many times we repeat each measurement. -// measurements are randomly shuffled - we're not doing -// all N identical measurements in a row. -const int measurement_repetitions = 3; - -// Timings below this value are too short to be accurate, -// we'll repeat measurements with more iterations until -// we get a timing above that threshold. -const float min_accurate_time = 1e-2f; - -// See --min-working-set-size command line parameter. -size_t min_working_set_size = 0; - -float max_clock_speed = 0.0f; - -// range of sizes that we will benchmark (in all 3 K,M,N dimensions) -const size_t maxsize = 2048; -const size_t minsize = 16; - -typedef MatrixXf MatrixType; -typedef MatrixType::Scalar Scalar; -typedef internal::packet_traits::type Packet; - -static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two"); -static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two"); -static_assert(maxsize > minsize, "maxsize must be larger than minsize"); -static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)"); - -// just a helper to store a triple of K,M,N sizes for matrix product -struct size_triple_t { - size_t k, m, n; - size_triple_t() : k(0), m(0), n(0) {} - size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} - size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} - size_triple_t(uint16_t compact) { - k = 1 << ((compact & 0xf00) >> 8); - m = 1 << ((compact & 0x0f0) >> 4); - n = 1 << ((compact & 0x00f) >> 0); - } -}; - -uint8_t log2_pot(size_t x) { - size_t l = 0; - while (x >>= 1) l++; - return l; -} - -// Convert between size tripes and a compact form fitting in 12 bits -// where each size, which must be a POT, is encoded as its log2, on 4 bits -// so the largest representable size is 2^15 == 32k ... big enough. -uint16_t compact_size_triple(size_t k, size_t m, size_t n) { - return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); -} - -uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); } - -// A single benchmark. Initially only contains benchmark params. -// Then call run(), which stores the result in the gflops field. -struct benchmark_t { - uint16_t compact_product_size; - uint16_t compact_block_size; - bool use_default_block_size; - float gflops; - benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {} - benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn) - : compact_product_size(compact_size_triple(pk, pm, pn)), - compact_block_size(compact_size_triple(bk, bm, bn)), - use_default_block_size(false), - gflops(0) {} - benchmark_t(size_t pk, size_t pm, size_t pn) - : compact_product_size(compact_size_triple(pk, pm, pn)), - compact_block_size(0), - use_default_block_size(true), - gflops(0) {} - - void run(); -}; - -ostream& operator<<(ostream& s, const benchmark_t& b) { - s << hex << b.compact_product_size << dec; - if (b.use_default_block_size) { - size_triple_t t(b.compact_product_size); - Index k = t.k, m = t.m, n = t.n; - internal::computeProductBlockingSizes(k, m, n); - s << " default(" << k << ", " << m << ", " << n << ")"; - } else { - s << " " << hex << b.compact_block_size << dec; - } - s << " " << b.gflops; - return s; -} - -// We sort first by increasing benchmark parameters, -// then by decreasing performance. -bool operator<(const benchmark_t& b1, const benchmark_t& b2) { - return b1.compact_product_size < b2.compact_product_size || - (b1.compact_product_size == b2.compact_product_size && - ((b1.compact_block_size < b2.compact_block_size || - (b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops)))); -} - -void benchmark_t::run() { - size_triple_t productsizes(compact_product_size); - - if (use_default_block_size) { - eigen_use_specific_block_size = false; - } else { - // feed eigen with our custom blocking params - eigen_use_specific_block_size = true; - size_triple_t blocksizes(compact_block_size); - eigen_block_size_k = blocksizes.k; - eigen_block_size_m = blocksizes.m; - eigen_block_size_n = blocksizes.n; - } - - // set up the matrix pool - - const size_t combined_three_matrices_sizes = - sizeof(Scalar) * - (productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n); - - // 64 M is large enough that nobody has a cache bigger than that, - // while still being small enough that everybody has this much RAM, - // so conveniently we don't need to special-case platforms here. - const size_t unlikely_large_cache_size = 64 << 20; - - const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size; - - const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes; - - MatrixType* lhs = new MatrixType[matrix_pool_size]; - MatrixType* rhs = new MatrixType[matrix_pool_size]; - MatrixType* dst = new MatrixType[matrix_pool_size]; - - for (size_t i = 0; i < matrix_pool_size; i++) { - lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k); - rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n); - dst[i] = MatrixType::Zero(productsizes.m, productsizes.n); - } - - // main benchmark loop - - int iters_at_a_time = 1; - float time_per_iter = 0.0f; - size_t matrix_index = 0; - while (true) { - double starttime = timer.getCpuTime(); - for (int i = 0; i < iters_at_a_time; i++) { - dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; - matrix_index++; - if (matrix_index == matrix_pool_size) { - matrix_index = 0; - } - } - double endtime = timer.getCpuTime(); - - const float timing = float(endtime - starttime); - - if (timing >= min_accurate_time) { - time_per_iter = timing / iters_at_a_time; - break; - } - - iters_at_a_time *= 2; - } - - delete[] lhs; - delete[] rhs; - delete[] dst; - - gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter; -} - -void print_cpuinfo() { -#ifdef __linux__ - cout << "contents of /proc/cpuinfo:" << endl; - string line; - ifstream cpuinfo("/proc/cpuinfo"); - if (cpuinfo.is_open()) { - while (getline(cpuinfo, line)) { - cout << line << endl; - } - cpuinfo.close(); - } - cout << endl; -#elif defined __APPLE__ - cout << "output of sysctl hw:" << endl; - system("sysctl hw"); - cout << endl; -#endif -} - -template -string type_name() { - return "unknown"; -} - -template <> -string type_name() { - return "float"; -} - -template <> -string type_name() { - return "double"; -} - -struct action_t { - virtual const char* invokation_name() const { - abort(); - return nullptr; - } - virtual void run() const { abort(); } - virtual ~action_t() {} -}; - -void show_usage_and_exit(int /*argc*/, char* argv[], const vector>& available_actions) { - cerr << "usage: " << argv[0] << " [options...]" << endl << endl; - cerr << "available actions:" << endl << endl; - for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { - cerr << " " << (*it)->invokation_name() << endl; - } - cerr << endl; - cerr << "options:" << endl << endl; - cerr << " --min-working-set-size=N:" << endl; - cerr << " Set the minimum working set size to N bytes." << endl; - cerr << " This is rounded up as needed to a multiple of matrix size." << endl; - cerr << " A larger working set lowers the chance of a warm cache." << endl; - cerr << " The default value 0 means use a large enough working" << endl; - cerr << " set to likely outsize caches." << endl; - cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl; - cerr << " avoid warm caches." << endl; - exit(1); -} - -float measure_clock_speed() { - cerr << "Measuring clock speed... \r" << flush; - - vector all_gflops; - for (int i = 0; i < 8; i++) { - benchmark_t b(1024, 1024, 1024); - b.run(); - all_gflops.push_back(b.gflops); - } - - sort(all_gflops.begin(), all_gflops.end()); - float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5]; - - // multiply by an arbitrary constant to discourage trying doing anything with the - // returned values besides just comparing them with each other. - float result = stable_estimate * 123.456f; - - return result; -} - -struct human_duration_t { - int seconds; - human_duration_t(int s) : seconds(s) {} -}; - -ostream& operator<<(ostream& s, const human_duration_t& d) { - int remainder = d.seconds; - if (remainder > 3600) { - int hours = remainder / 3600; - s << hours << " h "; - remainder -= hours * 3600; - } - if (remainder > 60) { - int minutes = remainder / 60; - s << minutes << " min "; - remainder -= minutes * 60; - } - if (d.seconds < 600) { - s << remainder << " s"; - } - return s; -} - -const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; - -void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) { - FILE* file = fopen(filename, "w"); - if (!file) { - cerr << "Could not open file " << filename << " for writing." << endl; - cerr << "Do you have write permissions on the current working directory?" << endl; - exit(1); - } - size_t benchmarks_vector_size = benchmarks.size(); - fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file); - fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file); - fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file); - fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file); - fclose(file); -} - -bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) { - FILE* file = fopen(filename, "r"); - if (!file) { - return false; - } - if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) { - return false; - } - size_t benchmarks_vector_size = 0; - if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) { - return false; - } - if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) { - return false; - } - benchmarks.resize(benchmarks_vector_size); - if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) { - return false; - } - unlink(filename); - return true; -} - -void try_run_some_benchmarks(vector& benchmarks, double time_start, size_t& first_benchmark_to_run) { - if (first_benchmark_to_run == benchmarks.size()) { - return; - } - - double time_last_progress_update = 0; - double time_last_clock_speed_measurement = 0; - double time_now = 0; - - size_t benchmark_index = first_benchmark_to_run; - - while (true) { - float ratio_done = float(benchmark_index) / benchmarks.size(); - time_now = timer.getRealTime(); - - // We check clock speed every minute and at the end. - if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) { - time_last_clock_speed_measurement = time_now; - - // Ensure that clock speed is as expected - float current_clock_speed = measure_clock_speed(); - - // The tolerance needs to be smaller than the relative difference between - // clock speeds that a device could operate under. - // It seems unlikely that a device would be throttling clock speeds by - // amounts smaller than 2%. - // With a value of 1%, I was getting within noise on a Sandy Bridge. - const float clock_speed_tolerance = 0.02f; - - if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) { - // Clock speed is now higher than we previously measured. - // Either our initial measurement was inaccurate, which won't happen - // too many times as we are keeping the best clock speed value and - // and allowing some tolerance; or an unexpected condition occurred, - // which invalidates all benchmark results collected so far. - // Either way, we better restart all over again now. - if (benchmark_index) { - cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased. " << endl; - } - max_clock_speed = current_clock_speed; - first_benchmark_to_run = 0; - return; - } - - bool rerun_last_tests = false; - - if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { - cerr << "Measurements completed so far: " << 100.0f * ratio_done << " % " << endl; - cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be." - << endl; - - unsigned int seconds_to_sleep_if_lower_clock_speed = 1; - - while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { - if (seconds_to_sleep_if_lower_clock_speed > 32) { - cerr << "Sleeping longer probably won't make a difference." << endl; - cerr << "Serializing benchmarks to " << session_filename << endl; - serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run); - cerr << "Now restart this benchmark, and it should pick up where we left." << endl; - exit(2); - } - rerun_last_tests = true; - cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s... \r" - << endl; - sleep(seconds_to_sleep_if_lower_clock_speed); - current_clock_speed = measure_clock_speed(); - seconds_to_sleep_if_lower_clock_speed *= 2; - } - } - - if (rerun_last_tests) { - cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() - << " % because clock speed had been low. " << endl; - return; - } - - // nothing wrong with the clock speed so far, so there won't be a need to rerun - // benchmarks run so far in case we later encounter a lower clock speed. - first_benchmark_to_run = benchmark_index; - } - - if (benchmark_index == benchmarks.size()) { - // We're done! - first_benchmark_to_run = benchmarks.size(); - // Erase progress info - cerr << " " << endl; - return; - } - - // Display progress info on stderr - if (time_now > time_last_progress_update + 1.0f) { - time_last_progress_update = time_now; - cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA " - << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) - << " \r" << flush; - } - - // This is where we actually run a benchmark! - benchmarks[benchmark_index].run(); - benchmark_index++; - } -} - -void run_benchmarks(vector& benchmarks) { - size_t first_benchmark_to_run; - vector deserialized_benchmarks; - bool use_deserialized_benchmarks = false; - if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { - cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() - << " % already done" << endl; - if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 && - first_benchmark_to_run < benchmarks.size()) { - use_deserialized_benchmarks = true; - } - } - - if (use_deserialized_benchmarks) { - benchmarks = deserialized_benchmarks; - } else { - // not using deserialized benchmarks, starting from scratch - first_benchmark_to_run = 0; - - // Randomly shuffling benchmarks allows us to get accurate enough progress info, - // as now the cheap/expensive benchmarks are randomly mixed so they average out. - // It also means that if data is corrupted for some time span, the odds are that - // not all repetitions of a given benchmark will be corrupted. - random_shuffle(benchmarks.begin(), benchmarks.end()); - } - - for (int i = 0; i < 4; i++) { - max_clock_speed = max(max_clock_speed, measure_clock_speed()); - } - - double time_start = 0.0; - while (first_benchmark_to_run < benchmarks.size()) { - if (first_benchmark_to_run == 0) { - time_start = timer.getRealTime(); - } - try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run); - } - - // Sort timings by increasing benchmark parameters, and decreasing gflops. - // The latter is very important. It means that we can ignore all but the first - // benchmark with given parameters. - sort(benchmarks.begin(), benchmarks.end()); - - // Collect best (i.e. now first) results for each parameter values. - vector best_benchmarks; - for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { - if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size || - best_benchmarks.back().compact_block_size != it->compact_block_size) { - best_benchmarks.push_back(*it); - } - } - - // keep and return only the best benchmarks - benchmarks = best_benchmarks; -} - -struct measure_all_pot_sizes_action_t : action_t { - virtual const char* invokation_name() const { return "all-pot-sizes"; } - virtual void run() const { - vector benchmarks; - for (int repetition = 0; repetition < measurement_repetitions; repetition++) { - for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { - for (size_t msize = minsize; msize <= maxsize; msize *= 2) { - for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { - for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { - for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { - for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { - benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock); - } - } - } - } - } - } - } - - run_benchmarks(benchmarks); - - cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl; - for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { - cout << *it << endl; - } - } -}; - -struct measure_default_sizes_action_t : action_t { - virtual const char* invokation_name() const { return "default-sizes"; } - virtual void run() const { - vector benchmarks; - for (int repetition = 0; repetition < measurement_repetitions; repetition++) { - for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { - for (size_t msize = minsize; msize <= maxsize; msize *= 2) { - for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { - benchmarks.emplace_back(ksize, msize, nsize); - } - } - } - } - - run_benchmarks(benchmarks); - - cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl; - for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { - cout << *it << endl; - } - } -}; - -int main(int argc, char* argv[]) { - double time_start = timer.getRealTime(); - cout.precision(4); - cerr.precision(4); - - vector> available_actions; - available_actions.emplace_back(new measure_all_pot_sizes_action_t); - available_actions.emplace_back(new measure_default_sizes_action_t); - - auto action = available_actions.end(); - - if (argc <= 1) { - show_usage_and_exit(argc, argv, available_actions); - } - for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { - if (!strcmp(argv[1], (*it)->invokation_name())) { - action = it; - break; - } - } - - if (action == available_actions.end()) { - show_usage_and_exit(argc, argv, available_actions); - } - - for (int i = 2; i < argc; i++) { - if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { - const char* equals_sign = strchr(argv[i], '='); - min_working_set_size = strtoul(equals_sign + 1, nullptr, 10); - } else { - cerr << "unrecognized option: " << argv[i] << endl << endl; - show_usage_and_exit(argc, argv, available_actions); - } - } - - print_cpuinfo(); - - cout << "benchmark parameters:" << endl; - cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl; - cout << "scalar type: " << type_name() << endl; - cout << "packet size: " << internal::packet_traits::size << endl; - cout << "minsize = " << minsize << endl; - cout << "maxsize = " << maxsize << endl; - cout << "measurement_repetitions = " << measurement_repetitions << endl; - cout << "min_accurate_time = " << min_accurate_time << endl; - cout << "min_working_set_size = " << min_working_set_size; - if (min_working_set_size == 0) { - cout << " (try to outsize caches)"; - } - cout << endl << endl; - - (*action)->run(); - - double time_end = timer.getRealTime(); - cerr << "Finished in " << human_duration_t(time_end - time_start) << endl; -} diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp deleted file mode 100644 index 93e18b68b..000000000 --- a/bench/benchmark.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// g++ -O3 -DNDEBUG -DMATSIZE= benchmark.cpp -o benchmark && time ./benchmark - -#include - -#include - -#ifndef MATSIZE -#define MATSIZE 3 -#endif - -using namespace std; -using namespace Eigen; - -#ifndef REPEAT -#define REPEAT 40000000 -#endif - -#ifndef SCALAR -#define SCALAR double -#endif - -int main(int argc, char *argv[]) { - Matrix I = Matrix::Ones(); - Matrix m; - for (int i = 0; i < MATSIZE; i++) - for (int j = 0; j < MATSIZE; j++) { - m(i, j) = (i + MATSIZE * j); - } - asm("#begin"); - for (int a = 0; a < REPEAT; a++) { - m = Matrix::Ones() + 0.00005 * (m + (m * m)); - } - asm("#end"); - cout << m << endl; - return 0; -} diff --git a/bench/benchmarkSlice.cpp b/bench/benchmarkSlice.cpp deleted file mode 100644 index 584137b75..000000000 --- a/bench/benchmarkSlice.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX - -#include - -#include - -using namespace std; -using namespace Eigen; - -#ifndef REPEAT -#define REPEAT 10000 -#endif - -#ifndef SCALAR -#define SCALAR float -#endif - -int main(int argc, char *argv[]) { - typedef Matrix Mat; - Mat m(100, 100); - m.setRandom(); - - for (int a = 0; a < REPEAT; a++) { - int r, c, nr, nc; - r = Eigen::internal::random(0, 10); - c = Eigen::internal::random(0, 10); - nr = Eigen::internal::random(50, 80); - nc = Eigen::internal::random(50, 80); - m.block(r, c, nr, nc) += Mat::Ones(nr, nc); - m.block(r, c, nr, nc) *= SCALAR(10); - m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10); - m.block(r, c, nr, nc) /= SCALAR(10); - } - cout << m[0] << endl; - return 0; -} diff --git a/bench/benchmarkX.cpp b/bench/benchmarkX.cpp deleted file mode 100644 index eff931834..000000000 --- a/bench/benchmarkX.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b - -#include - -#include - -using namespace std; -using namespace Eigen; - -#ifndef MATTYPE -#define MATTYPE MatrixXLd -#endif - -#ifndef MATSIZE -#define MATSIZE 400 -#endif - -#ifndef REPEAT -#define REPEAT 100 -#endif - -int main(int argc, char *argv[]) { - MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE); - MATTYPE m(MATSIZE, MATSIZE); - for (int i = 0; i < MATSIZE; i++) - for (int j = 0; j < MATSIZE; j++) { - m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE); - } - for (int a = 0; a < REPEAT; a++) { - m = I + 0.0001 * (m + m * m); - } - cout << m(0, 0) << endl; - return 0; -} diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp deleted file mode 100644 index f0c49779b..000000000 --- a/bench/benchmarkXcwise.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX - -#include -#include - -using namespace std; -using namespace Eigen; - -#ifndef VECTYPE -#define VECTYPE VectorXLd -#endif - -#ifndef VECSIZE -#define VECSIZE 1000000 -#endif - -#ifndef REPEAT -#define REPEAT 1000 -#endif - -int main(int argc, char *argv[]) { - VECTYPE I = VECTYPE::Ones(VECSIZE); - VECTYPE m(VECSIZE, 1); - for (int i = 0; i < VECSIZE; i++) { - m[i] = 0.1 * i / VECSIZE; - } - for (int a = 0; a < REPEAT; a++) { - m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4); - } - cout << m[0] << endl; - return 0; -} diff --git a/bench/benchmark_aocl.cpp b/bench/benchmark_aocl.cpp deleted file mode 100644 index 33d7af217..000000000 --- a/bench/benchmark_aocl.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/* - * benchmark_aocl.cpp - AOCL Performance Benchmark Suite for Eigen - * - * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * Description: - * ------------ - * This benchmark suite evaluates the performance of Eigen mathematical - * operations when integrated with AMD Optimizing CPU Libraries (AOCL). It - * tests: - * - * 1. Vector Math Operations: Transcendental functions (exp, sin, cos, sqrt, - * log, etc.) using AOCL Vector Math Library (VML) for optimized - * double-precision operations - * - * 2. Matrix Operations: BLAS Level-3 operations (DGEMM) using AOCL BLAS library - * with support for both single-threaded and multithreaded execution - * - * 3. Linear Algebra: LAPACK operations (eigenvalue decomposition) using - * libflame - * - * 4. Real-world Scenarios: Financial risk computation simulating covariance - * matrix calculations and eigenvalue analysis for portfolio optimization - * - * The benchmark automatically detects AOCL configuration and adjusts test - * execution accordingly, providing performance comparisons between standard - * Eigen operations and AOCL-accelerated implementations. - * - * Compilation: - * ------------ - * # Using AOCC compiler (recommended for best AOCL compatibility): - * clang++ -O3 -g -DEIGEN_USE_AOCL_ALL -I - * -I${AOCL_ROOT}/include \ - * -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \ - * -lamdlibm -lm -lblis -lflame -lpthread -lrt -pthread \ - * -o build/eigen_aocl_benchmark - * - * # Alternative: Using GCC with proper library paths: - * g++ -O3 -g -DEIGEN_USE_AOCL_ALL -I - * -I${AOCL_ROOT}/include \ - * -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \ - * -lamdlibm -lm -lblis -lflame -lpthread -lrt \ - * -o build/eigen_aocl_benchmark - * - * # For multithreaded BLIS support: - * clang++ -O3 -g -fopenmp -DEIGEN_USE_AOCL_MT -I \ - * -I${AOCL_ROOT}/include -Wno-parentheses src/benchmark_aocl.cpp \ - * -L${AOCL_ROOT}/lib -lamdlibm -lm -lblis-mt -lflame -lpthread -lrt \ - * -o build/eigen_aocl_benchmark_mt - * - * Usage: - * ------ - * export AOCL_ROOT=/path/to/aocl/installation - * export LD_LIBRARY_PATH=$AOCL_ROOT/lib:$LD_LIBRARY_PATH - * ./build/eigen_aocl_benchmark - * - * Developer: - * ---------- - * Name: Sharad Saurabh Bhaskar - * Email: shbhaska@amd.com - * Organization: Advanced Micro Devices, Inc. - */ - -#include -#include -#include -#include -#include - -// Simple - just include Eigen headers -#include -#include -#include - -// Only include CBLAS if AOCL BLIS is available -#ifdef EIGEN_USE_AOCL_ALL -#include -#endif - -using namespace std; -using namespace std::chrono; -using namespace Eigen; - -void benchmarkVectorMath(int size) { - VectorXd v = VectorXd::LinSpaced(size, 0.1, 10.0); - VectorXd result(size); - double elapsed_ms = 0; - - cout << "\n--- Vector Math Benchmark (size = " << size << ") ---" << endl; - - auto start = high_resolution_clock::now(); - result = v.array().exp(); - auto end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "exp() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().sin(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "sin() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().cos(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "cos() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().sqrt(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "sqrt() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().cbrt(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "cbrt() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().abs(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "abs() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().log(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "log() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().log10(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "log10() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().exp2(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "exp2() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().asin(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "asin() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().sinh(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "sinh() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().acos(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "acos() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().cosh(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "cosh() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().tan(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "tan() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().atan(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "atan() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().tanh(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "tanh() time: " << elapsed_ms << " ms" << endl; - - VectorXd v2 = VectorXd::Random(size); - start = high_resolution_clock::now(); - result = v.array() + v2.array(); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "add() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().pow(2.0); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "pow() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().max(v2.array()); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "max() time: " << elapsed_ms << " ms" << endl; - - start = high_resolution_clock::now(); - result = v.array().min(v2.array()); - end = high_resolution_clock::now(); - elapsed_ms = duration_cast(end - start).count(); - cout << "min() time: " << elapsed_ms << " ms" << endl; -} - -// Function to benchmark BLAS operation: Matrix multiplication. -void benchmarkMatrixMultiplication(int matSize) { - cout << "\n--- BLIS-st DGEMM Benchmark (" << matSize << " x " << matSize - << ") ---" << endl; - - MatrixXd A = MatrixXd::Random(matSize, matSize); - MatrixXd B = MatrixXd::Random(matSize, matSize); - MatrixXd C(matSize, matSize); - - auto start = high_resolution_clock::now(); - C = A * B; - auto end = high_resolution_clock::now(); - double elapsed_ms = duration_cast(end - start).count(); - cout << "Matrix multiplication time: " << elapsed_ms << " ms" << endl; -} - -// Benchmark BLIS directly using its CBLAS interface if available. -void benchmarkBlisMultithreaded(int matSize, int numThreads) { -#if defined(EIGEN_AOCL_USE_BLIS_MT) - cout << "\n--- BLIS-mt DGEMM Benchmark (" << matSize << " x " << matSize - << ", threads=" << numThreads << ") ---" << endl; - vector A(matSize * matSize); - vector B(matSize * matSize); - vector C(matSize * matSize); - for (auto &v : A) - v = static_cast(rand()) / RAND_MAX; - for (auto &v : B) - v = static_cast(rand()) / RAND_MAX; - double alpha = 1.0, beta = 0.0; - string th = to_string(numThreads); - setenv("BLIS_NUM_THREADS", th.c_str(), 1); - auto start = high_resolution_clock::now(); - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, matSize, matSize, - matSize, alpha, A.data(), matSize, B.data(), matSize, beta, - C.data(), matSize); - auto end = high_resolution_clock::now(); - double elapsed_ms = duration_cast(end - start).count(); - cout << "BLIS dgemm time: " << elapsed_ms << " ms" << endl; -#else - (void)matSize; - (void)numThreads; - cout << "\nBLIS multithreaded support not enabled." << endl; -#endif -} - -// Function to benchmark LAPACK operation: Eigenvalue decomposition. -void benchmarkEigenDecomposition(int matSize) { - cout << "\n--- Eigenvalue Decomposition Benchmark (Matrix Size: " << matSize - << " x " << matSize << ") ---" << endl; - MatrixXd M = MatrixXd::Random(matSize, matSize); - // Make matrix symmetric (necessary for eigenvalue decomposition of - // self-adjoint matrices) - M = (M + M.transpose()) * 0.5; - - SelfAdjointEigenSolver eigensolver; - auto start = high_resolution_clock::now(); - eigensolver.compute(M); - auto end = high_resolution_clock::now(); - double elapsed_ms = duration_cast(end - start).count(); - if (eigensolver.info() == Success) { - cout << "Eigenvalue decomposition time: " << elapsed_ms << " ms" << endl; - } else { - cout << "Eigenvalue decomposition failed." << endl; - } -} - -// Function simulating a real-world FSI risk computation scenario. -// Example: Compute covariance matrix from simulated asset returns, then perform -// eigenvalue decomposition. -void benchmarkFSIRiskComputation(int numPeriods, int numAssets) { - cout << "\n--- FSI Risk Computation Benchmark ---" << endl; - cout << "Simulating " << numPeriods << " periods for " << numAssets - << " assets." << endl; - - // Simulate asset returns: each column represents an asset's returns. - MatrixXd returns = MatrixXd::Random(numPeriods, numAssets); - - // Compute covariance matrix: cov = (returns^T * returns) / (numPeriods - 1) - auto start = high_resolution_clock::now(); - MatrixXd cov = (returns.transpose() * returns) / (numPeriods - 1); - auto end = high_resolution_clock::now(); - double cov_time = duration_cast(end - start).count(); - cout << "Covariance matrix computation time: " << cov_time << " ms" << endl; - - // Eigenvalue decomposition on covariance matrix. - SelfAdjointEigenSolver eigensolver; - start = high_resolution_clock::now(); - eigensolver.compute(cov); - end = high_resolution_clock::now(); - double eig_time = duration_cast(end - start).count(); - if (eigensolver.info() == Success) { - cout << "Eigenvalue decomposition (covariance) time: " << eig_time << " ms" - << endl; - cout << "Top 3 Eigenvalues: " - << eigensolver.eigenvalues().tail(3).transpose() << endl; - } else { - cout << "Eigenvalue decomposition failed." << endl; - } -} - -int main() { - cout << "=== AOCL Benchmark for Eigen on AMD Platforms ===" << endl; - cout << "Developer: Sharad Saurabh Bhaskar (shbhaska@amd.com)" << endl; - cout << "Organization: Advanced Micro Devices, Inc." << endl; - cout << "License: Mozilla Public License 2.0" << endl << endl; - - // Print AOCL configuration -#ifdef EIGEN_USE_AOCL_MT - cout << "AOCL Mode: MULTITHREADED (MT)" << endl; - cout << "Features: Multithreaded BLIS, AOCL VML, LAPACK" << endl; -#elif defined(EIGEN_USE_AOCL_ALL) - cout << "AOCL Mode: SINGLE-THREADED (ALL)" << endl; - cout << "Features: Single-threaded BLIS, AOCL VML, LAPACK" << endl; -#else - cout << "AOCL Mode: DISABLED" << endl; - cout << "Using standard Eigen implementation" << endl; -#endif - cout << "Hardware threads available: " << thread::hardware_concurrency() << endl << endl; - - // Benchmark vector math functions with varying vector sizes. - vector vectorSizes = {5000000, 10000000, 50000000}; - for (int size : vectorSizes) { - benchmarkVectorMath(size); - } - - // Benchmark matrix multiplication for varying sizes. - vector matrixSizes = {1024}; - for (int msize : matrixSizes) { - benchmarkMatrixMultiplication(msize); -#if defined(EIGEN_AOCL_USE_BLIS_MT) - benchmarkBlisMultithreaded(msize, thread::hardware_concurrency()); -#endif - } - - // Benchmark LAPACK: Eigenvalue Decomposition. - for (int msize : matrixSizes) { - benchmarkEigenDecomposition(msize); - } - - // Benchmark a complex FSI risk computation scenario. - // For example, simulate 10,000 time periods (days) for 500 assets. - benchmarkFSIRiskComputation(10000, 500); - - cout << "\n=== Benchmark Complete ===" << endl; - return 0; -} diff --git a/bench/benchmark_suite b/bench/benchmark_suite deleted file mode 100755 index 3f21d3661..000000000 --- a/bench/benchmark_suite +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -CXX=${CXX-g++} # default value unless caller has defined CXX -echo "Fixed size 3x3, column-major, -DNDEBUG" -$CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null -echo "Fixed size 3x3, column-major, with asserts" -$CXX -O3 -I .. benchmark.cpp -o benchmark && time ./benchmark >/dev/null -echo "Fixed size 3x3, row-major, -DNDEBUG" -$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null -echo "Fixed size 3x3, row-major, with asserts" -$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmark.cpp -o benchmark && time ./benchmark >/dev/null -echo "Dynamic size 20x20, column-major, -DNDEBUG" -$CXX -O3 -I .. -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null -echo "Dynamic size 20x20, column-major, with asserts" -$CXX -O3 -I .. benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null -echo "Dynamic size 20x20, row-major, -DNDEBUG" -$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null -echo "Dynamic size 20x20, row-major, with asserts" -$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt deleted file mode 100644 index 42094e867..000000000 --- a/bench/btl/CMakeLists.txt +++ /dev/null @@ -1,107 +0,0 @@ -project(BTL) - -cmake_minimum_required(VERSION 2.6.2) - -set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake) -include(MacroOptionalAddSubdirectory) - -option(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF) - -set(CMAKE_INCLUDE_CURRENT_DIR ON) - -string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER}) -if(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) - set(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}") - set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}") - if(BTL_NOVEC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - endif(BTL_NOVEC) -endif(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) - -if(MSVC) - set(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG") -# set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - if(BTL_NOVEC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - endif(BTL_NOVEC) -endif(MSVC) - -if(IS_ICPC) - set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}") - set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}") -endif() - -include_directories( - ${PROJECT_SOURCE_DIR}/actions - ${PROJECT_SOURCE_DIR}/generic_bench - ${PROJECT_SOURCE_DIR}/generic_bench/utils - ${PROJECT_SOURCE_DIR}/libs/STL) - -# find_package(MKL) -# if (MKL_FOUND) -# add_definitions(-DHAVE_MKL) -# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES}) -# endif () - -find_library(EIGEN_BTL_RT_LIBRARY rt) -# if we cannot find it easily, then we don't need it! -if(NOT EIGEN_BTL_RT_LIBRARY) - set(EIGEN_BTL_RT_LIBRARY "") -endif() - -macro(BTL_ADD_BENCH targetname) - - foreach(_current_var ${ARGN}) - set(_last_var ${_current_var}) - endforeach() - - set(_sources ${ARGN}) - list(LENGTH _sources _argn_length) - - list(REMOVE_ITEM _sources ON OFF TRUE FALSE) - - list(LENGTH _sources _src_length) - - if (${_argn_length} EQUAL ${_src_length}) - set(_last_var ON) - endif () - - option(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var}) - - if(BUILD_${targetname}) - add_executable(${targetname} ${_sources}) - add_test(${targetname} "${targetname}") - target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY}) - endif(BUILD_${targetname}) - -endmacro(BTL_ADD_BENCH) - -macro(btl_add_target_property target prop value) - - if(BUILD_${target}) - get_target_property(previous ${target} ${prop}) - if(NOT previous) - set(previous "") - endif() - set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}") - endif() - -endmacro() - -enable_testing() - -add_subdirectory(libs/eigen3) -add_subdirectory(libs/eigen2) -add_subdirectory(libs/tensors) -add_subdirectory(libs/BLAS) -add_subdirectory(libs/ublas) -add_subdirectory(libs/gmm) -add_subdirectory(libs/mtl4) -add_subdirectory(libs/blitz) -add_subdirectory(libs/tvmet) -add_subdirectory(libs/STL) -add_subdirectory(libs/blaze) - -add_subdirectory(data) - - diff --git a/bench/btl/COPYING b/bench/btl/COPYING deleted file mode 100644 index 486449cc3..000000000 --- a/bench/btl/COPYING +++ /dev/null @@ -1,340 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/bench/btl/README b/bench/btl/README deleted file mode 100644 index ebed88960..000000000 --- a/bench/btl/README +++ /dev/null @@ -1,154 +0,0 @@ -Bench Template Library - -**************************************** -Introduction : - -The aim of this project is to compare the performance -of available numerical libraries. The code is designed -as generic and modular as possible. Thus, adding new -numerical libraries or new numerical tests should -require minimal effort. - - -***************************************** - -Installation : - -BTL uses cmake / ctest: - -1 - create a build directory: - - $ mkdir build - $ cd build - -2 - configure: - - $ ccmake .. - -3 - run the bench using ctest: - - $ ctest -V - -You can run the benchmarks only on libraries matching a given regular expression: - ctest -V -R -For instance: - ctest -V -R eigen2 - -You can also select a given set of actions defining the environment variable BTL_CONFIG this way: - BTL_CONFIG="-a action1{:action2}*" ctest -V -An example: - BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2 - -Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option: - BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2 - -4 : Analyze the result. different data files (.dat) are produced in each libs directories. - If gnuplot is available, choose a directory name in the data directory to store the results and type: - $ cd data - $ mkdir my_directory - $ cp ../libs/*/*.dat my_directory - Build the data utilities in this (data) directory - make - Then you can look the raw data, - go_mean my_directory - or smooth the data first : - smooth_all.sh my_directory - go_mean my_directory_smooth - - -************************************************* - -Files and directories : - - generic_bench : all the bench sources common to all libraries - - actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested. - - libs/* : bench sources specific to each tested libraries. - - machine_dep : directory used to store machine specific Makefile.in - - data : directory used to store gnuplot scripts and data analysis utilities - -************************************************** - -Principles : the code modularity is achieved by defining two concepts : - - ****** Action concept : This is a class defining which kind - of test must be performed (e.g. a matrix_vector_product). - An Action should define the following methods : - - *** Ctor using the size of the problem (matrix or vector size) as an argument - Action action(size); - *** initialize : this method initialize the calculation (e.g. initialize the matrices and vectors arguments) - action.initialize(); - *** calculate : this method actually launch the calculation to be benchmarked - action.calculate; - *** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation) - *** name() : this method returns the name of the action (std::string) - - ****** Interface concept : This is a class or namespace defining how to use a given library and - its specific containers (matrix and vector). Up to now an interface should following types - - *** real_type : kind of float to be used (float or double) - *** stl_vector : must correspond to std::vector - *** stl_matrix : must correspond to std::vector - *** gene_vector : the vector type for this interface --> e.g. (real_type *) for the C_interface - *** gene_matrix : the matrix type for this interface --> e.g. (gene_vector *) for the C_interface - - + the following common methods - - *** free_matrix(gene_matrix & A, int N) dealocation of a N sized gene_matrix A - *** free_vector(gene_vector & B) dealocation of a N sized gene_vector B - *** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A. - The allocation of A is done in this function. - *** vector_to_stl(gene_vector & B, stl_vector & B_stl) copy the content of an stl_vector B_stl into a gene_vector B. - The allocation of B is done in this function. - *** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an gene_matrix A into an stl_matrix A_stl. - The size of A_STL must corresponds to the size of A. - *** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of an gene_vector A into an stl_vector A_stl. - The size of B_STL must corresponds to the size of B. - *** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source - and cible must be sized NxN. - *** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source - and cible must be sized N. - - and the following method corresponding to the action one wants to be benchmarked : - - *** matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) - *** matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N) - *** ata_product(const gene_matrix & A, gene_matrix & X, int N) - *** aat_product(const gene_matrix & A, gene_matrix & X, int N) - *** axpy(real coef, const gene_vector & X, gene_vector & Y, int N) - - The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with - an interface. A typical main.cpp source stored in a given library directory libs/A_LIB - looks like : - - bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; - - this function will produce XY data file containing measured mflops as a function of the size for 50 - sizes between 10 and 10000. - - This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time - measurements must be done. For example, the X86_Perf_Analyzer use the asm rdtsc function and provides - a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer - so - - bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; - - is equivalent to - - bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; - - If your system supports it we suggest to use a mixed implementation (X86_perf_Analyzer+Portable_Perf_Analyzer). - replace - bench(size_min,size_max,nb_point); - with - bench(size_min,size_max,nb_point); - in generic/bench.hh - -. - - - diff --git a/bench/btl/actions/action_aat_product.hh b/bench/btl/actions/action_aat_product.hh deleted file mode 100644 index 2de1740ca..000000000 --- a/bench/btl/actions/action_aat_product.hh +++ /dev/null @@ -1,118 +0,0 @@ -//===================================================== -// File : action_aat_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_AAT_PRODUCT -#define ACTION_AAT_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_aat_product { - public: - // Ctor - - Action_aat_product(int size) : _size(size) { - MESSAGE("Action_aat_product Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_matrix(X_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(X, X_stl); - } - - // invalidate copy ctor - - Action_aat_product(const Action_aat_product&) { - INFOS("illegal call to Action_aat_product Copy Ctor"); - exit(0); - } - - // Dtor - - ~Action_aat_product(void) { - MESSAGE("Action_aat_product Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_matrix(X, _size); - - Interface::free_matrix(A_ref, _size); - Interface::free_matrix(X_ref, _size); - } - - // action name - - static inline std::string name(void) { return "aat_" + Interface::name(); } - - double nb_op_base(void) { return double(_size) * double(_size) * double(_size); } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_matrix(X_ref, X, _size); - } - - inline void calculate(void) { Interface::aat_product(A, X, _size); } - - void check_result(void) { - if (_size > 128) return; - // calculation check - - Interface::matrix_to_stl(X, resu_stl); - - STL_interface::aat_product(A_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(1); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_ata_product.hh b/bench/btl/actions/action_ata_product.hh deleted file mode 100644 index 0447ab3b2..000000000 --- a/bench/btl/actions/action_ata_product.hh +++ /dev/null @@ -1,118 +0,0 @@ -//===================================================== -// File : action_ata_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_ATA_PRODUCT -#define ACTION_ATA_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_ata_product { - public: - // Ctor - - Action_ata_product(int size) : _size(size) { - MESSAGE("Action_ata_product Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_matrix(X_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(X, X_stl); - } - - // invalidate copy ctor - - Action_ata_product(const Action_ata_product&) { - INFOS("illegal call to Action_ata_product Copy Ctor"); - exit(0); - } - - // Dtor - - ~Action_ata_product(void) { - MESSAGE("Action_ata_product Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_matrix(X, _size); - - Interface::free_matrix(A_ref, _size); - Interface::free_matrix(X_ref, _size); - } - - // action name - - static inline std::string name(void) { return "ata_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size * _size; } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_matrix(X_ref, X, _size); - } - - inline void calculate(void) { Interface::ata_product(A, X, _size); } - - void check_result(void) { - if (_size > 128) return; - // calculation check - - Interface::matrix_to_stl(X, resu_stl); - - STL_interface::ata_product(A_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(1); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_atv_product.hh b/bench/btl/actions/action_atv_product.hh deleted file mode 100644 index 93c2e1c22..000000000 --- a/bench/btl/actions/action_atv_product.hh +++ /dev/null @@ -1,120 +0,0 @@ -//===================================================== -// File : action_atv_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_ATV_PRODUCT -#define ACTION_ATV_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_atv_product { - public: - Action_atv_product(int size) : _size(size) { - MESSAGE("Action_atv_product Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X, X_stl); - } - - // invalidate copy ctor - Action_atv_product(const Action_atv_product&) { - INFOS("illegal call to Action_atv_product Copy Ctor"); - exit(1); - } - - ~Action_atv_product(void) { - MESSAGE("Action_atv_product Dtor"); - - Interface::free_matrix(A, _size); - Interface::free_vector(B); - Interface::free_vector(X); - - Interface::free_matrix(A_ref, _size); - Interface::free_vector(B_ref); - Interface::free_vector(X_ref); - } - - static inline std::string name() { return "atv_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size; } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - Interface::copy_vector(X_ref, X, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("begin atv"); - Interface::atv_product(A, B, X, _size); - BTL_ASM_COMMENT("end atv"); - } - - void check_result(void) { - if (_size > 128) return; - Interface::vector_to_stl(X, resu_stl); - - STL_interface::atv_product(A_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(1); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_vector B_ref; - typename Interface::gene_vector X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh deleted file mode 100644 index e74cbd8cb..000000000 --- a/bench/btl/actions/action_axpby.hh +++ /dev/null @@ -1,116 +0,0 @@ -//===================================================== -// File : action_axpby.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_AXPBY -#define ACTION_AXPBY -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_axpby { - public: - // Ctor - Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) { - MESSAGE("Action_axpby Ctor"); - - // STL vector initialization - init_vector(X_stl, _size); - init_vector(Y_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(Y_ref, Y_stl); - - Interface::vector_from_stl(X, X_stl); - Interface::vector_from_stl(Y, Y_stl); - } - - // invalidate copy ctor - Action_axpby(const Action_axpby&) { - INFOS("illegal call to Action_axpby Copy Ctor"); - exit(1); - } - - // Dtor - ~Action_axpby(void) { - MESSAGE("Action_axpby Dtor"); - - // deallocation - Interface::free_vector(X_ref); - Interface::free_vector(Y_ref); - - Interface::free_vector(X); - Interface::free_vector(Y); - } - - // action name - static inline std::string name(void) { return "axpby_" + Interface::name(); } - - double nb_op_base(void) { return 3.0 * _size; } - - inline void initialize(void) { - Interface::copy_vector(X_ref, X, _size); - Interface::copy_vector(Y_ref, Y, _size); - } - - inline void calculate(void) { - BTL_ASM_COMMENT("mybegin axpby"); - Interface::axpby(_alpha, X, _beta, Y, _size); - BTL_ASM_COMMENT("myend axpby"); - } - - void check_result(void) { - if (_size > 128) return; - // calculation check - Interface::vector_to_stl(Y, resu_stl); - - STL_interface::axpby(_alpha, X_stl, _beta, Y_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(Y_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(2); - } - } - - private: - typename Interface::stl_vector X_stl; - typename Interface::stl_vector Y_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_vector X_ref; - typename Interface::gene_vector Y_ref; - - typename Interface::gene_vector X; - typename Interface::gene_vector Y; - - typename Interface::real_type _alpha; - typename Interface::real_type _beta; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_axpy.hh b/bench/btl/actions/action_axpy.hh deleted file mode 100644 index 073f36485..000000000 --- a/bench/btl/actions/action_axpy.hh +++ /dev/null @@ -1,124 +0,0 @@ -//===================================================== -// File : action_axpy.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_AXPY -#define ACTION_AXPY -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_axpy { - public: - // Ctor - - Action_axpy(int size) : _coef(1.0), _size(size) { - MESSAGE("Action_axpy Ctor"); - - // STL vector initialization - - init_vector(X_stl, _size); - init_vector(Y_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(Y_ref, Y_stl); - - Interface::vector_from_stl(X, X_stl); - Interface::vector_from_stl(Y, Y_stl); - } - - // invalidate copy ctor - - Action_axpy(const Action_axpy&) { - INFOS("illegal call to Action_axpy Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_axpy(void) { - MESSAGE("Action_axpy Dtor"); - - // deallocation - - Interface::free_vector(X_ref); - Interface::free_vector(Y_ref); - - Interface::free_vector(X); - Interface::free_vector(Y); - } - - // action name - - static inline std::string name(void) { return "axpy_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size; } - - inline void initialize(void) { - Interface::copy_vector(X_ref, X, _size); - Interface::copy_vector(Y_ref, Y, _size); - } - - inline void calculate(void) { - BTL_ASM_COMMENT("mybegin axpy"); - Interface::axpy(_coef, X, Y, _size); - BTL_ASM_COMMENT("myend axpy"); - } - - void check_result(void) { - if (_size > 128) return; - // calculation check - - Interface::vector_to_stl(Y, resu_stl); - - STL_interface::axpy(_coef, X_stl, Y_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(Y_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(0); - } - } - - private: - typename Interface::stl_vector X_stl; - typename Interface::stl_vector Y_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_vector X_ref; - typename Interface::gene_vector Y_ref; - - typename Interface::gene_vector X; - typename Interface::gene_vector Y; - - typename Interface::real_type _coef; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_cholesky.hh b/bench/btl/actions/action_cholesky.hh deleted file mode 100644 index c2ac1c20b..000000000 --- a/bench/btl/actions/action_cholesky.hh +++ /dev/null @@ -1,110 +0,0 @@ -//===================================================== -// File : action_cholesky.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_CHOLESKY -#define ACTION_CHOLESKY -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_cholesky { - public: - // Ctor - - Action_cholesky(int size) : _size(size) { - MESSAGE("Action_cholesky Ctor"); - - // STL mat/vec initialization - init_matrix_symm(X_stl, _size); - init_matrix(C_stl, _size); - - // make sure X is invertible - for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100; - - // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(X, X_stl); - Interface::matrix_from_stl(C, C_stl); - - _cost = 0; - for (int j = 0; j < _size; ++j) { - double r = std::max(_size - j - 1, 0); - _cost += 2 * (r * j + r + j); - } - } - - // invalidate copy ctor - - Action_cholesky(const Action_cholesky&) { - INFOS("illegal call to Action_cholesky Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_cholesky(void) { - MESSAGE("Action_cholesky Dtor"); - - // deallocation - Interface::free_matrix(X_ref, _size); - Interface::free_matrix(X, _size); - Interface::free_matrix(C, _size); - } - - // action name - - static inline std::string name(void) { return "cholesky_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - - inline void calculate(void) { Interface::cholesky(X, C, _size); } - - void check_result(void) { - // calculation check - // STL_interface::cholesky(X_stl,C_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(C_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix C_stl; - - typename Interface::gene_matrix X_ref; - typename Interface::gene_matrix X; - typename Interface::gene_matrix C; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_ger.hh b/bench/btl/actions/action_ger.hh deleted file mode 100644 index d46dd0a3e..000000000 --- a/bench/btl/actions/action_ger.hh +++ /dev/null @@ -1,114 +0,0 @@ - -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_GER -#define ACTION_GER -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_ger { - public: - // Ctor - BTL_DONT_INLINE Action_ger(int size) : _size(size) { - MESSAGE("Action_ger Ctor"); - - // STL matrix and vector initialization - typename Interface::stl_matrix tmp; - init_matrix(A_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(X, X_stl); - } - - // invalidate copy ctor - Action_ger(const Action_ger&) { - INFOS("illegal call to Action_ger Copy Ctor"); - exit(1); - } - - // Dtor - BTL_DONT_INLINE ~Action_ger(void) { - MESSAGE("Action_ger Dtor"); - Interface::free_matrix(A, _size); - Interface::free_vector(B); - Interface::free_vector(X); - Interface::free_matrix(A_ref, _size); - Interface::free_vector(B_ref); - Interface::free_vector(X_ref); - } - - // action name - static inline std::string name(void) { return "ger_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size; } - - BTL_DONT_INLINE void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - Interface::copy_vector(X_ref, X, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("#begin ger"); - Interface::ger(A, B, X, _size); - BTL_ASM_COMMENT("end ger"); - } - - BTL_DONT_INLINE void check_result(void) { - // calculation check - Interface::vector_to_stl(X, resu_stl); - - STL_interface::ger(A_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-3) { - INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_vector B_ref; - typename Interface::gene_vector X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_hessenberg.hh b/bench/btl/actions/action_hessenberg.hh deleted file mode 100644 index c364bfcc4..000000000 --- a/bench/btl/actions/action_hessenberg.hh +++ /dev/null @@ -1,200 +0,0 @@ -//===================================================== -// File : action_hessenberg.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_HESSENBERG -#define ACTION_HESSENBERG -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_hessenberg { - public: - // Ctor - - Action_hessenberg(int size) : _size(size) { - MESSAGE("Action_hessenberg Ctor"); - - // STL vector initialization - init_matrix(X_stl, _size); - - init_matrix(C_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(X, X_stl); - Interface::matrix_from_stl(C, C_stl); - - _cost = 0; - for (int j = 0; j < _size - 2; ++j) { - double r = std::max(0, _size - j - 1); - double b = std::max(0, _size - j - 2); - _cost += 6 + 3 * b + r * r * 4 + r * _size * 4; - } - } - - // invalidate copy ctor - - Action_hessenberg(const Action_hessenberg&) { - INFOS("illegal call to Action_hessenberg Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_hessenberg(void) { - MESSAGE("Action_hessenberg Dtor"); - - // deallocation - Interface::free_matrix(X_ref, _size); - Interface::free_matrix(X, _size); - Interface::free_matrix(C, _size); - } - - // action name - - static inline std::string name(void) { return "hessenberg_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - - inline void calculate(void) { Interface::hessenberg(X, C, _size); } - - void check_result(void) { - // calculation check - Interface::matrix_to_stl(C, resu_stl); - - // STL_interface::hessenberg(X_stl,C_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(C_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix C_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix X_ref; - typename Interface::gene_matrix X; - typename Interface::gene_matrix C; - - int _size; - double _cost; -}; - -template -class Action_tridiagonalization { - public: - // Ctor - - Action_tridiagonalization(int size) : _size(size) { - MESSAGE("Action_tridiagonalization Ctor"); - - // STL vector initialization - init_matrix(X_stl, _size); - - for (int i = 0; i < _size; ++i) { - for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i]; - } - - init_matrix(C_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(X, X_stl); - Interface::matrix_from_stl(C, C_stl); - - _cost = 0; - for (int j = 0; j < _size - 2; ++j) { - double r = std::max(0, _size - j - 1); - double b = std::max(0, _size - j - 2); - _cost += 6. + 3. * b + r * r * 8.; - } - } - - // invalidate copy ctor - - Action_tridiagonalization(const Action_tridiagonalization&) { - INFOS("illegal call to Action_tridiagonalization Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_tridiagonalization(void) { - MESSAGE("Action_tridiagonalization Dtor"); - - // deallocation - Interface::free_matrix(X_ref, _size); - Interface::free_matrix(X, _size); - Interface::free_matrix(C, _size); - } - - // action name - - static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - - inline void calculate(void) { Interface::tridiagonalization(X, C, _size); } - - void check_result(void) { - // calculation check - Interface::matrix_to_stl(C, resu_stl); - - // STL_interface::tridiagonalization(X_stl,C_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(C_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix C_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix X_ref; - typename Interface::gene_matrix X; - typename Interface::gene_matrix C; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_lu_decomp.hh b/bench/btl/actions/action_lu_decomp.hh deleted file mode 100644 index 46fad915b..000000000 --- a/bench/btl/actions/action_lu_decomp.hh +++ /dev/null @@ -1,108 +0,0 @@ -//===================================================== -// File : action_lu_decomp.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_LU_DECOMP -#define ACTION_LU_DECOMP -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_lu_decomp { - public: - // Ctor - - Action_lu_decomp(int size) : _size(size) { - MESSAGE("Action_lu_decomp Ctor"); - - // STL vector initialization - init_matrix(X_stl, _size); - - init_matrix(C_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(X, X_stl); - Interface::matrix_from_stl(C, C_stl); - - _cost = 2.0 * size * size * size / 3.0 + size * size; - } - - // invalidate copy ctor - - Action_lu_decomp(const Action_lu_decomp&) { - INFOS("illegal call to Action_lu_decomp Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_lu_decomp(void) { - MESSAGE("Action_lu_decomp Dtor"); - - // deallocation - Interface::free_matrix(X_ref, _size); - Interface::free_matrix(X, _size); - Interface::free_matrix(C, _size); - } - - // action name - - static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - - inline void calculate(void) { Interface::lu_decomp(X, C, _size); } - - void check_result(void) { - // calculation check - Interface::matrix_to_stl(C, resu_stl); - - // STL_interface::lu_decomp(X_stl,C_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(C_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix C_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix X_ref; - typename Interface::gene_matrix X; - typename Interface::gene_matrix C; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_lu_solve.hh b/bench/btl/actions/action_lu_solve.hh deleted file mode 100644 index afc640c3d..000000000 --- a/bench/btl/actions/action_lu_solve.hh +++ /dev/null @@ -1,120 +0,0 @@ -//===================================================== -// File : action_lu_solve.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_LU_SOLVE -#define ACTION_LU_SOLVE -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_lu_solve { - public: - static inline std::string name(void) { return "lu_solve_" + Interface::name(); } - - static double nb_op_base(int size) { - return 2.0 * size * size * size / 3.0; // questionable but not really important - } - - static double calculate(int nb_calc, int size) { - // STL matrix and vector initialization - - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - - init_matrix(A_stl, size); - init_vector(B_stl, size); - init_vector(X_stl, size); - - // generic matrix and vector initialization - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - typename Interface::gene_matrix LU; - - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X, X_stl); - Interface::matrix_from_stl(LU, A_stl); - - // local variable : - - typename Interface::Pivot_Vector pivot; // pivot vector - Interface::new_Pivot_Vector(pivot, size); - - // timer utilities - - Portable_Timer chronos; - - // time measurement - - chronos.start(); - - for (int ii = 0; ii < nb_calc; ii++) { - // LU factorization - Interface::copy_matrix(A, LU, size); - Interface::LU_factor(LU, pivot, size); - - // LU solve - - Interface::LU_solve(LU, pivot, B, X, size); - } - - // Time stop - - chronos.stop(); - - double time = chronos.user_time(); - - // check result : - - typename Interface::stl_vector B_new_stl(size); - Interface::vector_to_stl(X, X_stl); - - STL_interface::matrix_vector_product(A_stl, X_stl, B_new_stl, size); - - typename Interface::real_type error = STL_interface::norm_diff(B_stl, B_new_stl); - - if (error > 1.e-5) { - INFOS("WRONG CALCULATION...residual=" << error); - STL_interface::display_vector(B_stl); - STL_interface::display_vector(B_new_stl); - exit(0); - } - - // deallocation and return time - - Interface::free_matrix(A, size); - Interface::free_vector(B); - Interface::free_vector(X); - Interface::free_Pivot_Vector(pivot); - - return time; - } -}; - -#endif diff --git a/bench/btl/actions/action_matrix_matrix_product.hh b/bench/btl/actions/action_matrix_matrix_product.hh deleted file mode 100644 index a66d47756..000000000 --- a/bench/btl/actions/action_matrix_matrix_product.hh +++ /dev/null @@ -1,124 +0,0 @@ -//===================================================== -// File : action_matrix_matrix_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_MATRIX_MATRIX_PRODUCT -#define ACTION_MATRIX_MATRIX_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_matrix_matrix_product { - public: - // Ctor - - Action_matrix_matrix_product(int size) : _size(size) { - MESSAGE("Action_matrix_matrix_product Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_matrix(B_stl, _size); - init_matrix(X_stl, _size); - init_matrix(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(B_ref, B_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(B, B_stl); - Interface::matrix_from_stl(X, X_stl); - } - - // invalidate copy ctor - - Action_matrix_matrix_product(const Action_matrix_matrix_product&) { - INFOS("illegal call to Action_matrix_matrix_product Copy Ctor"); - exit(0); - } - - // Dtor - - ~Action_matrix_matrix_product(void) { - MESSAGE("Action_matrix_matrix_product Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_matrix(B, _size); - Interface::free_matrix(X, _size); - - Interface::free_matrix(A_ref, _size); - Interface::free_matrix(B_ref, _size); - Interface::free_matrix(X_ref, _size); - } - - // action name - - static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size * _size; } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_matrix(B_ref, B, _size); - Interface::copy_matrix(X_ref, X, _size); - } - - inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); } - - void check_result(void) { - // calculation check - if (_size < 200) { - Interface::matrix_to_stl(X, resu_stl); - STL_interface::matrix_matrix_product(A_stl, B_stl, X_stl, _size); - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(1); - } - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix B_stl; - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix B_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix B; - typename Interface::gene_matrix X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_matrix_matrix_product_bis.hh b/bench/btl/actions/action_matrix_matrix_product_bis.hh deleted file mode 100644 index c02dfb36f..000000000 --- a/bench/btl/actions/action_matrix_matrix_product_bis.hh +++ /dev/null @@ -1,131 +0,0 @@ -//===================================================== -// File : action_matrix_matrix_product_bis.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS -#define ACTION_MATRIX_MATRIX_PRODUCT_BIS -#include "utilities.h" -#include "STL_interface.hh" -#include "STL_timer.hh" -#include -#include "init_function.hh" -#include "init_vector.hh" -#include "init_matrix.hh" - -using namespace std; - -template -class Action_matrix_matrix_product_bis { - public: - static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); } - - static double nb_op_base(int size) { return 2.0 * size * size * size; } - - static double calculate(int nb_calc, int size) { - // STL matrix and vector initialization - - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix B_stl; - typename Interface::stl_matrix X_stl; - - init_matrix(A_stl, size); - init_matrix(B_stl, size); - init_matrix(X_stl, size); - - // generic matrix and vector initialization - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix B_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix B; - typename Interface::gene_matrix X; - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(B_ref, B_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(B, B_stl); - Interface::matrix_from_stl(X, X_stl); - - // STL_timer utilities - - STL_timer chronos; - - // Baseline evaluation - - chronos.start_baseline(nb_calc); - - do { - Interface::copy_matrix(A_ref, A, size); - Interface::copy_matrix(B_ref, B, size); - Interface::copy_matrix(X_ref, X, size); - - // Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!! - } while (chronos.check()); - - chronos.report(true); - - // Time measurement - - chronos.start(nb_calc); - - do { - Interface::copy_matrix(A_ref, A, size); - Interface::copy_matrix(B_ref, B, size); - Interface::copy_matrix(X_ref, X, size); - - Interface::matrix_matrix_product(A, B, X, size); // here it is not commented !!!! - } while (chronos.check()); - - chronos.report(true); - - double time = chronos.calculated_time / 2000.0; - - // calculation check - - typename Interface::stl_matrix resu_stl(size); - - Interface::matrix_to_stl(X, resu_stl); - - STL_interface::matrix_matrix_product(A_stl, B_stl, X_stl, size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-6) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(1); - } - - // deallocation and return time - - Interface::free_matrix(A, size); - Interface::free_matrix(B, size); - Interface::free_matrix(X, size); - - Interface::free_matrix(A_ref, size); - Interface::free_matrix(B_ref, size); - Interface::free_matrix(X_ref, size); - - return time; - } -}; - -#endif diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh deleted file mode 100644 index 002f87654..000000000 --- a/bench/btl/actions/action_matrix_vector_product.hh +++ /dev/null @@ -1,129 +0,0 @@ -//===================================================== -// File : action_matrix_vector_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_MATRIX_VECTOR_PRODUCT -#define ACTION_MATRIX_VECTOR_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_matrix_vector_product { - public: - // Ctor - - BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) { - MESSAGE("Action_matrix_vector_product Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(X, X_stl); - } - - // invalidate copy ctor - - Action_matrix_vector_product(const Action_matrix_vector_product&) { - INFOS("illegal call to Action_matrix_vector_product Copy Ctor"); - exit(1); - } - - // Dtor - - BTL_DONT_INLINE ~Action_matrix_vector_product(void) { - MESSAGE("Action_matrix_vector_product Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_vector(B); - Interface::free_vector(X); - - Interface::free_matrix(A_ref, _size); - Interface::free_vector(B_ref); - Interface::free_vector(X_ref); - } - - // action name - - static inline std::string name(void) { return "matrix_vector_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size; } - - BTL_DONT_INLINE void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - Interface::copy_vector(X_ref, X, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("#begin matrix_vector_product"); - Interface::matrix_vector_product(A, B, X, _size); - BTL_ASM_COMMENT("end matrix_vector_product"); - } - - BTL_DONT_INLINE void check_result(void) { - // calculation check - - Interface::vector_to_stl(X, resu_stl); - - STL_interface::matrix_vector_product(A_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-5) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(0); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_vector B_ref; - typename Interface::gene_vector X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_partial_lu.hh b/bench/btl/actions/action_partial_lu.hh deleted file mode 100644 index 400e3ffe0..000000000 --- a/bench/btl/actions/action_partial_lu.hh +++ /dev/null @@ -1,108 +0,0 @@ -//===================================================== -// File : action_lu_decomp.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_PARTIAL_LU -#define ACTION_PARTIAL_LU -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_partial_lu { - public: - // Ctor - - Action_partial_lu(int size) : _size(size) { - MESSAGE("Action_partial_lu Ctor"); - - // STL vector initialization - init_matrix(X_stl, _size); - init_matrix(C_stl, _size); - - // make sure X is invertible - for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1; - - // generic matrix and vector initialization - Interface::matrix_from_stl(X_ref, X_stl); - Interface::matrix_from_stl(X, X_stl); - Interface::matrix_from_stl(C, C_stl); - - _cost = 2.0 * size * size * size / 3.0 + size * size; - } - - // invalidate copy ctor - - Action_partial_lu(const Action_partial_lu&) { - INFOS("illegal call to Action_partial_lu Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_partial_lu(void) { - MESSAGE("Action_partial_lu Dtor"); - - // deallocation - Interface::free_matrix(X_ref, _size); - Interface::free_matrix(X, _size); - Interface::free_matrix(C, _size); - } - - // action name - - static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); } - - inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); } - - void check_result(void) { - // calculation check - // Interface::matrix_to_stl(C,resu_stl); - - // STL_interface::lu_decomp(X_stl,C_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(C_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix C_stl; - - typename Interface::gene_matrix X_ref; - typename Interface::gene_matrix X; - typename Interface::gene_matrix C; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_rot.hh b/bench/btl/actions/action_rot.hh deleted file mode 100644 index 7cc3c6162..000000000 --- a/bench/btl/actions/action_rot.hh +++ /dev/null @@ -1,104 +0,0 @@ - -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_ROT -#define ACTION_ROT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_rot { - public: - // Ctor - BTL_DONT_INLINE Action_rot(int size) : _size(size) { - MESSAGE("Action_rot Ctor"); - - // STL matrix and vector initialization - typename Interface::stl_matrix tmp; - init_vector(A_stl, _size); - init_vector(B_stl, _size); - - // generic matrix and vector initialization - Interface::vector_from_stl(A_ref, A_stl); - Interface::vector_from_stl(A, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(B, B_stl); - } - - // invalidate copy ctor - Action_rot(const Action_rot&) { - INFOS("illegal call to Action_rot Copy Ctor"); - exit(1); - } - - // Dtor - BTL_DONT_INLINE ~Action_rot(void) { - MESSAGE("Action_rot Dtor"); - Interface::free_vector(A); - Interface::free_vector(B); - Interface::free_vector(A_ref); - Interface::free_vector(B_ref); - } - - // action name - static inline std::string name(void) { return "rot_" + Interface::name(); } - - double nb_op_base(void) { return 6.0 * _size; } - - BTL_DONT_INLINE void initialize(void) { - Interface::copy_vector(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("#begin rot"); - Interface::rot(A, B, 0.5, 0.6, _size); - BTL_ASM_COMMENT("end rot"); - } - - BTL_DONT_INLINE void check_result(void) { - // calculation check - // Interface::vector_to_stl(X,resu_stl); - - // STL_interface::rot(A_stl,B_stl,X_stl,_size); - - // typename Interface::real_type error= - // STL_interface::norm_diff(X_stl,resu_stl); - - // if (error>1.e-3){ - // INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - // } - } - - private: - typename Interface::stl_vector A_stl; - typename Interface::stl_vector B_stl; - - typename Interface::gene_vector A_ref; - typename Interface::gene_vector B_ref; - - typename Interface::gene_vector A; - typename Interface::gene_vector B; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_symv.hh b/bench/btl/actions/action_symv.hh deleted file mode 100644 index d8b38551e..000000000 --- a/bench/btl/actions/action_symv.hh +++ /dev/null @@ -1,121 +0,0 @@ -//===================================================== -// File : action_symv.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_SYMV -#define ACTION_SYMV -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_symv { - public: - // Ctor - - BTL_DONT_INLINE Action_symv(int size) : _size(size) { - MESSAGE("Action_symv Ctor"); - - // STL matrix and vector initialization - init_matrix_symm(A_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(X, X_stl); - } - - // invalidate copy ctor - - Action_symv(const Action_symv&) { - INFOS("illegal call to Action_symv Copy Ctor"); - exit(1); - } - - // Dtor - BTL_DONT_INLINE ~Action_symv(void) { - Interface::free_matrix(A, _size); - Interface::free_vector(B); - Interface::free_vector(X); - Interface::free_matrix(A_ref, _size); - Interface::free_vector(B_ref); - Interface::free_vector(X_ref); - } - - // action name - - static inline std::string name(void) { return "symv_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size; } - - BTL_DONT_INLINE void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - Interface::copy_vector(X_ref, X, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("#begin symv"); - Interface::symv(A, B, X, _size); - BTL_ASM_COMMENT("end symv"); - } - - BTL_DONT_INLINE void check_result(void) { - if (_size > 128) return; - // calculation check - Interface::vector_to_stl(X, resu_stl); - - STL_interface::symv(A_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-5) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(0); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_vector B_ref; - typename Interface::gene_vector X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_syr2.hh b/bench/btl/actions/action_syr2.hh deleted file mode 100644 index 3355faa66..000000000 --- a/bench/btl/actions/action_syr2.hh +++ /dev/null @@ -1,118 +0,0 @@ -//===================================================== -// File : action_syr2.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_SYR2 -#define ACTION_SYR2 -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_syr2 { - public: - // Ctor - - BTL_DONT_INLINE Action_syr2(int size) : _size(size) { - // STL matrix and vector initialization - typename Interface::stl_matrix tmp; - init_matrix(A_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(A, A_stl); - Interface::vector_from_stl(B_ref, B_stl); - Interface::vector_from_stl(B, B_stl); - Interface::vector_from_stl(X_ref, X_stl); - Interface::vector_from_stl(X, X_stl); - } - - // invalidate copy ctor - Action_syr2(const Action_syr2&) { - INFOS("illegal call to Action_syr2 Copy Ctor"); - exit(1); - } - - // Dtor - BTL_DONT_INLINE ~Action_syr2(void) { - Interface::free_matrix(A, _size); - Interface::free_vector(B); - Interface::free_vector(X); - Interface::free_matrix(A_ref, _size); - Interface::free_vector(B_ref); - Interface::free_vector(X_ref); - } - - // action name - - static inline std::string name(void) { return "syr2_" + Interface::name(); } - - double nb_op_base(void) { return 2.0 * _size * _size; } - - BTL_DONT_INLINE void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_vector(B_ref, B, _size); - Interface::copy_vector(X_ref, X, _size); - } - - BTL_DONT_INLINE void calculate(void) { - BTL_ASM_COMMENT("#begin syr2"); - Interface::syr2(A, B, X, _size); - BTL_ASM_COMMENT("end syr2"); - } - - BTL_DONT_INLINE void check_result(void) { - // calculation check - Interface::vector_to_stl(X, resu_stl); - - STL_interface::syr2(A_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-3) { - INFOS("WRONG CALCULATION...residual=" << error); - // exit(0); - } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_vector B_ref; - typename Interface::gene_vector X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_vector B; - typename Interface::gene_vector X; - - int _size; -}; - -#endif diff --git a/bench/btl/actions/action_trisolve.hh b/bench/btl/actions/action_trisolve.hh deleted file mode 100644 index 6751a2090..000000000 --- a/bench/btl/actions/action_trisolve.hh +++ /dev/null @@ -1,119 +0,0 @@ -//===================================================== -// File : action_trisolve.hh -// Copyright (C) 2008 Gael Guennebaud -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_TRISOLVE -#define ACTION_TRISOLVE -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_trisolve { - public: - // Ctor - - Action_trisolve(int size) : _size(size) { - MESSAGE("Action_trisolve Ctor"); - - // STL vector initialization - init_matrix(L_stl, _size); - init_vector(B_stl, _size); - init_vector(X_stl, _size); - for (int j = 0; j < _size; ++j) { - for (int i = 0; i < j; ++i) L_stl[j][i] = 0; - L_stl[j][j] += 3; - } - - init_vector(resu_stl, _size); - - // generic matrix and vector initialization - Interface::matrix_from_stl(L, L_stl); - Interface::vector_from_stl(X, X_stl); - Interface::vector_from_stl(B, B_stl); - - _cost = 0; - for (int j = 0; j < _size; ++j) { - _cost += 2 * j + 1; - } - } - - // invalidate copy ctor - - Action_trisolve(const Action_trisolve&) { - INFOS("illegal call to Action_trisolve Copy Ctor"); - exit(1); - } - - // Dtor - - ~Action_trisolve(void) { - MESSAGE("Action_trisolve Dtor"); - - // deallocation - Interface::free_matrix(L, _size); - Interface::free_vector(B); - Interface::free_vector(X); - } - - // action name - - static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { - // Interface::copy_vector(X_ref,X,_size); - } - - inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); } - - void check_result() { - if (_size > 128) return; - // calculation check - Interface::vector_to_stl(X, resu_stl); - - STL_interface::trisolve_lower(L_stl, B_stl, X_stl, _size); - - typename Interface::real_type error = STL_interface::norm_diff(X_stl, resu_stl); - - if (error > 1.e-4) { - INFOS("WRONG CALCULATION...residual=" << error); - exit(2); - } // else INFOS("CALCULATION OK...residual=" << error); - } - - private: - typename Interface::stl_matrix L_stl; - typename Interface::stl_vector X_stl; - typename Interface::stl_vector B_stl; - typename Interface::stl_vector resu_stl; - - typename Interface::gene_matrix L; - typename Interface::gene_vector X; - typename Interface::gene_vector B; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_trisolve_matrix.hh b/bench/btl/actions/action_trisolve_matrix.hh deleted file mode 100644 index cf55aae7c..000000000 --- a/bench/btl/actions/action_trisolve_matrix.hh +++ /dev/null @@ -1,139 +0,0 @@ -//===================================================== -// File : action_matrix_matrix_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT -#define ACTION_TRISOLVE_MATRIX_PRODUCT -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_trisolve_matrix { - public: - // Ctor - - Action_trisolve_matrix(int size) : _size(size) { - MESSAGE("Action_trisolve_matrix Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_matrix(B_stl, _size); - init_matrix(X_stl, _size); - init_matrix(resu_stl, _size); - - for (int j = 0; j < _size; ++j) { - for (int i = 0; i < j; ++i) A_stl[j][i] = 0; - A_stl[j][j] += 3; - } - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(B_ref, B_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(B, B_stl); - Interface::matrix_from_stl(X, X_stl); - - _cost = 0; - for (int j = 0; j < _size; ++j) { - _cost += 2 * j + 1; - } - _cost *= _size; - } - - // invalidate copy ctor - - Action_trisolve_matrix(const Action_trisolve_matrix&) { - INFOS("illegal call to Action_trisolve_matrix Copy Ctor"); - exit(0); - } - - // Dtor - - ~Action_trisolve_matrix(void) { - MESSAGE("Action_trisolve_matrix Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_matrix(B, _size); - Interface::free_matrix(X, _size); - - Interface::free_matrix(A_ref, _size); - Interface::free_matrix(B_ref, _size); - Interface::free_matrix(X_ref, _size); - } - - // action name - - static inline std::string name(void) { return "trisolve_matrix_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_matrix(B_ref, B, _size); - Interface::copy_matrix(X_ref, X, _size); - } - - inline void calculate(void) { Interface::trisolve_lower_matrix(A, B, X, _size); } - - void check_result(void) { - // calculation check - - // Interface::matrix_to_stl(X,resu_stl); - // - // STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(X_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // // exit(1); - // } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix B_stl; - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix B_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix B; - typename Interface::gene_matrix X; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/action_trmm.hh b/bench/btl/actions/action_trmm.hh deleted file mode 100644 index 8c0b25f1e..000000000 --- a/bench/btl/actions/action_trmm.hh +++ /dev/null @@ -1,139 +0,0 @@ -//===================================================== -// File : action_matrix_matrix_product.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_TRMM -#define ACTION_TRMM -#include "utilities.h" -#include "STL_interface.hh" -#include -#include "init/init_function.hh" -#include "init/init_vector.hh" -#include "init/init_matrix.hh" - -using namespace std; - -template -class Action_trmm { - public: - // Ctor - - Action_trmm(int size) : _size(size) { - MESSAGE("Action_trmm Ctor"); - - // STL matrix and vector initialization - - init_matrix(A_stl, _size); - init_matrix(B_stl, _size); - init_matrix(X_stl, _size); - init_matrix(resu_stl, _size); - - for (int j = 0; j < _size; ++j) { - for (int i = 0; i < j; ++i) A_stl[j][i] = 0; - A_stl[j][j] += 3; - } - - // generic matrix and vector initialization - - Interface::matrix_from_stl(A_ref, A_stl); - Interface::matrix_from_stl(B_ref, B_stl); - Interface::matrix_from_stl(X_ref, X_stl); - - Interface::matrix_from_stl(A, A_stl); - Interface::matrix_from_stl(B, B_stl); - Interface::matrix_from_stl(X, X_stl); - - _cost = 0; - for (int j = 0; j < _size; ++j) { - _cost += 2 * j + 1; - } - _cost *= _size; - } - - // invalidate copy ctor - - Action_trmm(const Action_trmm&) { - INFOS("illegal call to Action_trmm Copy Ctor"); - exit(0); - } - - // Dtor - - ~Action_trmm(void) { - MESSAGE("Action_trmm Dtor"); - - // deallocation - - Interface::free_matrix(A, _size); - Interface::free_matrix(B, _size); - Interface::free_matrix(X, _size); - - Interface::free_matrix(A_ref, _size); - Interface::free_matrix(B_ref, _size); - Interface::free_matrix(X_ref, _size); - } - - // action name - - static inline std::string name(void) { return "trmm_" + Interface::name(); } - - double nb_op_base(void) { return _cost; } - - inline void initialize(void) { - Interface::copy_matrix(A_ref, A, _size); - Interface::copy_matrix(B_ref, B, _size); - Interface::copy_matrix(X_ref, X, _size); - } - - inline void calculate(void) { Interface::trmm(A, B, X, _size); } - - void check_result(void) { - // calculation check - - // Interface::matrix_to_stl(X,resu_stl); - // - // STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); - // - // typename Interface::real_type error= - // STL_interface::norm_diff(X_stl,resu_stl); - // - // if (error>1.e-6){ - // INFOS("WRONG CALCULATION...residual=" << error); - // // exit(1); - // } - } - - private: - typename Interface::stl_matrix A_stl; - typename Interface::stl_matrix B_stl; - typename Interface::stl_matrix X_stl; - typename Interface::stl_matrix resu_stl; - - typename Interface::gene_matrix A_ref; - typename Interface::gene_matrix B_ref; - typename Interface::gene_matrix X_ref; - - typename Interface::gene_matrix A; - typename Interface::gene_matrix B; - typename Interface::gene_matrix X; - - int _size; - double _cost; -}; - -#endif diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh deleted file mode 100644 index e2e1f1c82..000000000 --- a/bench/btl/actions/basic_actions.hh +++ /dev/null @@ -1,20 +0,0 @@ - -#include "action_axpy.hh" -#include "action_axpby.hh" - -#include "action_matrix_vector_product.hh" -#include "action_atv_product.hh" - -#include "action_matrix_matrix_product.hh" -#include "action_ata_product.hh" -#include "action_aat_product.hh" - -#include "action_trisolve.hh" -#include "action_trmm.hh" -#include "action_symv.hh" -// #include "action_symm.hh" -#include "action_syr2.hh" -#include "action_ger.hh" -#include "action_rot.hh" - -// #include "action_lu_solve.hh" diff --git a/bench/btl/cmake/FindACML.cmake b/bench/btl/cmake/FindACML.cmake deleted file mode 100644 index daeeb535d..000000000 --- a/bench/btl/cmake/FindACML.cmake +++ /dev/null @@ -1,51 +0,0 @@ - -if (ACML_LIBRARIES) - set(ACML_FIND_QUIETLY TRUE) -endif () - -find_library(ACML_LIBRARIES - NAMES - acml_mp acml_mv - PATHS - $ENV{ACMLDIR}/lib - $ENV{ACML_DIR}/lib - ${LIB_INSTALL_DIR} -) - -find_file(ACML_LIBRARIES - NAMES - libacml_mp.so - PATHS - /usr/lib - /usr/lib64 - $ENV{ACMLDIR}/lib - ${LIB_INSTALL_DIR} -) - -if(NOT ACML_LIBRARIES) - message(STATUS "Multi-threaded library not found, looking for single-threaded") - find_library(ACML_LIBRARIES - NAMES - acml acml_mv - PATHS - $ENV{ACMLDIR}/lib - $ENV{ACML_DIR}/lib - ${LIB_INSTALL_DIR} - ) - find_file(ACML_LIBRARIES - libacml.so libacml_mv.so - PATHS - /usr/lib - /usr/lib64 - $ENV{ACMLDIR}/lib - ${LIB_INSTALL_DIR} - ) -endif() - - - - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ACML DEFAULT_MSG ACML_LIBRARIES) - -mark_as_advanced(ACML_LIBRARIES) diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake deleted file mode 100644 index 572a4c0b2..000000000 --- a/bench/btl/cmake/FindATLAS.cmake +++ /dev/null @@ -1,31 +0,0 @@ - -if (ATLAS_LIBRARIES) - set(ATLAS_FIND_QUIETLY TRUE) -endif () - -find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - -find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - -find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - -if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) - - set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_LIB}) - - # search the default lapack lib link to it - find_file(ATLAS_REFERENCE_LAPACK liblapack.so.3 PATHS /usr/lib /usr/lib64) - find_library(ATLAS_REFERENCE_LAPACK NAMES lapack) -# if(ATLAS_REFERENCE_LAPACK) -# set(ATLAS_LIBRARIES ${ATLAS_LIBRARIES} ${ATLAS_REFERENCE_LAPACK}) -# endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ATLAS DEFAULT_MSG ATLAS_LIBRARIES) - -mark_as_advanced(ATLAS_LIBRARIES) diff --git a/bench/btl/cmake/FindBLAZE.cmake b/bench/btl/cmake/FindBLAZE.cmake deleted file mode 100644 index 18a878ff9..000000000 --- a/bench/btl/cmake/FindBLAZE.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# - Try to find eigen2 headers -# Once done this will define -# -# BLAZE_FOUND - system has blaze lib -# BLAZE_INCLUDE_DIR - the blaze include directory -# -# Copyright (C) 2008 Gael Guennebaud -# Adapted from FindEigen.cmake: -# Copyright (c) 2006, 2007 Montel Laurent, -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - -if (BLAZE_INCLUDE_DIR) - - # in cache already - set(BLAZE_FOUND TRUE) - -else () - -find_path(BLAZE_INCLUDE_DIR NAMES blaze/Blaze.h - PATHS - ${INCLUDE_INSTALL_DIR} - ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(BLAZE DEFAULT_MSG BLAZE_INCLUDE_DIR) - -mark_as_advanced(BLAZE_INCLUDE_DIR) - -endif() - diff --git a/bench/btl/cmake/FindBlitz.cmake b/bench/btl/cmake/FindBlitz.cmake deleted file mode 100644 index 7ab375fd8..000000000 --- a/bench/btl/cmake/FindBlitz.cmake +++ /dev/null @@ -1,40 +0,0 @@ -# - Try to find blitz lib -# Once done this will define -# -# BLITZ_FOUND - system has blitz lib -# BLITZ_INCLUDES - the blitz include directory -# BLITZ_LIBRARIES - The libraries needed to use blitz - -# Copyright (c) 2006, Montel Laurent, -# Copyright (c) 2007, Allen Winter, -# Copyright (C) 2008 Gael Guennebaud -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - -# include(FindLibraryWithDebug) - -if (BLITZ_INCLUDES AND BLITZ_LIBRARIES) - set(Blitz_FIND_QUIETLY TRUE) -endif () - -find_path(BLITZ_INCLUDES - NAMES - blitz/array.h - PATH_SUFFIXES blitz* - PATHS - $ENV{BLITZDIR}/include - ${INCLUDE_INSTALL_DIR} -) - -find_library(BLITZ_LIBRARIES - blitz - PATHS - $ENV{BLITZDIR}/lib - ${LIB_INSTALL_DIR} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Blitz DEFAULT_MSG - BLITZ_INCLUDES BLITZ_LIBRARIES) - -mark_as_advanced(BLITZ_INCLUDES BLITZ_LIBRARIES) diff --git a/bench/btl/cmake/FindCBLAS.cmake b/bench/btl/cmake/FindCBLAS.cmake deleted file mode 100644 index 43a90f7f6..000000000 --- a/bench/btl/cmake/FindCBLAS.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# include(FindLibraryWithDebug) - -if (CBLAS_INCLUDES AND CBLAS_LIBRARIES) - set(CBLAS_FIND_QUIETLY TRUE) -endif () - -find_path(CBLAS_INCLUDES - NAMES - cblas.h - PATHS - $ENV{CBLASDIR}/include - ${INCLUDE_INSTALL_DIR} -) - -find_library(CBLAS_LIBRARIES - cblas - PATHS - $ENV{CBLASDIR}/lib - ${LIB_INSTALL_DIR} -) - -find_file(CBLAS_LIBRARIES - libcblas.so.3 - PATHS - /usr/lib - /usr/lib64 - $ENV{CBLASDIR}/lib - ${LIB_INSTALL_DIR} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CBLAS DEFAULT_MSG - CBLAS_INCLUDES CBLAS_LIBRARIES) - -mark_as_advanced(CBLAS_INCLUDES CBLAS_LIBRARIES) diff --git a/bench/btl/cmake/FindGMM.cmake b/bench/btl/cmake/FindGMM.cmake deleted file mode 100644 index ff45e6a0c..000000000 --- a/bench/btl/cmake/FindGMM.cmake +++ /dev/null @@ -1,17 +0,0 @@ -if (GMM_INCLUDE_DIR) - # in cache already - set(GMM_FOUND TRUE) -else () - -find_path(GMM_INCLUDE_DIR NAMES gmm/gmm.h - PATHS - ${INCLUDE_INSTALL_DIR} - ${GMM_INCLUDE_PATH} - ) - -include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(GMM DEFAULT_MSG GMM_INCLUDE_DIR ) - -mark_as_advanced(GMM_INCLUDE_DIR) - -endif() diff --git a/bench/btl/cmake/FindMKL.cmake b/bench/btl/cmake/FindMKL.cmake deleted file mode 100644 index 23e77279a..000000000 --- a/bench/btl/cmake/FindMKL.cmake +++ /dev/null @@ -1,65 +0,0 @@ - -if (MKL_LIBRARIES) - set(MKL_FIND_QUIETLY TRUE) -endif () - -if(CMAKE_MINOR_VERSION GREATER 4) - -if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") - -find_library(MKL_LIBRARIES - mkl_core - PATHS - $ENV{MKLLIB} - /opt/intel/mkl/*/lib/em64t - /opt/intel/Compiler/*/*/mkl/lib/em64t - ${LIB_INSTALL_DIR} -) - -find_library(MKL_GUIDE - guide - PATHS - $ENV{MKLLIB} - /opt/intel/mkl/*/lib/em64t - /opt/intel/Compiler/*/*/mkl/lib/em64t - /opt/intel/Compiler/*/*/lib/intel64 - ${LIB_INSTALL_DIR} -) - -if(MKL_LIBRARIES AND MKL_GUIDE) - set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64 mkl_sequential ${MKL_GUIDE} pthread) -endif() - -else() - -find_library(MKL_LIBRARIES - mkl_core - PATHS - $ENV{MKLLIB} - /opt/intel/mkl/*/lib/32 - /opt/intel/Compiler/*/*/mkl/lib/32 - ${LIB_INSTALL_DIR} -) - -find_library(MKL_GUIDE - guide - PATHS - $ENV{MKLLIB} - /opt/intel/mkl/*/lib/32 - /opt/intel/Compiler/*/*/mkl/lib/32 - /opt/intel/Compiler/*/*/lib/intel32 - ${LIB_INSTALL_DIR} -) - -if(MKL_LIBRARIES AND MKL_GUIDE) - set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel mkl_sequential ${MKL_GUIDE} pthread) -endif() - -endif() - -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARIES) - -mark_as_advanced(MKL_LIBRARIES) diff --git a/bench/btl/cmake/FindMTL4.cmake b/bench/btl/cmake/FindMTL4.cmake deleted file mode 100644 index 1bafc93a6..000000000 --- a/bench/btl/cmake/FindMTL4.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# - Try to find eigen2 headers -# Once done this will define -# -# MTL4_FOUND - system has eigen2 lib -# MTL4_INCLUDE_DIR - the eigen2 include directory -# -# Copyright (C) 2008 Gael Guennebaud -# Adapted from FindEigen.cmake: -# Copyright (c) 2006, 2007 Montel Laurent, -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - -if (MTL4_INCLUDE_DIR) - - # in cache already - set(MTL4_FOUND TRUE) - -else () - -find_path(MTL4_INCLUDE_DIR NAMES boost/numeric/mtl/mtl.hpp - PATHS - ${INCLUDE_INSTALL_DIR} - ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(MTL4 DEFAULT_MSG MTL4_INCLUDE_DIR) - -mark_as_advanced(MTL4_INCLUDE_DIR) - -endif() - diff --git a/bench/btl/cmake/FindOPENBLAS.cmake b/bench/btl/cmake/FindOPENBLAS.cmake deleted file mode 100644 index 5c0762306..000000000 --- a/bench/btl/cmake/FindOPENBLAS.cmake +++ /dev/null @@ -1,17 +0,0 @@ - -if (OPENBLAS_LIBRARIES) - set(OPENBLAS_FIND_QUIETLY TRUE) -endif () - -find_file(OPENBLAS_LIBRARIES NAMES libopenblas.so libopenblas.so.0 PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) -find_library(OPENBLAS_LIBRARIES openblas PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) - -if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) - set(OPENBLAS_LIBRARIES ${OPENBLAS_LIBRARIES} "-lpthread -lgfortran") -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(OPENBLAS DEFAULT_MSG - OPENBLAS_LIBRARIES) - -mark_as_advanced(OPENBLAS_LIBRARIES) diff --git a/bench/btl/cmake/FindPackageHandleStandardArgs.cmake b/bench/btl/cmake/FindPackageHandleStandardArgs.cmake deleted file mode 100644 index 05d7e65bd..000000000 --- a/bench/btl/cmake/FindPackageHandleStandardArgs.cmake +++ /dev/null @@ -1,60 +0,0 @@ -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... ) -# -# This macro is intended to be used in FindXXX.cmake modules files. -# It handles the REQUIRED and QUIET argument to find_package() and -# it also sets the _FOUND variable. -# The package is found if all variables listed are TRUE. -# Example: -# -# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) -# -# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and -# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. -# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, -# independent whether QUIET was used or not. -# -# If it is found, the location is reported using the VAR1 argument, so -# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. -# If the second argument is DEFAULT_MSG, the message in the failure case will -# be "Could NOT find LibXml2", if you don't like this message you can specify -# your own custom failure message there. - -macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) - - if("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - if (${_NAME}_FIND_REQUIRED) - set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") - else (${_NAME}_FIND_REQUIRED) - set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") - endif (${_NAME}_FIND_REQUIRED) - else("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - set(_FAIL_MESSAGE "${_FAIL_MSG}") - endif("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") - - string(TOUPPER ${_NAME} _NAME_UPPER) - - set(${_NAME_UPPER}_FOUND TRUE) - if(NOT ${_VAR1}) - set(${_NAME_UPPER}_FOUND FALSE) - endif(NOT ${_VAR1}) - - foreach(_CURRENT_VAR ${ARGN}) - if(NOT ${_CURRENT_VAR}) - set(${_NAME_UPPER}_FOUND FALSE) - endif(NOT ${_CURRENT_VAR}) - endforeach(_CURRENT_VAR) - - if (${_NAME_UPPER}_FOUND) - if (NOT ${_NAME}_FIND_QUIETLY) - message(STATUS "Found ${_NAME}: ${${_VAR1}}") - endif (NOT ${_NAME}_FIND_QUIETLY) - else (${_NAME_UPPER}_FOUND) - if (${_NAME}_FIND_REQUIRED) - message(FATAL_ERROR "${_FAIL_MESSAGE}") - else (${_NAME}_FIND_REQUIRED) - if (NOT ${_NAME}_FIND_QUIETLY) - message(STATUS "${_FAIL_MESSAGE}") - endif (NOT ${_NAME}_FIND_QUIETLY) - endif (${_NAME}_FIND_REQUIRED) - endif (${_NAME_UPPER}_FOUND) -endmacro(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git a/bench/btl/cmake/FindTvmet.cmake b/bench/btl/cmake/FindTvmet.cmake deleted file mode 100644 index 8ccae271b..000000000 --- a/bench/btl/cmake/FindTvmet.cmake +++ /dev/null @@ -1,32 +0,0 @@ -# - Try to find tvmet headers -# Once done this will define -# -# TVMET_FOUND - system has tvmet lib -# TVMET_INCLUDE_DIR - the tvmet include directory -# -# Copyright (C) 2008 Gael Guennebaud -# Adapted from FindEigen.cmake: -# Copyright (c) 2006, 2007 Montel Laurent, -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - -if (TVMET_INCLUDE_DIR) - - # in cache already - set(TVMET_FOUND TRUE) - -else () - -find_path(TVMET_INCLUDE_DIR NAMES tvmet/tvmet.h - PATHS - ${TVMETDIR}/ - ${INCLUDE_INSTALL_DIR} - ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Tvmet DEFAULT_MSG TVMET_INCLUDE_DIR) - -mark_as_advanced(TVMET_INCLUDE_DIR) - -endif() - diff --git a/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake b/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake deleted file mode 100644 index 8d46fcea2..000000000 --- a/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake +++ /dev/null @@ -1,31 +0,0 @@ -# - MACRO_OPTIONAL_ADD_SUBDIRECTORY() combines add_subdirectory() with an option() -# MACRO_OPTIONAL_ADD_SUBDIRECTORY( ) -# If you use MACRO_OPTIONAL_ADD_SUBDIRECTORY() instead of add_subdirectory(), -# this will have two effects -# 1 - CMake will not complain if the directory doesn't exist -# This makes sense if you want to distribute just one of the subdirs -# in a source package, e.g. just one of the subdirs in kdeextragear. -# 2 - If the directory exists, it will offer an option to skip the -# subdirectory. -# This is useful if you want to compile only a subset of all -# directories. - -# Copyright (c) 2007, Alexander Neundorf, -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. - - -macro (MACRO_OPTIONAL_ADD_SUBDIRECTORY _dir ) - get_filename_component(_fullPath ${_dir} ABSOLUTE) - if(EXISTS ${_fullPath}) - if(${ARGC} EQUAL 2) - option(BUILD_${_dir} "Build directory ${_dir}" ${ARGV1}) - else(${ARGC} EQUAL 2) - option(BUILD_${_dir} "Build directory ${_dir}" TRUE) - endif(${ARGC} EQUAL 2) - if(BUILD_${_dir}) - add_subdirectory(${_dir}) - endif(BUILD_${_dir}) - endif(EXISTS ${_fullPath}) -endmacro (MACRO_OPTIONAL_ADD_SUBDIRECTORY) diff --git a/bench/btl/data/CMakeLists.txt b/bench/btl/data/CMakeLists.txt deleted file mode 100644 index 580c1ced0..000000000 --- a/bench/btl/data/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ - -add_custom_target(copy_scripts) - -set(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh - perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh ) - -foreach(script_file ${script_files}) -add_custom_command( - TARGET copy_scripts - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/ - ARGS -) -endforeach(script_file) - -add_custom_command( - TARGET copy_scripts - POST_BUILD - COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt - ARGS -) -add_custom_command( - TARGET copy_scripts - POST_BUILD - COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt - ARGS -) - -add_executable(smooth smooth.cxx) -add_executable(regularize regularize.cxx) -add_executable(main mean.cxx) -add_dependencies(main copy_scripts) diff --git a/bench/btl/data/action_settings.txt b/bench/btl/data/action_settings.txt deleted file mode 100644 index 39d2b5dc4..000000000 --- a/bench/btl/data/action_settings.txt +++ /dev/null @@ -1,19 +0,0 @@ -aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000 -ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000 -atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000 -axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000 -axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000 -matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000 -matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000 -trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000 -trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000 -trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000 -cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000 -complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000 -partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000 -tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000 -hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000 -symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000 -syr2 ; "{/*1.5 symmretric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:5000 -ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000 -rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000 diff --git a/bench/btl/data/gnuplot_common_settings.hh b/bench/btl/data/gnuplot_common_settings.hh deleted file mode 100644 index 6f677df60..000000000 --- a/bench/btl/data/gnuplot_common_settings.hh +++ /dev/null @@ -1,87 +0,0 @@ -set noclip points -set clip one -set noclip two -set bar 1.000000 -set border 31 lt -1 lw 1.000 -set xdata -set ydata -set zdata -set x2data -set y2data -set boxwidth -set dummy x,y -set format x "%g" -set format y "%g" -set format x2 "%g" -set format y2 "%g" -set format z "%g" -set angles radians -set nogrid -set key title "" -set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0 -set nolabel -set noarrow -# set nolinestyle # deprecated -set nologscale -set logscale x 10 -set offsets 0, 0, 0, 0 -set pointsize 1 -set encoding default -set nopolar -set noparametric -set view 60, 30, 1, 1 -set samples 100, 100 -set isosamples 10, 10 -set surface -set nocontour -set clabel '%8.3g' -set mapping cartesian -set nohidden3d -set cntrparam order 4 -set cntrparam linear -set cntrparam levels auto 5 -set cntrparam points 5 -set size ratio 0 1,1 -set origin 0,0 -# set data style lines -# set function style lines -set xzeroaxis lt -2 lw 1.000 -set x2zeroaxis lt -2 lw 1.000 -set yzeroaxis lt -2 lw 1.000 -set y2zeroaxis lt -2 lw 1.000 -set tics in -set ticslevel 0.5 -set tics scale 1, 0.5 -set mxtics default -set mytics default -set mx2tics default -set my2tics default -set xtics border mirror norotate autofreq -set ytics border mirror norotate autofreq -set ztics border nomirror norotate autofreq -set nox2tics -set noy2tics -set timestamp "" bottom norotate offset 0,0 -set rrange [ * : * ] noreverse nowriteback # (currently [-0:10] ) -set trange [ * : * ] noreverse nowriteback # (currently [-5:5] ) -set urange [ * : * ] noreverse nowriteback # (currently [-5:5] ) -set vrange [ * : * ] noreverse nowriteback # (currently [-5:5] ) -set xlabel "matrix size" offset 0,0 -set x2label "" offset 0,0 -set timefmt "%d/%m/%y\n%H:%M" -set xrange [ 10 : 1000 ] noreverse nowriteback -set x2range [ * : * ] noreverse nowriteback # (currently [-10:10] ) -set ylabel "MFLOPS" offset 0,0 -set y2label "" offset 0,0 -set yrange [ * : * ] noreverse nowriteback # (currently [-10:10] ) -set y2range [ * : * ] noreverse nowriteback # (currently [-10:10] ) -set zlabel "" offset 0,0 -set zrange [ * : * ] noreverse nowriteback # (currently [-10:10] ) -set zero 1e-08 -set lmargin -1 -set bmargin -1 -set rmargin -1 -set tmargin -1 -set locale "C" -set xrange [4:1024] - diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean deleted file mode 100755 index d01426909..000000000 --- a/bench/btl/data/go_mean +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -if [ $# < 1 ]; then - echo "Usage: $0 working_directory [tiny|large [prefix]]" -else - -mkdir -p $1 -##cp ../libs/*/*.dat $1 - -mode=large -if [ $# > 2 ]; then - mode=$2 -fi -if [ $# > 3 ]; then - prefix=$3 -fi - -EIGENDIR=`cat eigen_root_dir.txt` - -webpagefilename=$1/index.html -meanstatsfilename=$1/mean.html - -echo '' > $meanstatsfilename -echo '' > $webpagefilename -echo '

Configuration' >> $webpagefilename -echo '

    '\ - '
  • ' `cat /proc/cpuinfo | grep "model name" | head -n 1`\ - ' (' `uname -m` ')
  • '\ - '
  • compiler: ' `cat compiler_version.txt` '
  • '\ - '
  • eigen3: ' `git ls-remote --refs -q $EIGENDIR HEAD | cut -f 1` '
  • '\ - '
' \ - '

' >> $webpagefilename - -source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix -source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix -source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix -source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix -source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix -# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh partial_lu_decomp $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix -source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix -source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix -source mk_mean_script.sh ger $1 11 50 300 1000 $mode $prefix -source mk_mean_script.sh rot $1 11 2500 100000 250000 $mode $prefix -source mk_mean_script.sh complete_lu_decomp $1 11 100 300 1000 $mode $prefix - -fi - -## compile the web page ## - -#echo `cat footer.html` >> $webpagefilename \ No newline at end of file diff --git a/bench/btl/data/mean.cxx b/bench/btl/data/mean.cxx deleted file mode 100644 index fe4b453cb..000000000 --- a/bench/btl/data/mean.cxx +++ /dev/null @@ -1,165 +0,0 @@ -//===================================================== -// File : mean.cxx -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#include "utilities.h" -#include -#include -#include -#include -#include "bench_parameter.hh" -#include "utils/xy_file.hh" -#include - -using namespace std; - -double mean_calc(const vector &tab_sizes, const vector &tab_mflops, const int size_min, - const int size_max); - -class Lib_Mean { - public: - Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() { - MESSAGE("Lib_mean Default Ctor"); - MESSAGE("!!! should not be used"); - exit(0); - } - Lib_Mean(const string &name, const double &mic, const double &moc) - : _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) { - MESSAGE("Lib_mean Ctor"); - } - Lib_Mean(const Lib_Mean &lm) - : _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) { - MESSAGE("Lib_mean Copy Ctor"); - } - ~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); } - - double _mean_in_cache; - double _mean_out_of_cache; - string _lib_name; - - bool operator<(const Lib_Mean &right) const { - // return ( this->_mean_out_of_cache > right._mean_out_of_cache) ; - return (this->_mean_in_cache > right._mean_in_cache); - } -}; - -int main(int argc, char *argv[]) { - if (argc < 6) { - INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2..."); - exit(0); - } - INFOS(argc); - - int min_in_cache = atoi(argv[2]); - int max_in_cache = atoi(argv[3]); - int min_out_of_cache = atoi(argv[4]); - int max_out_of_cache = atoi(argv[5]); - - multiset s_lib_mean; - - for (int i = 6; i < argc; i++) { - string filename = argv[i]; - - INFOS(filename); - - double mic = 0; - double moc = 0; - - { - vector tab_sizes; - vector tab_mflops; - - read_xy_file(filename, tab_sizes, tab_mflops); - - mic = mean_calc(tab_sizes, tab_mflops, min_in_cache, max_in_cache); - moc = mean_calc(tab_sizes, tab_mflops, min_out_of_cache, max_out_of_cache); - - Lib_Mean cur_lib_mean(filename, mic, moc); - - s_lib_mean.insert(cur_lib_mean); - } - } - - cout << "" << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " - << endl; - cout << " " << endl; - cout << " " - << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - - multiset::iterator is = s_lib_mean.begin(); - Lib_Mean best(*is); - - for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) { - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - cout << " " << endl; - } - - cout << "
" << argv[1] << " in cache
mean perf
Mflops
in cache
% best
out of cache
mean perf
Mflops
out of cache
% best
details comments
" << is->_lib_name << " " << is->_mean_in_cache << " " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " " << is->_mean_out_of_cache << " " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " " - << "_lib_name << "_" << argv[1] - << "\">snippet/" - "_lib_name << "_flags\">flags " - << "_lib_name << "_comments\">click here
" << endl; - - ofstream output_file("../order_lib", ios::out); - - for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) { - output_file << is->_lib_name << endl; - } - - output_file.close(); -} - -double mean_calc(const vector &tab_sizes, const vector &tab_mflops, const int size_min, - const int size_max) { - int size = tab_sizes.size(); - int nb_sample = 0; - double mean = 0.0; - - for (int i = 0; i < size; i++) { - if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) { - nb_sample++; - mean += tab_mflops[i]; - } - } - - if (nb_sample == 0) { - INFOS("no data for mean calculation"); - return 0.0; - } - - return mean / nb_sample; -} diff --git a/bench/btl/data/mk_gnuplot_script.sh b/bench/btl/data/mk_gnuplot_script.sh deleted file mode 100755 index 2ca7b5cb5..000000000 --- a/bench/btl/data/mk_gnuplot_script.sh +++ /dev/null @@ -1,68 +0,0 @@ -#! /bin/bash -WHAT=$1 -DIR=$2 -echo $WHAT script generation -cat $WHAT.hh > $WHAT.gnuplot - -DATA_FILE=`find $DIR -name "*.dat" | grep $WHAT` - -echo plot \\ >> $WHAT.gnuplot - -for FILE in $DATA_FILE -do - LAST=$FILE -done - -echo LAST=$LAST - -for FILE in $DATA_FILE -do - if [ $FILE != $LAST ] - then - BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} - echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot - fi -done -BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} -echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot - -#echo set term postscript color >> $WHAT.gnuplot -#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot -echo set term pbm small color >> $WHAT.gnuplot -echo set output "'"$WHAT.ppm"'" >> $WHAT.gnuplot -echo plot \\ >> $WHAT.gnuplot - -for FILE in $DATA_FILE -do - if [ $FILE != $LAST ] - then - BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} - echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot - fi -done -BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} -echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot - -echo set term jpeg large >> $WHAT.gnuplot -echo set output "'"$WHAT.jpg"'" >> $WHAT.gnuplot -echo plot \\ >> $WHAT.gnuplot - -for FILE in $DATA_FILE -do - if [ $FILE != $LAST ] - then - BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} - echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot - fi -done -BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} -echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot - - -gnuplot -persist < $WHAT.gnuplot - -rm $WHAT.gnuplot - - - - diff --git a/bench/btl/data/mk_mean_script.sh b/bench/btl/data/mk_mean_script.sh deleted file mode 100755 index b10df0240..000000000 --- a/bench/btl/data/mk_mean_script.sh +++ /dev/null @@ -1,52 +0,0 @@ -#! /bin/bash -WHAT=$1 -DIR=$2 -MINIC=$3 -MAXIC=$4 -MINOC=$5 -MAXOC=$6 -prefix=$8 - -meanstatsfilename=$2/mean.html - -WORK_DIR=tmp -mkdir $WORK_DIR - -DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}` - -if [ -n "$DATA_FILE" ]; then - - echo "" - echo "$1..." - for FILE in $DATA_FILE - do - ##echo hello world - ##echo "mk_mean_script1" ${FILE} - BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} - - ##echo "mk_mean_script1" ${TITLE} - cp $FILE ${WORK_DIR}/${TITLE} - - done - - cd $WORK_DIR - ../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename - ../mk_new_gnuplot.sh $1 $2 $7 - rm -f *.gnuplot - cd .. - - echo '
' >> $meanstatsfilename - - webpagefilename=$2/index.html - # echo '

'${WHAT}'

' >> $webpagefilename - echo '
'${WHAT}'
' >> $webpagefilename - -fi - -rm -R $WORK_DIR - - - - - - diff --git a/bench/btl/data/mk_new_gnuplot.sh b/bench/btl/data/mk_new_gnuplot.sh deleted file mode 100755 index fad3b23a4..000000000 --- a/bench/btl/data/mk_new_gnuplot.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -WHAT=$1 -DIR=$2 - -cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot - -echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot -echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` " offset 0,0" >> $WHAT.gnuplot -echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot - -if [ $# > 3 ]; then - if [ "$3" == "tiny" ]; then - echo "set xrange [2:16]" >> $WHAT.gnuplot - echo "set nologscale" >> $WHAT.gnuplot - fi -fi - - - -DATA_FILE=`cat ../order_lib` -echo set term postscript color rounded enhanced >> $WHAT.gnuplot -echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot - -# echo set term svg color rounded enhanced >> $WHAT.gnuplot -# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot -# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot - -echo plot \\ >> $WHAT.gnuplot - -for FILE in $DATA_FILE -do - LAST=$FILE -done - -for FILE in $DATA_FILE -do - BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} - - echo "'"$FILE"'" `grep $TITLE ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2` "\\" >> $WHAT.gnuplot - if [ $FILE != $LAST ] - then - echo ", \\" >> $WHAT.gnuplot - fi -done -echo " " >> $WHAT.gnuplot - -gnuplot -persist < $WHAT.gnuplot - -rm $WHAT.gnuplot - -ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf -convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten ../${DIR}/$WHAT.png - -# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg diff --git a/bench/btl/data/perlib_plot_settings.txt b/bench/btl/data/perlib_plot_settings.txt deleted file mode 100644 index f023cfe02..000000000 --- a/bench/btl/data/perlib_plot_settings.txt +++ /dev/null @@ -1,16 +0,0 @@ -eigen3 ; with lines lw 4 lt 1 lc rgbcolor "black" -eigen2 ; with lines lw 3 lt 1 lc rgbcolor "#999999" -EigenBLAS ; with lines lw 3 lt 3 lc rgbcolor "#999999" -eigen3_novec ; with lines lw 2 lt 1 lc rgbcolor "#999999" -eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010" -INTEL_MKL ; with lines lw 3 lt 1 lc rgbcolor "#ff0000" -ATLAS ; with lines lw 3 lt 1 lc rgbcolor "#008000" -gmm ; with lines lw 3 lt 1 lc rgbcolor "#0000ff" -ublas ; with lines lw 3 lt 1 lc rgbcolor "#00b7ff" -mtl4 ; with lines lw 3 lt 1 lc rgbcolor "#d18847" -blitz ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" -F77 ; with lines lw 3 lt 3 lc rgbcolor "#e6e64c" -OPENBLAS ; with lines lw 3 lt 1 lc rgbcolor "#C05600" -C ; with lines lw 3 lt 3 lc rgbcolor "#e6bd96" -ACML ; with lines lw 2 lt 3 lc rgbcolor "#e6e64c" -blaze ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" diff --git a/bench/btl/data/regularize.cxx b/bench/btl/data/regularize.cxx deleted file mode 100644 index 51e2edf19..000000000 --- a/bench/btl/data/regularize.cxx +++ /dev/null @@ -1,113 +0,0 @@ -//===================================================== -// File : regularize.cxx -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#include "utilities.h" -#include -#include -#include -#include -#include "bench_parameter.hh" -#include - -using namespace std; - -void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); -void regularize_curve(const string &filename, const vector &tab_mflops, const vector &tab_sizes, - int start_cut_size, int stop_cut_size); -///////////////////////////////////////////////////////////////////////////////////////////////// - -int main(int argc, char *argv[]) { - // input data - - if (argc < 4) { - INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename"); - exit(0); - } - INFOS(argc); - - int start_cut_size = atoi(argv[2]); - int stop_cut_size = atoi(argv[3]); - - string filename = argv[1]; - string regularize_filename = argv[4]; - - INFOS(filename); - INFOS("start_cut_size=" << start_cut_size); - - vector tab_sizes; - vector tab_mflops; - - read_xy_file(filename, tab_sizes, tab_mflops); - - // regularizeing - - regularize_curve(regularize_filename, tab_mflops, tab_sizes, start_cut_size, stop_cut_size); -} - -////////////////////////////////////////////////////////////////////////////////////// - -void regularize_curve(const string &filename, const vector &tab_mflops, const vector &tab_sizes, - int start_cut_size, int stop_cut_size) { - int size = tab_mflops.size(); - ofstream output_file(filename.c_str(), ios::out); - - int i = 0; - - while (tab_sizes[i] < start_cut_size) { - output_file << tab_sizes[i] << " " << tab_mflops[i] << endl; - i++; - } - - output_file << endl; - - while (tab_sizes[i] < stop_cut_size) { - i++; - } - - while (i < size) { - output_file << tab_sizes[i] << " " << tab_mflops[i] << endl; - i++; - } - - output_file.close(); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops) { - ifstream input_file(filename.c_str(), ios::in); - - if (!input_file) { - INFOS("!!! Error opening " << filename); - exit(0); - } - - int nb_point = 0; - int size = 0; - double mflops = 0; - - while (input_file >> size >> mflops) { - nb_point++; - tab_sizes.push_back(size); - tab_mflops.push_back(mflops); - } - SCRUTE(nb_point); - - input_file.close(); -} diff --git a/bench/btl/data/smooth.cxx b/bench/btl/data/smooth.cxx deleted file mode 100644 index b0a2d960a..000000000 --- a/bench/btl/data/smooth.cxx +++ /dev/null @@ -1,165 +0,0 @@ -//===================================================== -// File : smooth.cxx -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#include "utilities.h" -#include -#include -#include -#include -#include -#include "bench_parameter.hh" -#include - -using namespace std; - -void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); -void write_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops); -void smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width); -void centered_smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width); - -///////////////////////////////////////////////////////////////////////////////////////////////// - -int main(int argc, char *argv[]) { - // input data - - if (argc < 3) { - INFOS("!!! Error ... usage : main filename window_half_width smooth_filename"); - exit(0); - } - INFOS(argc); - - int window_half_width = atoi(argv[2]); - - string filename = argv[1]; - string smooth_filename = argv[3]; - - INFOS(filename); - INFOS("window_half_width=" << window_half_width); - - vector tab_sizes; - vector tab_mflops; - - read_xy_file(filename, tab_sizes, tab_mflops); - - // smoothing - - vector smooth_tab_mflops; - - // smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width); - centered_smooth_curve(tab_mflops, smooth_tab_mflops, window_half_width); - - // output result - - write_xy_file(smooth_filename, tab_sizes, smooth_tab_mflops); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -template -double weighted_mean(const VECTOR &data) { - double mean = 0.0; - - for (int i = 0; i < data.size(); i++) { - mean += data[i]; - } - - return mean / double(data.size()); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width) { - int window_width = 2 * window_half_width + 1; - - int size = tab_mflops.size(); - - vector sample(window_width); - - for (int i = 0; i < size; i++) { - for (int j = 0; j < window_width; j++) { - int shifted_index = i + j - window_half_width; - if (shifted_index < 0) shifted_index = 0; - if (shifted_index > size - 1) shifted_index = size - 1; - sample[j] = tab_mflops[shifted_index]; - } - - smooth_tab_mflops.push_back(weighted_mean(sample)); - } -} - -void centered_smooth_curve(const vector &tab_mflops, vector &smooth_tab_mflops, int window_half_width) { - int max_window_width = 2 * window_half_width + 1; - - int size = tab_mflops.size(); - - for (int i = 0; i < size; i++) { - deque sample; - - sample.push_back(tab_mflops[i]); - - for (int j = 1; j <= window_half_width; j++) { - int before = i - j; - int after = i + j; - - if ((before >= 0) && (after < size)) // inside of the vector - { - sample.push_front(tab_mflops[before]); - sample.push_back(tab_mflops[after]); - } - } - - smooth_tab_mflops.push_back(weighted_mean(sample)); - } -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void write_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops) { - ofstream output_file(filename.c_str(), ios::out); - - for (int i = 0; i < tab_sizes.size(); i++) { - output_file << tab_sizes[i] << " " << tab_mflops[i] << endl; - } - - output_file.close(); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void read_xy_file(const string &filename, vector &tab_sizes, vector &tab_mflops) { - ifstream input_file(filename.c_str(), ios::in); - - if (!input_file) { - INFOS("!!! Error opening " << filename); - exit(0); - } - - int nb_point = 0; - int size = 0; - double mflops = 0; - - while (input_file >> size >> mflops) { - nb_point++; - tab_sizes.push_back(size); - tab_mflops.push_back(mflops); - } - SCRUTE(nb_point); - - input_file.close(); -} diff --git a/bench/btl/data/smooth_all.sh b/bench/btl/data/smooth_all.sh deleted file mode 100755 index 3e5bfdf47..000000000 --- a/bench/btl/data/smooth_all.sh +++ /dev/null @@ -1,68 +0,0 @@ -#! /bin/bash -ORIG_DIR=$1 -SMOOTH_DIR=${ORIG_DIR}_smooth -mkdir ${SMOOTH_DIR} - -AXPY_FILE=`find ${ORIG_DIR} -name "*.dat" | grep axpy` -for FILE in ${AXPY_FILE} -do - echo $FILE - BASE=${FILE##*/} - ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp - ./regularize ${SMOOTH_DIR}/${BASE}_tmp 2500 15000 ${SMOOTH_DIR}/${BASE} - rm -f ${SMOOTH_DIR}/${BASE}_tmp -done - - -MATRIX_VECTOR_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_vector` -for FILE in ${MATRIX_VECTOR_FILE} -do - echo $FILE - BASE=${FILE##*/} - ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp - ./regularize ${SMOOTH_DIR}/${BASE}_tmp 50 180 ${SMOOTH_DIR}/${BASE} - rm -f ${SMOOTH_DIR}/${BASE}_tmp -done - -MATRIX_MATRIX_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_matrix` -for FILE in ${MATRIX_MATRIX_FILE} -do - echo $FILE - BASE=${FILE##*/} - ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} -done - -AAT_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _aat` -for FILE in ${AAT_FILE} -do - echo $FILE - BASE=${FILE##*/} - ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} -done - - -ATA_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _ata` -for FILE in ${ATA_FILE} -do - echo $FILE - BASE=${FILE##*/} - ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} -done - -### no smoothing for tinyvector and matrices libs - -TINY_BLITZ_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tiny_blitz` -for FILE in ${TINY_BLITZ_FILE} -do - echo $FILE - BASE=${FILE##*/} - cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE} -done - -TVMET_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tvmet` -for FILE in ${TVMET_FILE} -do - echo $FILE - BASE=${FILE##*/} - cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE} -done diff --git a/bench/btl/generic_bench/bench.hh b/bench/btl/generic_bench/bench.hh deleted file mode 100644 index cb3c359e3..000000000 --- a/bench/btl/generic_bench/bench.hh +++ /dev/null @@ -1,149 +0,0 @@ -//===================================================== -// File : bench.hh -// Author : L. Plagne -// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef BENCH_HH -#define BENCH_HH - -#include "btl.hh" -#include "bench_parameter.hh" -#include -#include "utilities.h" -#include "size_lin_log.hh" -#include "xy_file.hh" -#include -#include -#include "timers/portable_perf_analyzer.hh" -// #include "timers/mixed_perf_analyzer.hh" -// #include "timers/x86_perf_analyzer.hh" -// #include "timers/STL_perf_analyzer.hh" -#ifdef HAVE_MKL -extern "C" void cblas_saxpy(const int, const float, const float *, const int, float *, const int); -#endif -using namespace std; - -template