Remove obsolete bench/ and btl/ directories

libeigen/eigen!2217

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
2026-04-10 11:34:33 +08:00 · 2026-02-25 20:19:45 -08:00
parent 6e2aff6b5d
commit a95440de17
220 changed files with 9 additions and 21383 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,9 +74,6 @@ if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK)
  endif()
 endif()

-option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
-option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF)
-option(EIGEN_BUILD_AOCL_BENCH "Build AOCL benchmark" OFF)
 # Avoid building docs if included from another project.
 # Building documentation requires creating and running executables on the host
 # platform.  We shouldn't do this if cross-compiling.
@@ -93,7 +90,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
 endif()
 option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL})

-if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
+if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
  set(EIGEN_IS_BUILDING_ ON)
 endif()

@@ -764,66 +761,6 @@ if(EIGEN_BUILD_DOC)
  add_subdirectory(doc EXCLUDE_FROM_ALL)
 endif()

-# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"?
-if(EIGEN_BUILD_BTL)
-  add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
-endif()
-
-if(NOT WIN32 AND EIGEN_BUILD_SPBENCH)
-  add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
-endif()
-#--------------------------------------------------------------------------------------#
-#                      AOCL BENCHMARK BUILD SECTION                                    #
-#--------------------------------------------------------------------------------------#
-if(EIGEN_BUILD_AOCL_BENCH)
-  # Allow users to override the default architecture
-  set(EIGEN_AOCL_BENCH_ARCH "znver5" CACHE STRING "Target architecture for AOCL benchmark")
-  add_executable(benchmark_aocl EXCLUDE_FROM_ALL bench/benchmark_aocl.cpp)
-  include(CheckCXXCompilerFlag)
-  check_cxx_compiler_flag("-march=${EIGEN_AOCL_BENCH_ARCH}" COMPILER_SUPPORTS_AOCL_ARCH)
-  if(COMPILER_SUPPORTS_AOCL_ARCH)
-    target_compile_options(benchmark_aocl PRIVATE -O3 -Wno-shadow -march=${EIGEN_AOCL_BENCH_ARCH})
-  else()
-    message(WARNING "${EIGEN_AOCL_BENCH_ARCH} architecture not supported by compiler")
-    target_compile_options(benchmark_aocl PRIVATE -O3)
-  endif()
-
-  # Add custom flags if provided
-  if(EIGEN_AOCL_BENCH_FLAGS)
-    separate_arguments(CUSTOM_FLAGS NATIVE_COMMAND "${EIGEN_AOCL_BENCH_FLAGS}")
-    target_compile_options(benchmark_aocl PRIVATE ${CUSTOM_FLAGS})
-    # Check if OpenMP is requested in custom flags and link it
-    string(FIND "${EIGEN_AOCL_BENCH_FLAGS}" "-fopenmp" OPENMP_REQUESTED)
-    if(NOT OPENMP_REQUESTED EQUAL -1)
-      find_package(OpenMP)
-      if(OpenMP_CXX_FOUND)
-        target_link_libraries(benchmark_aocl OpenMP::OpenMP_CXX)
-      else()
-        # Generic fallback: let compiler handle OpenMP linking
-        if(MSVC)
-          target_compile_options(benchmark_aocl PRIVATE "/openmp")
-        else()
-          target_compile_options(benchmark_aocl PRIVATE "-fopenmp")
-          target_link_options(benchmark_aocl PRIVATE "-fopenmp")
-        endif()
-        message(STATUS "Using compiler OpenMP flags as fallback")
-      endif()
-    endif()
-  endif()
-
-  target_include_directories(benchmark_aocl PRIVATE ${INCLUDE_INSTALL_DIR})
-  if(EIGEN_AOCL_BENCH_USE_MT)
-    target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_MT)
-  else()
-    target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_ALL)
-  endif()
-  target_link_libraries(benchmark_aocl Eigen3::Eigen)
-  if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
-    target_link_libraries(benchmark_aocl ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
-  endif()
-endif()
-#----------------------------------------------------------------------------------------#
-
 if (EIGEN_BUILD_DEMOS)
  add_subdirectory(demos EXCLUDE_FROM_ALL)
 endif()
@@ -872,9 +809,6 @@ if(PROJECT_IS_TOP_LEVEL)
  if (EIGEN_BUILD_LAPACK)
    message(STATUS "lapack      | Build LAPACK subset library (not the same thing as Eigen)")
  endif()
-  if(EIGEN_BUILD_AOCL_BENCH)
-    message(STATUS "benchmark_aocl | Build AOCL benchmark executable")
-  endif()
  message(STATUS "------------+--------------------------------------------------------------")
  message(STATUS "")
 endif()
--- a/COPYING.README
+++ b/COPYING.README
@@ -6,6 +6,6 @@ Some files contain third-party code under BSD, LGPL, Apache, or other
 MPL2-compatible licenses, hence the other COPYING.* files here.

 Note that some optional external dependencies (e.g. FFTW, MPFR C++)
-and some bundled benchmark code (bench/btl/) are distributed under
-different licenses, including the GPL. Refer to the individual source
-files and their respective COPYING files for details.
+are distributed under different licenses, including the GPL. Refer to
+the individual source files and their respective COPYING files for
+details.
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@@ -1,129 +0,0 @@
-
-#include <Eigen/Sparse>
-#include <bench/BenchTimer.h>
-#include <set>
-
-using namespace std;
-using namespace Eigen;
-using namespace Eigen;
-
-#ifndef SIZE
-#define SIZE 1024
-#endif
-
-#ifndef DENSITY
-#define DENSITY 0.01
-#endif
-
-#ifndef SCALAR
-#define SCALAR double
-#endif
-
-typedef SCALAR Scalar;
-typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
-typedef Matrix<Scalar, Dynamic, 1> DenseVector;
-typedef SparseMatrix<Scalar> EigenSparseMatrix;
-
-void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) {
-  dst.reserve(double(rows) * cols * density);
-  for (int j = 0; j < cols; j++) {
-    for (int i = 0; i < rows; i++) {
-      Scalar v = (internal::random<float>(0, 1) < density) ? internal::random<Scalar>() : 0;
-      if (v != 0) dst.insert(i, j) = v;
-    }
-  }
-  dst.finalize();
-}
-
-void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) {
-  //   std::cout << "alloc " << nnzPerCol*cols << "\n";
-  dst.reserve(nnzPerCol * cols);
-  for (int j = 0; j < cols; j++) {
-    std::set<int> aux;
-    for (int i = 0; i < nnzPerCol; i++) {
-      int k = internal::random<int>(0, rows - 1);
-      while (aux.find(k) != aux.end()) k = internal::random<int>(0, rows - 1);
-      aux.insert(k);
-
-      dst.insert(k, j) = internal::random<Scalar>();
-    }
-  }
-  dst.finalize();
-}
-
-void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) {
-  dst.setZero();
-  for (int j = 0; j < src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
-}
-
-#ifndef NOGMM
-#include "gmm/gmm.h"
-typedef gmm::csc_matrix<Scalar> GmmSparse;
-typedef gmm::col_matrix<gmm::wsvector<Scalar> > GmmDynSparse;
-void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) {
-  GmmDynSparse tmp(src.rows(), src.cols());
-  for (int j = 0; j < src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value();
-  gmm::copy(tmp, dst);
-}
-#endif
-
-#ifndef NOMTL
-#include <boost/numeric/mtl/mtl.hpp>
-typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
-typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
-void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) {
-  mtl::matrix::inserter<MtlSparse> ins(dst);
-  for (int j = 0; j < src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value();
-}
-#endif
-
-#ifdef CSPARSE
-extern "C" {
-#include "cs.h"
-}
-void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) {
-  cs* aux = cs_spalloc(0, 0, 1, 1, 1);
-  for (int j = 0; j < src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      if (!cs_entry(aux, it.index(), j, it.value())) {
-        std::cout << "cs_entry error\n";
-        exit(2);
-      }
-  dst = cs_compress(aux);
-  //    cs_spfree(aux);
-}
-#endif  // CSPARSE
-
-#ifndef NOUBLAS
-#include <boost/numeric/ublas/vector.hpp>
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/io.hpp>
-#include <boost/numeric/ublas/triangular.hpp>
-#include <boost/numeric/ublas/vector_sparse.hpp>
-#include <boost/numeric/ublas/matrix_sparse.hpp>
-#include <boost/numeric/ublas/vector_of_vector.hpp>
-#include <boost/numeric/ublas/operation.hpp>
-
-typedef boost::numeric::ublas::compressed_matrix<Scalar, boost::numeric::ublas::column_major> UBlasSparse;
-
-void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) {
-  dst.resize(src.rows(), src.cols(), false);
-  for (int j = 0; j < src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
-}
-
-template <typename EigenType, typename UblasType>
-void eiToUblasVec(const EigenType& src, UblasType& dst) {
-  dst.resize(src.size());
-  for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
-}
-#endif
-
-#ifdef OSKI
-extern "C" {
-#include <oski/oski.h>
-}
-#endif
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -1,176 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_BENCH_TIMERR_H
-#define EIGEN_BENCH_TIMERR_H
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#ifndef NOMINMAX
-#define NOMINMAX
-#define EIGEN_BT_UNDEF_NOMINMAX
-#endif
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-#endif
-#include <windows.h>
-#elif defined(__APPLE__)
-#include <mach/mach_time.h>
-#else
-#include <unistd.h>
-#endif
-
-static void escape(void *p) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
-  asm volatile("" : : "g"(p) : "memory");
-#endif
-}
-
-static void clobber() {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
-  asm volatile("" : : : "memory");
-#endif
-}
-
-#include <Eigen/Core>
-
-namespace Eigen {
-
-enum { CPU_TIMER = 0, REAL_TIMER = 1 };
-
-/** Elapsed time timer keeping the best try.
- *
- * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
- * On Windows we use QueryPerformanceCounter
- *
- * Important: on linux, you must link with -lrt
- */
-class BenchTimer {
- public:
-  BenchTimer() {
-#if defined(_WIN32) || defined(__CYGWIN__)
-    LARGE_INTEGER freq;
-    QueryPerformanceFrequency(&freq);
-    m_frequency = (double)freq.QuadPart;
-#endif
-    reset();
-  }
-
-  ~BenchTimer() {}
-
-  inline void reset() {
-    m_bests.fill(1e9);
-    m_worsts.fill(0);
-    m_totals.setZero();
-  }
-  inline void start() {
-    m_starts[CPU_TIMER] = getCpuTime();
-    m_starts[REAL_TIMER] = getRealTime();
-  }
-  inline void stop() {
-    m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
-    m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
-#if EIGEN_VERSION_AT_LEAST(2, 90, 0)
-    m_bests = m_bests.cwiseMin(m_times);
-    m_worsts = m_worsts.cwiseMax(m_times);
-#else
-    m_bests(0) = std::min(m_bests(0), m_times(0));
-    m_bests(1) = std::min(m_bests(1), m_times(1));
-    m_worsts(0) = std::max(m_worsts(0), m_times(0));
-    m_worsts(1) = std::max(m_worsts(1), m_times(1));
-#endif
-    m_totals += m_times;
-  }
-
-  /** Return the elapsed time in seconds between the last start/stop pair
-   */
-  inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; }
-
-  /** Return the best elapsed time in seconds
-   */
-  inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; }
-
-  /** Return the worst elapsed time in seconds
-   */
-  inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; }
-
-  /** Return the total elapsed time in seconds.
-   */
-  inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; }
-
-  inline double getCpuTime() const {
-#ifdef _WIN32
-    LARGE_INTEGER query_ticks;
-    QueryPerformanceCounter(&query_ticks);
-    return query_ticks.QuadPart / m_frequency;
-#elif __APPLE__
-    return double(mach_absolute_time()) * 1e-9;
-#else
-    timespec ts;
-    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
-    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
-#endif
-  }
-
-  inline double getRealTime() const {
-#ifdef _WIN32
-    SYSTEMTIME st;
-    GetSystemTime(&st);
-    return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
-#elif __APPLE__
-    return double(mach_absolute_time()) * 1e-9;
-#else
-    timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
-#endif
-  }
-
- protected:
-#if defined(_WIN32) || defined(__CYGWIN__)
-  double m_frequency;
-#endif
-  Vector2d m_starts;
-  Vector2d m_times;
-  Vector2d m_bests;
-  Vector2d m_worsts;
-  Vector2d m_totals;
-
- public:
-  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
-};
-
-#define BENCH(TIMER, TRIES, REP, CODE)                                 \
-  {                                                                    \
-    TIMER.reset();                                                     \
-    for (int bench_tries_ = 0; bench_tries_ < TRIES; ++bench_tries_) { \
-      TIMER.start();                                                   \
-      for (int bench_reps_ = 0; bench_reps_ < REP; ++bench_reps_) {    \
-        CODE;                                                          \
-      }                                                                \
-      TIMER.stop();                                                    \
-      clobber();                                                       \
-    }                                                                  \
-  }
-
-}  // namespace Eigen
-
-// clean #defined tokens
-#ifdef EIGEN_BT_UNDEF_NOMINMAX
-#undef EIGEN_BT_UNDEF_NOMINMAX
-#undef NOMINMAX
-#endif
-
-#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-#undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-#undef WIN32_LEAN_AND_MEAN
-#endif
-
-#endif  // EIGEN_BENCH_TIMERR_H
--- a/bench/BenchUtil.h
+++ b/bench/BenchUtil.h
@@ -1,86 +0,0 @@
-
-#ifndef EIGEN_BENCH_UTIL_H
-#define EIGEN_BENCH_UTIL_H
-
-#include <Eigen/Core>
-#include "BenchTimer.h"
-
-using namespace std;
-using namespace Eigen;
-
-#include <boost/preprocessor/repetition/enum_params.hpp>
-#include <boost/preprocessor/repetition.hpp>
-#include <boost/preprocessor/seq.hpp>
-#include <boost/preprocessor/array.hpp>
-#include <boost/preprocessor/arithmetic.hpp>
-#include <boost/preprocessor/comparison.hpp>
-#include <boost/preprocessor/punctuation.hpp>
-#include <boost/preprocessor/punctuation/comma.hpp>
-#include <boost/preprocessor/stringize.hpp>
-
-template <typename MatrixType>
-void initMatrix_random(MatrixType& mat) __attribute__((noinline));
-template <typename MatrixType>
-void initMatrix_random(MatrixType& mat) {
-  mat.setRandom();  // = MatrixType::random(mat.rows(), mat.cols());
-}
-
-template <typename MatrixType>
-void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
-template <typename MatrixType>
-void initMatrix_identity(MatrixType& mat) {
-  mat.setIdentity();
-}
-
-#ifndef __INTEL_COMPILER
-#define DISABLE_SSE_EXCEPTIONS()          \
-  {                                       \
-    int aux;                              \
-    asm("stmxcsr   %[aux]           \n\t" \
-        "orl       $32832, %[aux]   \n\t" \
-        "ldmxcsr   %[aux]           \n\t" \
-        :                                 \
-        : [aux] "m"(aux));                \
-  }
-#else
-#define DISABLE_SSE_EXCEPTIONS()
-#endif
-
-#ifdef BENCH_GMM
-#include <gmm/gmm.h>
-template <typename EigenMatrixType, typename GmmMatrixType>
-void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) {
-  dst.resize(src.rows(), src.cols());
-  for (int j = 0; j < src.cols(); ++j)
-    for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
-}
-#endif
-
-#ifdef BENCH_GSL
-#include <gsl/gsl_matrix.h>
-#include <gsl/gsl_linalg.h>
-#include <gsl/gsl_eigen.h>
-template <typename EigenMatrixType>
-void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) {
-  for (int j = 0; j < src.cols(); ++j)
-    for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j));
-}
-#endif
-
-#ifdef BENCH_UBLAS
-#include <boost/numeric/ublas/matrix.hpp>
-#include <boost/numeric/ublas/vector.hpp>
-template <typename EigenMatrixType, typename UblasMatrixType>
-void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) {
-  dst.resize(src.rows(), src.cols());
-  for (int j = 0; j < src.cols(); ++j)
-    for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
-}
-template <typename EigenType, typename UblasType>
-void eiToUblasVec(const EigenType& src, UblasType& dst) {
-  dst.resize(src.size());
-  for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
-}
-#endif
-
-#endif  // EIGEN_BENCH_UTIL_H
--- a/bench/README.txt
+++ b/bench/README.txt
@@ -1,108 +0,0 @@
-
-This folder contains a couple of benchmark utities and Eigen benchmarks.
-
-****************************
-* bench_multi_compilers.sh *
-****************************
-
-This script allows to run a benchmark on a set of different compilers/compiler options.
-It takes two arguments:
- - a file defining the list of the compilers with their options
- - the .cpp file of the benchmark
-
-Examples:
-
-$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
-
-    g++-4.1 -O3 -DNDEBUG -finline-limit=10000
-    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
-    0.271102   0.131416   0.422322   0.198633
-    0.201658   0.102436   0.397566   0.207282
-
-    g++-4.2 -O3 -DNDEBUG -finline-limit=10000
-    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
-    0.107805   0.0890579   0.30265   0.161843
-    0.127157   0.0712581   0.278341   0.191029
-
-    g++-4.3 -O3 -DNDEBUG -finline-limit=10000
-    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
-    0.134318   0.105291   0.3704   0.180966
-    0.137703   0.0732472   0.31225   0.202204
-
-    icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
-    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
-    0.226145   0.0941319   0.371873   0.159433
-    0.109302   0.0837538   0.328102   0.173891
-
-
-$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
-
-    g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
-    double, fixed-size 4x4: 0.00165105s  0.0778739s
-    double, 32x32: 0.0654769s 0.075289s  => x0.869674 (2)
-    double, 128x128: 0.054148s 0.0419669s  => x1.29025 (2)
-    double, 512x512: 0.913799s 0.428533s  => x2.13239 (2)
-    double, 1024x1024: 14.5972s 9.3542s  => x1.5605 (2)
-
-    icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
-    double, fixed-size 4x4: 0.000589848s  0.019949s
-    double, 32x32: 0.0682781s 0.0449722s  => x1.51823 (2)
-    double, 128x128: 0.0547509s 0.0435519s  => x1.25714 (2)
-    double, 512x512: 0.829436s 0.424438s  => x1.9542 (2)
-    double, 1024x1024: 14.5243s 10.7735s  => x1.34815 (2)
-
-
-
-************************
-* benchmark_aocl       *
-************************
-
-This benchmark exercises Eigen operations using AMD Optimized Libraries
-(AOCL). It is disabled by default and can be enabled when configuring the
-build:
-
-  cmake .. -DEIGEN_BUILD_AOCL_BENCH=ON
-
-The resulting `benchmark_aocl` target is compiled with `-O3` and, if the
-compiler supports it, `-march=znver5` for optimal performance on AMD
-processors.
-
-The benchmark also links to `libblis-mt.so` and `libflame.so` so BLAS and
-LAPACK operations run with multithreaded AOCL when available.
-
-By default the CMake build defines `EIGEN_USE_AOCL_MT` via the option
-`EIGEN_AOCL_BENCH_USE_MT` (enabled).  Set this option to `OFF` if you want
-to build the benchmark using the single-threaded AOCL libraries instead,
-in which case `EIGEN_USE_AOCL_ALL` is defined.
-
-
-
-Alternatively you can build the same benchmark using the
-`Makefile` in this directory. This allows experimenting with
-different compiler flags without reconfiguring CMake:
-
-```
-cd bench && make       # builds with -O3 -march=znver5 by default
-make clean && make CXX="clang++" ## For different compiler apart from g++
-make clean && make MARCH="" CXXFLAGS="-O2"  # example of custom flags
-make AOCL_ROOT=/opt/aocl            # use AOCL from a custom location
-
-This Makefile links against `libblis-mt.so` and `libflame.so` so the
-matrix multiplication benchmark exercises multithreaded BLIS when
-`EIGEN_USE_AOCL_MT` is defined (enabled by default in the Makefile).
-
-If you prefer to compile manually, ensure that the Eigen include path
-points to the directory where `AOCL_Support.h` resides. For example:
-
-
-clang++ -O3 -std=c++14 -I../build/install/include \
-        -march=znver5 -DEIGEN_USE_AOCL_MT \
-        benchmark_aocl.cpp -o benchmark_aocl \
-        -lblis-mt -lflame -lamdlibm -lpthread -lm
-```
-Replace `../install/include` with your actual Eigen install path.
-
-When invoking `make`, you can point `AOCL_ROOT` to your AOCL
-installation directory so the Makefile links against `$(AOCL_ROOT)/lib`.
-
-
--- a/bench/analyze-blocking-sizes.cpp
+++ b/bench/analyze-blocking-sizes.cpp
@@ -1,772 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include <iostream>
-#include <cstdint>
-#include <cstdlib>
-#include <vector>
-#include <algorithm>
-#include <fstream>
-#include <string>
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <memory>
-
-#include <Eigen/Core>
-
-using namespace std;
-
-const int default_precision = 4;
-
-// see --only-cubic-sizes
-bool only_cubic_sizes = false;
-
-// see --dump-tables
-bool dump_tables = false;
-
-uint8_t log2_pot(size_t x) {
-  size_t l = 0;
-  while (x >>= 1) l++;
-  return l;
-}
-
-uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
-  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
-}
-
-// just a helper to store a triple of K,M,N sizes for matrix product
-struct size_triple_t {
-  uint16_t k, m, n;
-  size_triple_t() : k(0), m(0), n(0) {}
-  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
-  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
-  size_triple_t(uint16_t compact) {
-    k = 1 << ((compact & 0xf00) >> 8);
-    m = 1 << ((compact & 0x0f0) >> 4);
-    n = 1 << ((compact & 0x00f) >> 0);
-  }
-  bool is_cubic() const { return k == m && m == n; }
-};
-
-ostream& operator<<(ostream& s, const size_triple_t& t) { return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; }
-
-struct inputfile_entry_t {
-  uint16_t product_size;
-  uint16_t pot_block_size;
-  size_triple_t nonpot_block_size;
-  float gflops;
-};
-
-struct inputfile_t {
-  enum class type_t { unknown, all_pot_sizes, default_sizes };
-
-  string filename;
-  vector<inputfile_entry_t> entries;
-  type_t type;
-
-  inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) {
-    ifstream stream(filename);
-    if (!stream.is_open()) {
-      cerr << "couldn't open input file: " << filename << endl;
-      exit(1);
-    }
-    string line;
-    while (getline(stream, line)) {
-      if (line.empty()) continue;
-      if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
-        if (type != type_t::unknown) {
-          cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
-          exit(1);
-        }
-        type = type_t::all_pot_sizes;
-        continue;
-      }
-      if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
-        if (type != type_t::unknown) {
-          cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
-          exit(1);
-        }
-        type = type_t::default_sizes;
-        continue;
-      }
-
-      if (type == type_t::unknown) {
-        continue;
-      }
-      switch (type) {
-        case type_t::all_pot_sizes: {
-          unsigned int product_size, block_size;
-          float gflops;
-          int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops);
-          if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff ||
-              !isfinite(gflops)) {
-            cerr << "ill-formed input file: " << filename << endl;
-            cerr << "offending line:" << endl << line << endl;
-            exit(1);
-          }
-          if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
-            continue;
-          }
-          inputfile_entry_t entry;
-          entry.product_size = uint16_t(product_size);
-          entry.pot_block_size = uint16_t(block_size);
-          entry.gflops = gflops;
-          entries.push_back(entry);
-          break;
-        }
-        case type_t::default_sizes: {
-          unsigned int product_size;
-          float gflops;
-          int bk, bm, bn;
-          int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops);
-          if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) {
-            cerr << "ill-formed input file: " << filename << endl;
-            cerr << "offending line:" << endl << line << endl;
-            exit(1);
-          }
-          if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
-            continue;
-          }
-          inputfile_entry_t entry;
-          entry.product_size = uint16_t(product_size);
-          entry.pot_block_size = 0;
-          entry.nonpot_block_size = size_triple_t(bk, bm, bn);
-          entry.gflops = gflops;
-          entries.push_back(entry);
-          break;
-        }
-
-        default:
-          break;
-      }
-    }
-    stream.close();
-    if (type == type_t::unknown) {
-      cerr << "Unrecognized input file " << filename << endl;
-      exit(1);
-    }
-    if (entries.empty()) {
-      cerr << "didn't find any measurements in input file: " << filename << endl;
-      exit(1);
-    }
-  }
-};
-
-struct preprocessed_inputfile_entry_t {
-  uint16_t product_size;
-  uint16_t block_size;
-
-  float efficiency;
-};
-
-bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) {
-  return e1.efficiency < e2.efficiency;
-}
-
-struct preprocessed_inputfile_t {
-  string filename;
-  vector<preprocessed_inputfile_entry_t> entries;
-
-  preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) {
-    if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
-      abort();
-    }
-    auto it = inputfile.entries.begin();
-    auto it_first_with_given_product_size = it;
-    while (it != inputfile.entries.end()) {
-      ++it;
-      if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) {
-        import_input_file_range_one_product_size(it_first_with_given_product_size, it);
-        it_first_with_given_product_size = it;
-      }
-    }
-  }
-
- private:
-  void import_input_file_range_one_product_size(const vector<inputfile_entry_t>::const_iterator& begin,
-                                                const vector<inputfile_entry_t>::const_iterator& end) {
-    uint16_t product_size = begin->product_size;
-    float max_gflops = 0.0f;
-    for (auto it = begin; it != end; ++it) {
-      if (it->product_size != product_size) {
-        cerr << "Unexpected ordering of entries in " << filename << endl;
-        cerr << "(Expected all entries for product size " << hex << product_size << dec << " to be grouped)" << endl;
-        exit(1);
-      }
-      max_gflops = max(max_gflops, it->gflops);
-    }
-    for (auto it = begin; it != end; ++it) {
-      preprocessed_inputfile_entry_t entry;
-      entry.product_size = it->product_size;
-      entry.block_size = it->pot_block_size;
-      entry.efficiency = it->gflops / max_gflops;
-      entries.push_back(entry);
-    }
-  }
-};
-
-void check_all_files_in_same_exact_order(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) {
-  if (preprocessed_inputfiles.empty()) {
-    return;
-  }
-
-  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[0];
-  const size_t num_entries = first_file.entries.size();
-
-  for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
-    if (preprocessed_inputfiles[i].entries.size() != num_entries) {
-      cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and "
-           << first_file.filename << endl;
-      exit(1);
-    }
-  }
-
-  for (size_t entry_index = 0; entry_index < num_entries; entry_index++) {
-    const uint16_t entry_product_size = first_file.entries[entry_index].product_size;
-    const uint16_t entry_block_size = first_file.entries[entry_index].block_size;
-    for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
-      const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
-      if (cur_file.entries[entry_index].product_size != entry_product_size ||
-          cur_file.entries[entry_index].block_size != entry_block_size) {
-        cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename
-             << endl;
-        exit(1);
-      }
-    }
-  }
-}
-
-float efficiency_of_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                           const vector<size_t>& subset) {
-  if (subset.size() <= 1) {
-    return 1.0f;
-  }
-  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
-  const size_t num_entries = first_file.entries.size();
-  float efficiency = 1.0f;
-  size_t entry_index = 0;
-  size_t first_entry_index_with_this_product_size = 0;
-  uint16_t product_size = first_file.entries[0].product_size;
-  while (entry_index < num_entries) {
-    ++entry_index;
-    if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
-      float efficiency_this_product_size = 0.0f;
-      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
-        float efficiency_this_entry = 1.0f;
-        for (auto i = subset.begin(); i != subset.end(); ++i) {
-          efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
-        }
-        efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
-      }
-      efficiency = min(efficiency, efficiency_this_product_size);
-      if (entry_index < num_entries) {
-        first_entry_index_with_this_product_size = entry_index;
-        product_size = first_file.entries[entry_index].product_size;
-      }
-    }
-  }
-
-  return efficiency;
-}
-
-void dump_table_for_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                           const vector<size_t>& subset) {
-  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
-  const size_t num_entries = first_file.entries.size();
-  size_t entry_index = 0;
-  size_t first_entry_index_with_this_product_size = 0;
-  uint16_t product_size = first_file.entries[0].product_size;
-  size_t i = 0;
-  size_triple_t min_product_size(first_file.entries.front().product_size);
-  size_triple_t max_product_size(first_file.entries.back().product_size);
-  if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
-    abort();
-  }
-  if (only_cubic_sizes) {
-    cerr << "Can't generate tables with --only-cubic-sizes." << endl;
-    abort();
-  }
-  cout << "struct LookupTable {" << endl;
-  cout << "  static const size_t BaseSize = " << min_product_size.k << ";" << endl;
-  const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
-  const size_t TableSize = NumSizes * NumSizes * NumSizes;
-  cout << "  static const size_t NumSizes = " << NumSizes << ";" << endl;
-  cout << "  static const unsigned short* Data() {" << endl;
-  cout << "    static const unsigned short data[" << TableSize << "] = {";
-  while (entry_index < num_entries) {
-    ++entry_index;
-    if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
-      float best_efficiency_this_product_size = 0.0f;
-      uint16_t best_block_size_this_product_size = 0;
-      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
-        float efficiency_this_entry = 1.0f;
-        for (auto i = subset.begin(); i != subset.end(); ++i) {
-          efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
-        }
-        if (efficiency_this_entry > best_efficiency_this_product_size) {
-          best_efficiency_this_product_size = efficiency_this_entry;
-          best_block_size_this_product_size = first_file.entries[e].block_size;
-        }
-      }
-      if ((i++) % NumSizes) {
-        cout << " ";
-      } else {
-        cout << endl << "      ";
-      }
-      cout << "0x" << hex << best_block_size_this_product_size << dec;
-      if (entry_index < num_entries) {
-        cout << ",";
-        first_entry_index_with_this_product_size = entry_index;
-        product_size = first_file.entries[entry_index].product_size;
-      }
-    }
-  }
-  if (i != TableSize) {
-    cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
-    abort();
-  }
-  cout << endl << "    };" << endl;
-  cout << "    return data;" << endl;
-  cout << "  }" << endl;
-  cout << "};" << endl;
-}
-
-float efficiency_of_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                              const vector<vector<size_t>>& partition) {
-  float efficiency = 1.0f;
-  for (auto s = partition.begin(); s != partition.end(); ++s) {
-    efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
-  }
-  return efficiency;
-}
-
-void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size) {
-  assert(subset_size >= 1 && subset_size <= set_size);
-  out_subset.resize(subset_size);
-  for (size_t i = 0; i < subset_size; i++) {
-    out_subset[i] = i;
-  }
-}
-
-bool is_last_subset(const vector<size_t>& subset, size_t set_size) { return subset[0] == set_size - subset.size(); }
-
-void next_subset(vector<size_t>& inout_subset, size_t set_size) {
-  if (is_last_subset(inout_subset, set_size)) {
-    cerr << "iterating past the last subset" << endl;
-    abort();
-  }
-  size_t i = 1;
-  while (inout_subset[inout_subset.size() - i] == set_size - i) {
-    i++;
-    assert(i <= inout_subset.size());
-  }
-  size_t first_index_to_change = inout_subset.size() - i;
-  inout_subset[first_index_to_change]++;
-  size_t p = inout_subset[first_index_to_change];
-  for (size_t j = first_index_to_change + 1; j < inout_subset.size(); j++) {
-    inout_subset[j] = ++p;
-  }
-}
-
-const size_t number_of_subsets_limit = 100;
-const size_t always_search_subsets_of_size_at_least = 2;
-
-bool is_number_of_subsets_feasible(size_t n, size_t p) {
-  assert(n > 0 && p > 0 && p <= n);
-  uint64_t numerator = 1, denominator = 1;
-  for (size_t i = 0; i < p; i++) {
-    numerator *= n - i;
-    denominator *= i + 1;
-    if (numerator > denominator * number_of_subsets_limit) {
-      return false;
-    }
-  }
-  return true;
-}
-
-size_t max_feasible_subset_size(size_t n) {
-  assert(n > 0);
-  const size_t minresult = min<size_t>(n - 1, always_search_subsets_of_size_at_least);
-  for (size_t p = 1; p <= n - 1; p++) {
-    if (!is_number_of_subsets_feasible(n, p + 1)) {
-      return max(p, minresult);
-    }
-  }
-  return n - 1;
-}
-
-void find_subset_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                                             float required_efficiency_to_beat, vector<size_t>& inout_remainder,
-                                             vector<size_t>& out_subset) {
-  out_subset.resize(0);
-
-  if (required_efficiency_to_beat >= 1.0f) {
-    cerr << "can't beat efficiency 1." << endl;
-    abort();
-  }
-
-  while (!inout_remainder.empty()) {
-    vector<size_t> candidate_indices(inout_remainder.size());
-    for (size_t i = 0; i < candidate_indices.size(); i++) {
-      candidate_indices[i] = i;
-    }
-
-    size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
-    while (candidate_indices_subset_size >= 1) {
-      vector<size_t> candidate_indices_subset;
-      make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size());
-
-      vector<size_t> best_candidate_indices_subset;
-      float best_efficiency = 0.0f;
-      vector<size_t> trial_subset = out_subset;
-      trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
-      while (true) {
-        for (size_t i = 0; i < candidate_indices_subset_size; i++) {
-          trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
-        }
-
-        float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
-        if (trial_efficiency > best_efficiency) {
-          best_efficiency = trial_efficiency;
-          best_candidate_indices_subset = candidate_indices_subset;
-        }
-        if (is_last_subset(candidate_indices_subset, candidate_indices.size())) {
-          break;
-        }
-        next_subset(candidate_indices_subset, candidate_indices.size());
-      }
-
-      if (best_efficiency > required_efficiency_to_beat) {
-        for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
-          candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
-        }
-        candidate_indices.resize(best_candidate_indices_subset.size());
-      }
-      candidate_indices_subset_size--;
-    }
-
-    size_t candidate_index = candidate_indices[0];
-    auto candidate_iterator = inout_remainder.begin() + candidate_index;
-    vector<size_t> trial_subset = out_subset;
-
-    trial_subset.push_back(*candidate_iterator);
-    float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
-    if (trial_efficiency > required_efficiency_to_beat) {
-      out_subset.push_back(*candidate_iterator);
-      inout_remainder.erase(candidate_iterator);
-    } else {
-      break;
-    }
-  }
-}
-
-void find_partition_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                                                float required_efficiency_to_beat,
-                                                vector<vector<size_t>>& out_partition) {
-  out_partition.resize(0);
-
-  vector<size_t> remainder;
-  for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
-    remainder.push_back(i);
-  }
-
-  while (!remainder.empty()) {
-    vector<size_t> new_subset;
-    find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder,
-                                            new_subset);
-    out_partition.push_back(new_subset);
-  }
-}
-
-void print_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-                     const vector<vector<size_t>>& partition) {
-  float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
-  cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
-  for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
-    cout << "  Subset " << (subset - partition.begin()) << ", efficiency "
-         << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl;
-    for (auto file = subset->begin(); file != subset->end(); ++file) {
-      cout << "    " << preprocessed_inputfiles[*file].filename << endl;
-    }
-    if (dump_tables) {
-      cout << "  Table:" << endl;
-      dump_table_for_subset(preprocessed_inputfiles, *subset);
-    }
-  }
-  cout << endl;
-}
-
-struct action_t {
-  virtual const char* invokation_name() const {
-    abort();
-    return nullptr;
-  }
-  virtual void run(const vector<string>&) const { abort(); }
-  virtual ~action_t() {}
-};
-
-struct partition_action_t : action_t {
-  virtual const char* invokation_name() const override { return "partition"; }
-  virtual void run(const vector<string>& input_filenames) const override {
-    vector<preprocessed_inputfile_t> preprocessed_inputfiles;
-
-    if (input_filenames.empty()) {
-      cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
-      exit(1);
-    }
-
-    for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
-      inputfile_t inputfile(*it);
-      switch (inputfile.type) {
-        case inputfile_t::type_t::all_pot_sizes:
-          preprocessed_inputfiles.emplace_back(inputfile);
-          break;
-        case inputfile_t::type_t::default_sizes:
-          cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and "
-               << "has no use for " << *it << " which contains measurements for default sizes." << endl;
-          exit(1);
-          break;
-        default:
-          cerr << "Unrecognized input file: " << *it << endl;
-          exit(1);
-      }
-    }
-
-    check_all_files_in_same_exact_order(preprocessed_inputfiles);
-
-    float required_efficiency_to_beat = 0.0f;
-    vector<vector<vector<size_t>>> partitions;
-    cerr << "searching for partitions...\r" << flush;
-    while (true) {
-      vector<vector<size_t>> partition;
-      find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition);
-      float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
-      cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for "
-           << 100.0f * actual_efficiency << " % efficiency"
-           << "                  \r" << flush;
-      partitions.push_back(partition);
-      if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
-        break;
-      }
-      required_efficiency_to_beat = actual_efficiency;
-    }
-    cerr << "                                                                  " << endl;
-    while (true) {
-      bool repeat = false;
-      for (size_t i = 0; i < partitions.size() - 1; i++) {
-        if (partitions[i].size() >= partitions[i + 1].size()) {
-          partitions.erase(partitions.begin() + i);
-          repeat = true;
-          break;
-        }
-      }
-      if (!repeat) {
-        break;
-      }
-    }
-    for (auto it = partitions.begin(); it != partitions.end(); ++it) {
-      print_partition(preprocessed_inputfiles, *it);
-    }
-  }
-};
-
-struct evaluate_defaults_action_t : action_t {
-  struct results_entry_t {
-    uint16_t product_size;
-    size_triple_t default_block_size;
-    uint16_t best_pot_block_size;
-    float default_gflops;
-    float best_pot_gflops;
-    float default_efficiency;
-  };
-  friend ostream& operator<<(ostream& s, const results_entry_t& entry) {
-    return s << "Product size " << size_triple_t(entry.product_size) << ": default block size "
-             << entry.default_block_size << " -> " << entry.default_gflops
-             << " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
-             << " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> "
-             << entry.best_pot_gflops << " GFlop/s" << dec;
-  }
-  static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
-    return e1.default_efficiency < e2.default_efficiency;
-  }
-  virtual const char* invokation_name() const override { return "evaluate-defaults"; }
-  void show_usage_and_exit() const {
-    cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
-    cerr << "checks how well the performance with default sizes compares to the best "
-         << "performance measured over all POT sizes." << endl;
-    exit(1);
-  }
-  virtual void run(const vector<string>& input_filenames) const override {
-    if (input_filenames.size() != 2) {
-      show_usage_and_exit();
-    }
-    inputfile_t inputfile_default_sizes(input_filenames[0]);
-    inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
-    if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
-      cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
-      show_usage_and_exit();
-    }
-    if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) {
-      cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl;
-      show_usage_and_exit();
-    }
-    vector<results_entry_t> results;
-    vector<results_entry_t> cubic_results;
-
-    uint16_t product_size = 0;
-    auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
-    for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
-         it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) {
-      if (it_default_sizes->product_size == product_size) {
-        continue;
-      }
-      product_size = it_default_sizes->product_size;
-      while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
-             it_all_pot_sizes->product_size != product_size) {
-        ++it_all_pot_sizes;
-      }
-      if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
-        break;
-      }
-      uint16_t best_pot_block_size = 0;
-      float best_pot_gflops = 0;
-      for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
-           ++it) {
-        if (it->gflops > best_pot_gflops) {
-          best_pot_gflops = it->gflops;
-          best_pot_block_size = it->pot_block_size;
-        }
-      }
-      results_entry_t entry;
-      entry.product_size = product_size;
-      entry.default_block_size = it_default_sizes->nonpot_block_size;
-      entry.best_pot_block_size = best_pot_block_size;
-      entry.default_gflops = it_default_sizes->gflops;
-      entry.best_pot_gflops = best_pot_gflops;
-      entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops;
-      results.push_back(entry);
-
-      size_triple_t t(product_size);
-      if (t.k == t.m && t.m == t.n) {
-        cubic_results.push_back(entry);
-      }
-    }
-
-    cout << "All results:" << endl;
-    for (auto it = results.begin(); it != results.end(); ++it) {
-      cout << *it << endl;
-    }
-    cout << endl;
-
-    sort(results.begin(), results.end(), lower_efficiency);
-
-    const size_t n = min<size_t>(20, results.size());
-    cout << n << " worst results:" << endl;
-    for (size_t i = 0; i < n; i++) {
-      cout << results[i] << endl;
-    }
-    cout << endl;
-
-    cout << "cubic results:" << endl;
-    for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) {
-      cout << *it << endl;
-    }
-    cout << endl;
-
-    sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
-
-    cout.precision(2);
-    vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
-    for (auto it = a.begin(); it != a.end(); ++it) {
-      size_t n = min(results.size() - 1, size_t(*it * results.size()));
-      cout << (100.0f * n / (results.size() - 1))
-           << " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %"
-           << endl;
-    }
-    cout.precision(default_precision);
-  }
-};
-
-void show_usage_and_exit(int argc, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
-  cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
-  cerr << "available actions:" << endl;
-  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
-    cerr << "  " << (*it)->invokation_name() << endl;
-  }
-  cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
-  exit(1);
-}
-
-int main(int argc, char* argv[]) {
-  cout.precision(default_precision);
-  cerr.precision(default_precision);
-
-  vector<unique_ptr<action_t>> available_actions;
-  available_actions.emplace_back(new partition_action_t);
-  available_actions.emplace_back(new evaluate_defaults_action_t);
-
-  vector<string> input_filenames;
-
-  action_t* action = nullptr;
-
-  if (argc < 2) {
-    show_usage_and_exit(argc, argv, available_actions);
-  }
-  for (int i = 1; i < argc; i++) {
-    bool arg_handled = false;
-    // Step 1. Try to match action invocation names.
-    for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
-      if (!strcmp(argv[i], (*it)->invokation_name())) {
-        if (!action) {
-          action = it->get();
-          arg_handled = true;
-          break;
-        } else {
-          cerr << "can't specify more than one action!" << endl;
-          show_usage_and_exit(argc, argv, available_actions);
-        }
-      }
-    }
-    if (arg_handled) {
-      continue;
-    }
-    // Step 2. Try to match option names.
-    if (argv[i][0] == '-') {
-      if (!strcmp(argv[i], "--only-cubic-sizes")) {
-        only_cubic_sizes = true;
-        arg_handled = true;
-      }
-      if (!strcmp(argv[i], "--dump-tables")) {
-        dump_tables = true;
-        arg_handled = true;
-      }
-      if (!arg_handled) {
-        cerr << "Unrecognized option: " << argv[i] << endl;
-        show_usage_and_exit(argc, argv, available_actions);
-      }
-    }
-    if (arg_handled) {
-      continue;
-    }
-    // Step 3. Default to interpreting args as input filenames.
-    input_filenames.emplace_back(argv[i]);
-  }
-
-  if (dump_tables && only_cubic_sizes) {
-    cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
-    show_usage_and_exit(argc, argv, available_actions);
-  }
-
-  if (!action) {
-    show_usage_and_exit(argc, argv, available_actions);
-  }
-
-  action->run(input_filenames);
-}
--- a/bench/basicbench.cxxlist
+++ b/bench/basicbench.cxxlist
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG"
-# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000"
-
-# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
-#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
-
-# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
-#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
-# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
-# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
-
-# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
-#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
-# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
-# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
-
-# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx"
-# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use"
-
-#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt"
-CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
-CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt"
-CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
-CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt"
-CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
-CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt"
--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@@ -1,34 +0,0 @@
-
-#include <iostream>
-#include "BenchUtil.h"
-#include "basicbenchmark.h"
-
-int main(int argc, char *argv[]) {
-  DISABLE_SSE_EXCEPTIONS();
-
-// this is the list of matrix type and size we want to bench:
-// ((suffix) (matrix size) (number of iterations))
-#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000))
-  //   #define MODES ((Xd)(20)(10000))
-
-#define _GENERATE_HEADER(R, ARG, EL)           \
-  << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \
-                        << "-"                 \
-                        << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "   /   "
-
-  std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl;
-
-  const int tries = 10;
-
-#define _RUN_BENCH(R, ARG, EL)                                                                                      \
-  std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \
-                   BOOST_PP_SEQ_ELEM(2, EL), tries)                                                                 \
-            << "   ";
-
-  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES);
-  std::cout << endl;
-  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES);
-  std::cout << endl;
-
-  return 0;
-}
--- a/bench/basicbenchmark.h
+++ b/bench/basicbenchmark.h
@@ -1,54 +0,0 @@
-
-#ifndef EIGEN_BENCH_BASICBENCH_H
-#define EIGEN_BENCH_BASICBENCH_H
-
-enum { LazyEval, EarlyEval, OmpEval };
-
-template <int Mode, typename MatrixType>
-void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
-
-template <int Mode, typename MatrixType>
-void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) {
-  for (int a = 0; a < iterations; a++) {
-    if (Mode == LazyEval) {
-      asm("#begin_bench_loop LazyEval");
-      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
-      m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-    } else if (Mode == OmpEval) {
-      asm("#begin_bench_loop OmpEval");
-      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
-      m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-    } else {
-      asm("#begin_bench_loop EarlyEval");
-      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
-      m = I + 0.00005 * (m + m * m);
-    }
-    asm("#end_bench_loop");
-  }
-}
-
-template <int Mode, typename MatrixType>
-double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
-
-template <int Mode, typename MatrixType>
-double benchBasic(const MatrixType& mat, int iterations, int tries) {
-  const int rows = mat.rows();
-  const int cols = mat.cols();
-
-  MatrixType I(rows, cols);
-  MatrixType m(rows, cols);
-
-  initMatrix_identity(I);
-
-  Eigen::BenchTimer timer;
-  for (uint t = 0; t < tries; ++t) {
-    initMatrix_random(m);
-    timer.start();
-    benchBasic_loop<Mode>(I, m, iterations);
-    timer.stop();
-    cerr << m;
-  }
-  return timer.value();
-};
-
-#endif  // EIGEN_BENCH_BASICBENCH_H
--- a/bench/benchBlasGemm.cpp
+++ b/bench/benchBlasGemm.cpp
@@ -1,199 +0,0 @@
-// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
-// possible options:
-//    -DEIGEN_DONT_VECTORIZE
-//    -msse2
-
-// #define EIGEN_DEFAULT_TO_ROW_MAJOR
-#define _FLOAT
-
-#include <iostream>
-
-#include <Eigen/Core>
-#include "BenchTimer.h"
-
-// include the BLAS headers
-extern "C" {
-#include <cblas.h>
-}
-#include <string>
-
-#ifdef _FLOAT
-typedef float Scalar;
-#define CBLAS_GEMM cblas_sgemm
-#else
-typedef double Scalar;
-#define CBLAS_GEMM cblas_dgemm
-#endif
-
-typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
-void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
-void check_product(int M, int N, int K);
-void check_product(void);
-
-int main(int argc, char* argv[]) {
-// disable SSE exceptions
-#ifdef __GNUC__
-  {
-    int aux;
-    asm("stmxcsr   %[aux]           \n\t"
-        "orl       $32832, %[aux]   \n\t"
-        "ldmxcsr   %[aux]           \n\t"
-        :
-        : [aux] "m"(aux));
-  }
-#endif
-
-  int nbtries = 1, nbloops = 1, M, N, K;
-
-  if (argc == 2) {
-    if (std::string(argv[1]) == "check")
-      check_product();
-    else
-      M = N = K = atoi(argv[1]);
-  } else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
-    M = N = K = atoi(argv[2]);
-    nbloops = 1000000000 / (M * M * M);
-    if (nbloops < 1) nbloops = 1;
-    nbtries = 6;
-  } else if (argc == 4) {
-    M = N = K = atoi(argv[1]);
-    nbloops = atoi(argv[2]);
-    nbtries = atoi(argv[3]);
-  } else if (argc == 6) {
-    M = atoi(argv[1]);
-    N = atoi(argv[2]);
-    K = atoi(argv[3]);
-    nbloops = atoi(argv[4]);
-    nbtries = atoi(argv[5]);
-  } else {
-    std::cout << "Usage: " << argv[0] << " size  \n";
-    std::cout << "Usage: " << argv[0] << " auto size\n";
-    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
-    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
-    std::cout << "Usage: " << argv[0] << " check\n";
-    std::cout << "Options:\n";
-    std::cout << "    size       unique size of the 2 matrices (integer)\n";
-    std::cout << "    auto       automatically set the number of repetitions and tries\n";
-    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
-    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
-    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
-    std::cout << "    check      check eigen product using cblas as a reference\n";
-    exit(1);
-  }
-
-  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
-
-  if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
-
-  Scalar alpha, beta;
-  MyMatrix ma(M, K), mb(K, N), mc(M, N);
-  ma = MyMatrix::Random(M, K);
-  mb = MyMatrix::Random(K, N);
-  mc = MyMatrix::Random(M, N);
-
-  Eigen::BenchTimer timer;
-
-  // we simply compute c += a*b, so:
-  alpha = 1;
-  beta = 1;
-
-  // bench cblas
-  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
-  if (!(std::string(argv[1]) == "auto")) {
-    timer.reset();
-    for (uint k = 0; k < nbtries; ++k) {
-      timer.start();
-      for (uint j = 0; j < nbloops; ++j)
-#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
-        CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
-                   mc.data(), N);
-#else
-        CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
-                   mc.data(), M);
-#endif
-      timer.stop();
-    }
-    if (!(std::string(argv[1]) == "auto"))
-      std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
-    else
-      std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
-  }
-
-  // clear
-  ma = MyMatrix::Random(M, K);
-  mb = MyMatrix::Random(K, N);
-  mc = MyMatrix::Random(M, N);
-
-  // eigen
-  //   if (!(std::string(argv[1])=="auto"))
-  {
-    timer.reset();
-    for (uint k = 0; k < nbtries; ++k) {
-      timer.start();
-      bench_eigengemm(mc, ma, mb, nbloops);
-      timer.stop();
-    }
-    if (!(std::string(argv[1]) == "auto"))
-      std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
-    else
-      std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
-  }
-
-  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
-  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
-
-  return 0;
-}
-
-using namespace Eigen;
-
-void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
-  for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
-}
-
-#define MYVERIFY(A, M)                  \
-  if (!(A)) {                           \
-    std::cout << "FAIL: " << M << "\n"; \
-  }
-void check_product(int M, int N, int K) {
-  MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
-  ma = MyMatrix::Random(M, K);
-  mb = MyMatrix::Random(K, N);
-  maT = ma.transpose();
-  mbT = mb.transpose();
-  mc = MyMatrix::Random(M, N);
-
-  MyMatrix::Scalar eps = 1e-4;
-
-  meigen = mref = mc;
-  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
-  meigen += ma * mb;
-  MYVERIFY(meigen.isApprox(mref, eps), ". * .");
-
-  meigen = mref = mc;
-  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
-  meigen += maT.transpose() * mb;
-  MYVERIFY(meigen.isApprox(mref, eps), "T * .");
-
-  meigen = mref = mc;
-  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
-  meigen += (maT.transpose()) * (mbT.transpose());
-  MYVERIFY(meigen.isApprox(mref, eps), "T * T");
-
-  meigen = mref = mc;
-  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
-  meigen += ma * mbT.transpose();
-  MYVERIFY(meigen.isApprox(mref, eps), ". * T");
-}
-
-void check_product(void) {
-  int M, N, K;
-  for (uint i = 0; i < 1000; ++i) {
-    M = internal::random<int>(1, 64);
-    N = internal::random<int>(1, 768);
-    K = internal::random<int>(1, 768);
-    M = (0 + M) * 1;
-    std::cout << M << " x " << N << " x " << K << "\n";
-    check_product(M, N, K);
-  }
-}
--- a/bench/benchCholesky.cpp
+++ b/bench/benchCholesky.cpp
@@ -1,124 +0,0 @@
-// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp  -o benchCholesky && ./benchCholesky
-// options:
-//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
-//  -DEIGEN_DONT_VECTORIZE
-//  -msse2
-//  -DREPEAT=100
-//  -DTRIES=10
-//  -DSCALAR=double
-
-#include <iostream>
-
-#include <Eigen/Core>
-#include <Eigen/Cholesky>
-#include <bench/BenchUtil.h>
-using namespace Eigen;
-
-#ifndef REPEAT
-#define REPEAT 10000
-#endif
-
-#ifndef TRIES
-#define TRIES 10
-#endif
-
-typedef float Scalar;
-
-// Benchmarks LDLT and LLT on a random matrix a * a.adjoint() whose dimensions are
-// taken from m (the coefficients of m are never read), then prints, per solver, the
-// best time per factorization and the matching GFLOPS estimate.
-// With -DBENCH_GSL, dynamic-size inputs are additionally timed through GSL.
-template <typename MatrixType>
-__attribute__((noinline)) void benchLLT(const MatrixType& m) {
-  int rows = m.rows();
-  int cols = m.cols();
-
-  // Flop-count estimate behind the GFLOPS figures printed below.
-  double cost = 0;
-  for (int j = 0; j < rows; ++j) {
-    int r = std::max(rows - j - 1, 0);
-    cost += 2 * (r * j + r + j);
-  }
-
-  // Repeat less for bigger matrices, but at least once: a zero count would skip the
-  // timed loops and turn every per-repeat figure below into a division by zero.
-  int repeats = std::max(1, (REPEAT * 1000) / (rows * rows));
-
-  typedef typename MatrixType::Scalar Scalar;
-  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
-
-  MatrixType a = MatrixType::Random(rows, cols);
-  SquareMatrixType covMat = a * a.adjoint();
-
-  BenchTimer timerNoSqrt, timerSqrt;
-
-  // acc collects one coefficient (r, c) of every computed factor so that the
-  // factorizations have an observable result.
-  Scalar acc = 0;
-  int r = internal::random<int>(0, covMat.rows() - 1);
-  int c = internal::random<int>(0, covMat.cols() - 1);
-  for (int t = 0; t < TRIES; ++t) {
-    timerNoSqrt.start();
-    for (int k = 0; k < repeats; ++k) {
-      LDLT<SquareMatrixType> cholnosqrt(covMat);
-      acc += cholnosqrt.matrixL().coeff(r, c);
-    }
-    timerNoSqrt.stop();
-  }
-
-  for (int t = 0; t < TRIES; ++t) {
-    timerSqrt.start();
-    for (int k = 0; k < repeats; ++k) {
-      LLT<SquareMatrixType> chol(covMat);
-      acc += chol.matrixL().coeff(r, c);
-    }
-    timerSqrt.stop();
-  }
-
-  if (MatrixType::RowsAtCompileTime == Dynamic)
-    std::cout << "dyn   ";
-  else
-    std::cout << "fixed ";
-  std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s "
-            << "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s "
-            << "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n";
-
-#ifdef BENCH_GSL
-  if (MatrixType::RowsAtCompileTime == Dynamic) {
-    timerSqrt.reset();
-
-    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
-    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
-
-    eiToGsl(covMat, &gslCovMat);
-    for (int t = 0; t < TRIES; ++t) {
-      timerSqrt.start();
-      for (int k = 0; k < repeats; ++k) {
-        // The decomposition works in place, so it runs on a fresh copy each time.
-        gsl_matrix_memcpy(gslCopy, gslCovMat);
-        gsl_linalg_cholesky_decomp(gslCopy);
-        acc += gsl_matrix_get(gslCopy, r, c);
-      }
-      timerSqrt.stop();
-    }
-
-    std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s";
-
-    // Release both GSL matrices allocated above.
-    gsl_matrix_free(gslCovMat);
-    gsl_matrix_free(gslCopy);
-  }
-#endif
-  std::cout << "\n";
-  // make sure the compiler does not optimize too much
-  if (acc == 123) std::cout << acc;
-}
-
-// Prints the report header, then runs benchLLT on a list of dynamic sizes followed
-// by a set of fixed sizes. Command-line arguments are ignored.
-int main(int argc, char* argv[]) {
-  // Zero-terminated list of dynamic matrix sizes.
-  const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0};
-  std::cout << "size            LDLT                            LLT"
-            << "\n";
-
-  for (const int* size = dynsizes; *size > 0; ++size) {
-    benchLLT(Matrix<Scalar, Dynamic, Dynamic>(*size, *size));
-  }
-
-  // Fixed-size matrices: each size is a separate template instantiation.
-  benchLLT(Matrix<Scalar, 2, 2>());
-  benchLLT(Matrix<Scalar, 3, 3>());
-  benchLLT(Matrix<Scalar, 4, 4>());
-  benchLLT(Matrix<Scalar, 5, 5>());
-  benchLLT(Matrix<Scalar, 6, 6>());
-  benchLLT(Matrix<Scalar, 7, 7>());
-  benchLLT(Matrix<Scalar, 8, 8>());
-  benchLLT(Matrix<Scalar, 12, 12>());
-  benchLLT(Matrix<Scalar, 16, 16>());
-  return 0;
-}
--- a/bench/benchEigenSolver.cpp
+++ b/bench/benchEigenSolver.cpp
@@ -1,192 +0,0 @@
-
-// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp  -o benchEigenSolver && ./benchEigenSolver
-// options:
-//  -DBENCH_GMM
-//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
-//  -DEIGEN_DONT_VECTORIZE
-//  -msse2
-//  -DREPEAT=100
-//  -DTRIES=10
-//  -DSCALAR=double
-
-#include <iostream>
-
-#include <Eigen/Core>
-#include <Eigen/QR>
-#include <bench/BenchUtil.h>
-using namespace Eigen;
-
-#ifndef REPEAT
-#define REPEAT 1000
-#endif
-
-#ifndef TRIES
-#define TRIES 4
-#endif
-
-#ifndef SCALAR
-#define SCALAR float
-#endif
-
-typedef SCALAR Scalar;
-
-// Benchmarks SelfAdjointEigenSolver and EigenSolver on a random matrix
-// a * a.adjoint() whose dimensions are taken from m (the coefficients of m are never
-// read) and prints one report line with both timings.
-// With -DBENCH_GMM / -DBENCH_GSL, dynamic-size inputs are additionally timed through
-// GMM++ / GSL.
-template <typename MatrixType>
-__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) {
-  int rows = m.rows();
-  int cols = m.cols();
-
-  // Repeat less for bigger matrices, but at least once; the self-adjoint solver is
-  // repeated four times as often as the generic one.
-  int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows))));
-  int saRepeats = stdRepeats * 4;
-
-  typedef typename MatrixType::Scalar Scalar;
-  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
-
-  MatrixType a = MatrixType::Random(rows, cols);
-  SquareMatrixType covMat = a * a.adjoint();
-
-  BenchTimer timerSa, timerStd;
-
-  // acc collects one coefficient (r, c) of every computed eigenvector matrix so that
-  // the decompositions have an observable result.
-  Scalar acc = 0;
-  int r = internal::random<int>(0, covMat.rows() - 1);
-  int c = internal::random<int>(0, covMat.cols() - 1);
-  {
-    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
-    for (int t = 0; t < TRIES; ++t) {
-      timerSa.start();
-      for (int k = 0; k < saRepeats; ++k) {
-        ei.compute(covMat);
-        acc += ei.eigenvectors().coeff(r, c);
-      }
-      timerSa.stop();
-    }
-  }
-
-  {
-    EigenSolver<SquareMatrixType> ei(covMat);
-    for (int t = 0; t < TRIES; ++t) {
-      timerStd.start();
-      for (int k = 0; k < stdRepeats; ++k) {
-        ei.compute(covMat);
-        acc += ei.eigenvectors().coeff(r, c);
-      }
-      timerStd.stop();
-    }
-  }
-
-  if (MatrixType::RowsAtCompileTime == Dynamic)
-    std::cout << "dyn   ";
-  else
-    std::cout << "fixed ";
-  std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t"
-            << timerStd.value() * REPEAT / stdRepeats << "s";
-
-#ifdef BENCH_GMM
-  if (MatrixType::RowsAtCompileTime == Dynamic) {
-    timerSa.reset();
-    timerStd.reset();
-
-    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(), covMat.cols());
-    gmm::dense_matrix<Scalar> eigvect(covMat.rows(), covMat.cols());
-    std::vector<Scalar> eigval(covMat.rows());
-    eiToGmm(covMat, gmmCovMat);
-    for (int t = 0; t < TRIES; ++t) {
-      timerSa.start();
-      for (int k = 0; k < saRepeats; ++k) {
-        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
-        acc += eigvect(r, c);
-      }
-      timerSa.stop();
-    }
-    // The non-selfadjoint GMM++ solver does not compute the eigenvectors, so it is
-    // not timed and its column is reported as "na".
-    std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s"
-              << "   na   ";
-  }
-#endif
-
-#ifdef BENCH_GSL
-  if (MatrixType::RowsAtCompileTime == Dynamic) {
-    timerSa.reset();
-    timerStd.reset();
-
-    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
-    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
-    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(), covMat.cols());
-    gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
-    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
-
-    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(), covMat.cols());
-    gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
-    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
-
-    eiToGsl(covMat, &gslCovMat);
-    for (int t = 0; t < TRIES; ++t) {
-      timerSa.start();
-      for (int k = 0; k < saRepeats; ++k) {
-        // Each solve runs on a fresh copy of the input matrix.
-        gsl_matrix_memcpy(gslCopy, gslCovMat);
-        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
-        acc += gsl_matrix_get(eigvect, r, c);
-      }
-      timerSa.stop();
-    }
-    for (int t = 0; t < TRIES; ++t) {
-      timerStd.start();
-      for (int k = 0; k < stdRepeats; ++k) {
-        gsl_matrix_memcpy(gslCopy, gslCovMat);
-        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
-        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz, r, c));
-      }
-      timerStd.stop();
-    }
-
-    std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s \t" << timerStd.value() * REPEAT / stdRepeats
-              << "s";
-
-    gsl_matrix_free(gslCovMat);
-    // gslCopy is a gsl_matrix, so it must be released with gsl_matrix_free.
-    gsl_matrix_free(gslCopy);
-    gsl_matrix_free(eigvect);
-    gsl_vector_free(eigval);
-    gsl_matrix_complex_free(eigvectz);
-    gsl_vector_complex_free(eigvalz);
-    gsl_eigen_symmv_free(eisymm);
-    gsl_eigen_nonsymmv_free(einonsymm);
-  }
-#endif
-
-  std::cout << "\n";
-
-  // make sure the compiler does not optimize too much
-  if (acc == 123) std::cout << acc;
-}
-
-// Prints the report header (with extra columns when BENCH_GMM / BENCH_GSL are
-// defined), then runs benchEigenSolver on a list of dynamic sizes followed by a set
-// of fixed sizes. Command-line arguments are ignored.
-int main(int argc, char* argv[]) {
-  // Zero-terminated list of dynamic matrix sizes.
-  const int dynsizes[] = {4, 6, 8, 12, 16, 24, 32, 64, 128, 256, 512, 0};
-  std::cout << "size            selfadjoint       generic";
-#ifdef BENCH_GMM
-  std::cout << "        GMM++          ";
-#endif
-#ifdef BENCH_GSL
-  std::cout << "       GSL (double + ATLAS)  ";
-#endif
-  std::cout << "\n";
-  // int index: `uint` is not a standard type.
-  for (int i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
-
-  benchEigenSolver(Matrix<Scalar, 2, 2>());
-  benchEigenSolver(Matrix<Scalar, 3, 3>());
-  benchEigenSolver(Matrix<Scalar, 4, 4>());
-  benchEigenSolver(Matrix<Scalar, 6, 6>());
-  benchEigenSolver(Matrix<Scalar, 8, 8>());
-  benchEigenSolver(Matrix<Scalar, 12, 12>());
-  benchEigenSolver(Matrix<Scalar, 16, 16>());
-  return 0;
-}
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@@ -1,117 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2009 Mark Borgerding mark a borgerding net
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include <iostream>
-
-#include <bench/BenchUtil.h>
-#include <complex>
-#include <vector>
-#include <Eigen/Core>
-
-#include <unsupported/Eigen/FFT>
-
-using namespace Eigen;
-using namespace std;
-
-// Returns the name printed in the report for scalar type T.
-// Only the explicit specializations below are defined.
-template <typename T>
-string nameof();
-
-template <>
-string nameof<float>() {
-  return string("float");
-}
-
-template <>
-string nameof<double>() {
-  return string("double");
-}
-
-template <>
-string nameof<long double>() {
-  return string("long double");
-}
-
-#ifndef TYPE
-#define TYPE float
-#endif
-
-#ifndef NFFT
-#define NFFT 1024
-#endif
-#ifndef NDATA
-#define NDATA 1000000
-#endif
-
-using namespace Eigen;
-
-// Times nfft-point transforms with input element type T (real or complex) and prints
-// one report line with the throughput in MS/s and an MFLOPS estimate.
-//   nfft     - transform length; NDATA / nfft transforms are run per timed try
-//   fwd      - time the forward transform when true, the inverse otherwise
-//   unscaled - set the Unscaled flag on the FFT object
-//   halfspec - set the HalfSpectrum flag on the FFT object
-template <typename T>
-void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) {
-  typedef typename NumTraits<T>::Real Scalar;
-  typedef typename std::complex<Scalar> Complex;
-  const int nits = NDATA / nfft;
-  vector<T> inbuf(nfft);
-  vector<Complex> outbuf(nfft);
-  FFT<Scalar> fft;
-
-  if (unscaled) {
-    fft.SetFlag(fft.Unscaled);
-    cout << "unscaled ";
-  }
-  if (halfspec) {
-    fft.SetFlag(fft.HalfSpectrum);
-    cout << "halfspec ";
-  }
-
-  // One untimed forward transform of an all-zero input before measuring.
-  std::fill(inbuf.begin(), inbuf.end(), 0);
-  fft.fwd(outbuf, inbuf);
-
-  BenchTimer timer;
-  timer.reset();
-  for (int k = 0; k < 8; ++k) {
-    timer.start();
-    if (fwd) {
-      for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf);
-    } else {
-      for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf);
-    }
-    timer.stop();
-  }
-
-  cout << nameof<Scalar>() << " ";
-  // 5 * N * log2(N) flops per transform; halved below for real input.
-  double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits);
-  const bool is_complex = NumTraits<T>::IsComplex;
-  cout << (is_complex ? "complex" : "real   ");
-  if (!is_complex) mflops /= 2;
-
-  cout << (fwd ? " fwd" : " inv");
-
-  cout << " NFFT=" << nfft << "  " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
-}
-
-// Runs the FFT benchmark for float, double and long double, each with complex and
-// real input, in forward and inverse direction. Command-line arguments are ignored.
-int main(int argc, char** argv) {
-  typedef complex<float> ComplexF;
-  typedef complex<double> ComplexD;
-  typedef complex<long double> ComplexL;
-
-  // single precision, including the unscaled and half-spectrum inverse variants
-  bench<ComplexF>(NFFT, true);
-  bench<ComplexF>(NFFT, false);
-  bench<float>(NFFT, true);
-  bench<float>(NFFT, false);
-  bench<float>(NFFT, false, true);
-  bench<float>(NFFT, false, true, true);
-
-  // double precision
-  bench<ComplexD>(NFFT, true);
-  bench<ComplexD>(NFFT, false);
-  bench<double>(NFFT, true);
-  bench<double>(NFFT, false);
-
-  // extended precision
-  bench<ComplexL>(NFFT, true);
-  bench<ComplexL>(NFFT, false);
-  bench<long double>(NFFT, true);
-  bench<long double>(NFFT, false);
-  return 0;
-}
--- a/bench/benchGeometry.cpp
+++ b/bench/benchGeometry.cpp
@@ -1,120 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include <Eigen/Core>
-#include <Eigen/Geometry>
-#include <bench/BenchTimer.h>
-
-using namespace Eigen;
-using namespace std;
-
-#ifndef REPEAT
-#define REPEAT 1000000
-#endif
-
-// Selects which product expression func<...>::run evaluates.
-enum func_opt {
-  TV = 0,        // a1 * a2
-  TMATV = 1,     // a1.matrix() * a2
-  TMATVMAT = 2,  // a1.matrix() * a2.matrix()
-};
-
-// Non-inlined product kernels, selected by a func_opt value.
-// Each body keeps an empty asm statement; presumably it keeps the compiler from
-// optimizing the call away - TODO confirm.
-template <typename Res, typename Lhs, typename Rhs, int Opt>
-struct func;
-
-// TV: product of the two operands as they are.
-template <typename Res, typename Lhs, typename Rhs>
-struct func<Res, Lhs, Rhs, TV> {
-  static EIGEN_DONT_INLINE Res run(Lhs& a1, Rhs& a2) {
-    asm("");
-    return a1 * a2;
-  }
-};
-
-// TMATV: product of the first operand's matrix() with the second operand.
-template <typename Res, typename Lhs, typename Rhs>
-struct func<Res, Lhs, Rhs, TMATV> {
-  static EIGEN_DONT_INLINE Res run(Lhs& a1, Rhs& a2) {
-    asm("");
-    return a1.matrix() * a2;
-  }
-};
-
-// TMATVMAT: product of both operands' matrix(), converted back to Res.
-template <typename Res, typename Lhs, typename Rhs>
-struct func<Res, Lhs, Rhs, TMATVMAT> {
-  static EIGEN_DONT_INLINE Res run(Lhs& a1, Rhs& a2) {
-    asm("");
-    return Res(a1.matrix() * a2.matrix());
-  }
-};
-
-// Times REPEAT applications of func::run(a1, a2), feeding each result back into a2,
-// over 10 timed tries, and prints the timer value in seconds.
-// Both operands start as identity.
-template <class func, class arg1, class arg2>
-struct test_transform {
-  static void run() {
-    arg1 a1;
-    a1.setIdentity();
-    arg2 a2;
-    a2.setIdentity();
-
-    BenchTimer timer;
-    timer.reset();
-    // Distinct counters for the tries and the repetitions (no shadowing).
-    for (int t = 0; t < 10; ++t) {
-      timer.start();
-      for (int k = 0; k < REPEAT; ++k) a2 = func::run(a1, a2);
-      timer.stop();
-    }
-    cout << setprecision(4) << fixed << timer.value() << "s  " << endl;
-  }
-};
-
-// Prints a row label (scalar, mode, option, vector size) and runs the transform-times-
-// vector benchmark selected by op. Expands to two statements: the label output and
-// a braced block.
-#define run_vec(op, scalar, mode, option, vsize)                                          \
-  std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize << " ";     \
-  {                                                                                       \
-    typedef Transform<scalar, 3, mode, option> TransformType;                             \
-    typedef Matrix<scalar, vsize, 1, option> VectorType;                                  \
-    typedef func<VectorType, TransformType, VectorType, op> Kernel;                       \
-    test_transform<Kernel, TransformType, VectorType>::run();                             \
-  }
-
-// Prints a row label (scalar, mode, option) and runs the transform-times-transform
-// benchmark selected by op. Expands to two statements: the label output and a braced
-// block.
-#define run_trans(op, scalar, mode, option)                               \
-  std::cout << #scalar << "\t " << #mode << "\t " << #option << "   ";    \
-  {                                                                       \
-    typedef Transform<scalar, 3, mode, option> TransformType;             \
-    typedef func<TransformType, TransformType, TransformType, op> Kernel; \
-    test_transform<Kernel, TransformType, TransformType>::run();          \
-  }
-
-// Runs the four benchmark groups in turn, printing a heading before each group.
-// Command-line arguments are ignored.
-int main(int argc, char* argv[]) {
-  // Group 1: transform applied to a vector.
-  cout << "vec = trans * vec" << endl;
-  run_vec(TV, float, Isometry, AutoAlign, 3);
-  run_vec(TV, float, Isometry, DontAlign, 3);
-  run_vec(TV, float, Isometry, AutoAlign, 4);
-  run_vec(TV, float, Isometry, DontAlign, 4);
-  run_vec(TV, float, Projective, AutoAlign, 4);
-  run_vec(TV, float, Projective, DontAlign, 4);
-  run_vec(TV, double, Isometry, AutoAlign, 3);
-  run_vec(TV, double, Isometry, DontAlign, 3);
-  run_vec(TV, double, Isometry, AutoAlign, 4);
-  run_vec(TV, double, Isometry, DontAlign, 4);
-  run_vec(TV, double, Projective, AutoAlign, 4);
-  run_vec(TV, double, Projective, DontAlign, 4);
-
-  // Group 2: the transform's matrix() applied to a vector.
-  cout << "vec = trans.matrix() * vec" << endl;
-  run_vec(TMATV, float, Isometry, AutoAlign, 4);
-  run_vec(TMATV, float, Isometry, DontAlign, 4);
-  run_vec(TMATV, double, Isometry, AutoAlign, 4);
-  run_vec(TMATV, double, Isometry, DontAlign, 4);
-
-  // Group 3: transform composed with a transform.
-  cout << "trans = trans1 * trans" << endl;
-  run_trans(TV, float, Isometry, AutoAlign);
-  run_trans(TV, float, Isometry, DontAlign);
-  run_trans(TV, double, Isometry, AutoAlign);
-  run_trans(TV, double, Isometry, DontAlign);
-  run_trans(TV, float, Projective, AutoAlign);
-  run_trans(TV, float, Projective, DontAlign);
-  run_trans(TV, double, Projective, AutoAlign);
-  run_trans(TV, double, Projective, DontAlign);
-
-  // Group 4: product of the two transforms' matrix().
-  cout << "trans = trans1.matrix() * trans.matrix()" << endl;
-  run_trans(TMATVMAT, float, Isometry, AutoAlign);
-  run_trans(TMATVMAT, float, Isometry, DontAlign);
-  run_trans(TMATVMAT, double, Isometry, AutoAlign);
-  run_trans(TMATVMAT, double, Isometry, DontAlign);
-
-  return 0;
-}
--- a/bench/benchVecAdd.cpp
+++ b/bench/benchVecAdd.cpp
@@ -1,131 +0,0 @@
-
-#include <iostream>
-#include <Eigen/Core>
-#include <bench/BenchTimer.h>
-using namespace Eigen;
-
-#ifndef SIZE
-#define SIZE 50
-#endif
-
-#ifndef REPEAT
-#define REPEAT 10000
-#endif
-
-typedef float Scalar;
-
-__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
-__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
-__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
-
-// Allocates three float buffers of (SIZE * 8)^2 elements, zero-fills them, times the
-// raw-pointer benchVec kernel over 10 tries and prints the timer value together with
-// a GFlops figure. Command-line arguments are ignored.
-int main(int argc, char* argv[]) {
-  int size = SIZE * 8;
-  int size2 = size * size;
-  Scalar* a = internal::aligned_new<Scalar>(size2);
-  // b starts one element past its aligned allocation (4 spare elements are reserved
-  // for that shift) - presumably to exercise unaligned loads in the kernel.
-  Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
-  Scalar* c = internal::aligned_new<Scalar>(size2);
-
-  // Zero every element the benchmarks read: the buffers hold size2 elements, not
-  // size, so stopping at size would leave most of the data uninitialized.
-  for (int i = 0; i < size2; ++i) {
-    a[i] = b[i] = c[i] = 0;
-  }
-
-  BenchTimer timer;
-
-  timer.reset();
-  for (int k = 0; k < 10; ++k) {
-    timer.start();
-    benchVec(a, b, c, size2);
-    timer.stop();
-  }
-  std::cout << timer.value() << "s  " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
-            << " GFlops\n";
-  // NOTE(review): this unconditional return disables the matrix- and vector-shaped
-  // benchmarks below. It is kept so the program's run time and output are unchanged;
-  // confirm whether those benchmarks are meant to run.
-  return 0;
-  for (int innersize = size; innersize > 2; --innersize) {
-    if (size2 % innersize == 0) {
-      int outersize = size2 / innersize;
-      MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
-      MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
-      MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
-      timer.reset();
-      for (int k = 0; k < 3; ++k) {
-        timer.start();
-        benchVec(ma, mb, mc);
-        timer.stop();
-      }
-      std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   "
-                << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
-    }
-  }
-
-  VectorXf va = Map<VectorXf>(a, size2);
-  VectorXf vb = Map<VectorXf>(b, size2);
-  VectorXf vc = Map<VectorXf>(c, size2);
-  timer.reset();
-  for (int k = 0; k < 3; ++k) {
-    timer.start();
-    benchVec(va, vb, vc);
-    timer.stop();
-  }
-  std::cout << timer.value() << "s   " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
-            << " GFlops\n";
-
-  return 0;
-}
-
-// Matrix variant: evaluates a = a + b, REPEAT times. c is unused.
-void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
-  int remaining = REPEAT;
-  while (remaining-- > 0) {
-    a = a + b;
-  }
-}
-
-// Vector variant: evaluates a = a + b, REPEAT times. c is unused.
-void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
-  int remaining = REPEAT;
-  while (remaining-- > 0) {
-    a = a + b;
-  }
-}
-
-// Raw-pointer variant of a += b, repeated REPEAT times, written with explicit packet
-// loads and stores and unrolled over 8 packets per iteration. c is unused.
-// Results are written with pstore while both inputs are read with ploadu. The loop
-// has no tail handling, so size is expected to be a multiple of 8 * PacketSize.
-void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
-  const int PacketSize = internal::packet_traits<Scalar>::size;
-  for (int k = 0; k < REPEAT; ++k)
-    for (int i = 0; i < size; i += PacketSize * 8) {
-      // All 8 packets of the stride are updated, so every element is added once per
-      // repeat - the GFlops report in main counts size elements per repeat.
-      internal::pstore(&a[i + 0 * PacketSize], internal::padd(internal::ploadu(&a[i + 0 * PacketSize]),
-                                                              internal::ploadu(&b[i + 0 * PacketSize])));
-      internal::pstore(&a[i + 1 * PacketSize], internal::padd(internal::ploadu(&a[i + 1 * PacketSize]),
-                                                              internal::ploadu(&b[i + 1 * PacketSize])));
-      internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]),
-                                                              internal::ploadu(&b[i + 2 * PacketSize])));
-      internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]),
-                                                              internal::ploadu(&b[i + 3 * PacketSize])));
-      internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]),
-                                                              internal::ploadu(&b[i + 4 * PacketSize])));
-      internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]),
-                                                              internal::ploadu(&b[i + 5 * PacketSize])));
-      internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]),
-                                                              internal::ploadu(&b[i + 6 * PacketSize])));
-      internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]),
-                                                              internal::ploadu(&b[i + 7 * PacketSize])));
-    }
-}
--- a/bench/bench_gemm.cpp
+++ b/bench/bench_gemm.cpp
@@ -1,393 +0,0 @@
-
-// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2  ./a.out
-// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp  && OMP_NUM_THREADS=2  ./a.out
-
-// Compilation options:
-//
-// -DSCALAR=std::complex<double>
-// -DSCALARA=double or -DSCALARB=double
-// -DHAVE_BLAS
-// -DDECOUPLED
-//
-
-#include <iostream>
-#include <bench/BenchTimer.h>
-#include <Eigen/Core>
-
-using namespace std;
-using namespace Eigen;
-
-#ifndef SCALAR
-// #define SCALAR std::complex<float>
-#define SCALAR float
-#endif
-
-#ifndef SCALARA
-#define SCALARA SCALAR
-#endif
-
-#ifndef SCALARB
-#define SCALARB SCALAR
-#endif
-
-#ifdef ROWMAJ_A
-const int opt_A = RowMajor;
-#else
-const int opt_A = ColMajor;
-#endif
-
-#ifdef ROWMAJ_B
-const int opt_B = RowMajor;
-#else
-const int opt_B = ColMajor;
-#endif
-
-typedef SCALAR Scalar;
-typedef NumTraits<Scalar>::Real RealScalar;
-typedef Matrix<SCALARA, Dynamic, Dynamic, opt_A> A;
-typedef Matrix<SCALARB, Dynamic, Dynamic, opt_B> B;
-typedef Matrix<Scalar, Dynamic, Dynamic> C;
-typedef Matrix<RealScalar, Dynamic, Dynamic> M;
-
-#ifdef HAVE_BLAS
-
-extern "C" {
-#include <Eigen/src/misc/blas.h>
-}
-
-// Scalar constants and flag characters for the Fortran BLAS interface, which takes
-// all of its arguments by address. Naming: f/d/cf/cd prefix for float, double,
-// complex<float>, complex<double>, followed by "one" or "zero".
-static float fone = 1;
-static float fzero = 0;
-static double done = 1;
-static double dzero = 0;
-static std::complex<float> cfone = 1;
-static std::complex<float> cfzero = 0;
-static std::complex<double> cdone = 1;
-static std::complex<double> cdzero = 0;
-static char notrans = 'N';
-static char trans = 'T';
-static char nonunit = 'N';
-static char lower = 'L';
-static char right = 'R';
-static int intone = 1;
-
-#ifdef ROWMAJ_A
-const char transA = trans;
-#else
-const char transA = notrans;
-#endif
-
-#ifdef ROWMAJ_B
-const char transB = trans;
-#else
-const char transB = notrans;
-#endif
-
-// Single-precision reference product through the BLAS routine sgemm_: c += a * b.
-// The transpose flags transA / transB are fixed at file scope from the storage order
-// of the operands; the leading dimensions are the operands' outer strides.
-template <typename A, typename B>
-void blas_gemm(const A& a, const B& b, MatrixXf& c) {
-  int rows = c.rows();
-  int cols = c.cols();
-  int depth = a.cols();
-  int lda = a.outerStride();
-  int ldb = b.outerStride();
-  int ldc = c.rows();
-  float* a_data = const_cast<float*>(a.data());
-  float* b_data = const_cast<float*>(b.data());
-
-  // alpha = beta = 1, i.e. the product is accumulated into c.
-  sgemm_(&transA, &transB, &rows, &cols, &depth, &fone, a_data, &lda, b_data, &ldb, &fone, c.data(), &ldc);
-}
-
-// Double-precision reference product through the BLAS routine dgemm_: c += a * b.
-// The transpose flags transA / transB are fixed at file scope from the storage order
-// of the operands; the leading dimensions are the operands' outer strides.
-template <typename A, typename B>
-void blas_gemm(const A& a, const B& b, MatrixXd& c) {
-  int rows = c.rows();
-  int cols = c.cols();
-  int depth = a.cols();
-  int lda = a.outerStride();
-  int ldb = b.outerStride();
-  int ldc = c.rows();
-  double* a_data = const_cast<double*>(a.data());
-  double* b_data = const_cast<double*>(b.data());
-
-  // alpha = beta = 1, i.e. the product is accumulated into c.
-  dgemm_(&transA, &transB, &rows, &cols, &depth, &done, a_data, &lda, b_data, &ldb, &done, c.data(), &ldc);
-}
-
-// Complex single-precision reference product through the BLAS routine cgemm_:
-// c += a * b. Complex buffers and constants are passed as float pointers.
-// The transpose flags transA / transB are fixed at file scope from the storage order
-// of the operands; the leading dimensions are the operands' outer strides.
-template <typename A, typename B>
-void blas_gemm(const A& a, const B& b, MatrixXcf& c) {
-  int rows = c.rows();
-  int cols = c.cols();
-  int depth = a.cols();
-  int lda = a.outerStride();
-  int ldb = b.outerStride();
-  int ldc = c.rows();
-  float* a_data = const_cast<float*>((const float*)a.data());
-  float* b_data = const_cast<float*>((const float*)b.data());
-  float* one = (float*)&cfone;
-
-  // alpha = beta = 1, i.e. the product is accumulated into c.
-  cgemm_(&transA, &transB, &rows, &cols, &depth, one, a_data, &lda, b_data, &ldb, one, (float*)c.data(), &ldc);
-}
-
-// Complex double-precision reference product through the BLAS routine zgemm_:
-// c += a * b. Complex buffers and constants are passed as double pointers.
-// The transpose flags transA / transB are fixed at file scope from the storage order
-// of the operands; the leading dimensions are the operands' outer strides.
-template <typename A, typename B>
-void blas_gemm(const A& a, const B& b, MatrixXcd& c) {
-  int rows = c.rows();
-  int cols = c.cols();
-  int depth = a.cols();
-  int lda = a.outerStride();
-  int ldb = b.outerStride();
-  int ldc = c.rows();
-  double* a_data = const_cast<double*>((const double*)a.data());
-  double* b_data = const_cast<double*>((const double*)b.data());
-  double* one = (double*)&cdone;
-
-  // alpha = beta = 1, i.e. the product is accumulated into c.
-  zgemm_(&transA, &transB, &rows, &cols, &depth, one, a_data, &lda, b_data, &ldb, one, (double*)c.data(), &ldc);
-}
-
-#endif
-
-// Complex * complex product on split real/imaginary storage, as four real products:
-//   cr += ar * br - ai * bi
-//   ci += ar * bi + ai * br
-void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) {
-  // imaginary part
-  ci.noalias() += ar * bi;
-  ci.noalias() += ai * br;
-  // real part
-  cr.noalias() += ar * br;
-  cr.noalias() -= ai * bi;
-}
-
-// Real * complex product on split real/imaginary storage: the real matrix a
-// multiplies the real and the imaginary part of b separately.
-void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) {
-  ci.noalias() += a * bi;  // imaginary part
-  cr.noalias() += a * br;  // real part
-}
-
-// Complex * real product on split real/imaginary storage: the real and the imaginary
-// part of a are each multiplied by the real matrix b.
-void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) {
-  ci.noalias() += ai * b;  // imaginary part
-  cr.noalias() += ar * b;  // real part
-}
-
-// Non-inlined accumulation c += a * b through noalias(); this is the Eigen kernel
-// timed by main.
-template <typename Lhs, typename Rhs, typename Dst>
-EIGEN_DONT_INLINE void gemm(const Lhs& a, const Rhs& b, Dst& c) {
-  c.noalias() += a * b;
-}
-
-// Prints two report lines for timer t - one from the CPU clock, one from the real
-// clock - each with the best time per repetition, the GFLOPS rate computed from
-// 2 * m * n * p flops per product, and the total measured time.
-static void print_gemm_timings(const char* cpu_label, const char* real_label, BenchTimer& t, int m, int n, int p,
-                               int rep) {
-  std::cout << cpu_label << t.best(CPU_TIMER) / rep << "s  \t"
-            << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
-            << "s)\n";
-  std::cout << real_label << t.best(REAL_TIMER) / rep << "s  \t"
-            << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
-            << "s)\n";
-}
-
-// GEMM benchmark driver.
-// Options (each takes at least one value):
-//   -s <size> | -s <rows> <columns> <depth>   matrix sizes (default 2048)
-//   -c <size1> [<size2> [<size3>]]            cache sizes given to setCpuCacheSizes
-//   -t <tries>                                number of tries, the best is kept
-//   -p <repeats>                              repetitions per try
-// Returns 1 after printing the usage text when the command line is malformed,
-// 0 otherwise.
-int main(int argc, char** argv) {
-  std::ptrdiff_t l1 = internal::queryL1CacheSize();
-  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
-  std::cout << "L1 cache size     = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n";
-  std::cout << "L2/L3 cache size  = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n";
-  typedef internal::gebp_traits<Scalar, Scalar> Traits;
-  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";
-
-  int rep = 1;    // number of repetitions per try
-  int tries = 2;  // number of tries, we keep the best
-
-  int s = 2048;
-  int m = s;
-  int n = s;
-  int p = s;
-  int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;
-
-  bool need_help = false;
-  for (int i = 1; i < argc && !need_help;) {
-    const char* opt = argv[i];
-    // Every option is "-<letter>" followed by at least one value. Anything else -
-    // including an option that is the last argument - is a usage error, so argv is
-    // never read at or past argc.
-    if (opt[0] != '-' || i + 1 >= argc) {
-      need_help = true;
-      break;
-    }
-    ++i;  // argv[i] is now the first value of the option
-    switch (opt[1]) {
-      case 's':
-        s = atoi(argv[i++]);
-        m = n = p = s;
-        // Optional "<columns> <depth>" pair; both must be present.
-        if (i < argc && argv[i][0] != '-') {
-          n = atoi(argv[i++]);
-          if (i < argc)
-            p = atoi(argv[i++]);
-          else
-            need_help = true;
-        }
-        break;
-      case 'c':
-        cache_size1 = atoi(argv[i++]);
-        if (i < argc && argv[i][0] != '-') {
-          cache_size2 = atoi(argv[i++]);
-          if (i < argc && argv[i][0] != '-') cache_size3 = atoi(argv[i++]);
-        }
-        break;
-      case 't':
-        tries = atoi(argv[i++]);
-        break;
-      case 'p':
-        rep = atoi(argv[i++]);
-        break;
-      default:
-        // Unknown option: report usage instead of looping forever on it.
-        need_help = true;
-        break;
-    }
-  }
-
-  if (need_help) {
-    std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
-    std::cout << "   <matrix sizes> : size\n";
-    std::cout << "   <matrix sizes> : rows columns depth\n";
-    return 1;
-  }
-
-#if EIGEN_VERSION_AT_LEAST(3, 2, 90)
-  if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
-#endif
-
-  A a(m, p);
-  a.setRandom();
-  B b(p, n);
-  b.setRandom();
-  C c(m, n);
-  c.setOnes();
-  // rc keeps the initial value of c so that every benchmark starts from the same data.
-  C rc = c;
-
-  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
-  std::ptrdiff_t mc(m), nc(n), kc(p);
-  internal::computeProductBlockingSizes<Scalar, Scalar>(kc, mc, nc);
-  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n";
-
-  // r receives the reference result used by the sanity checks below.
-  C r = c;
-
-// check the parallel product is correct
-#if defined EIGEN_HAS_OPENMP
-  Eigen::initParallel();
-  int procs = omp_get_max_threads();
-  if (procs > 1) {
-#ifdef HAVE_BLAS
-    blas_gemm(a, b, r);
-#else
-    omp_set_num_threads(1);
-    r.noalias() += a * b;
-    omp_set_num_threads(procs);
-#endif
-    c.noalias() += a * b;
-    if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
-  }
-#elif defined HAVE_BLAS
-  blas_gemm(a, b, r);
-  c.noalias() += a * b;
-  if (!r.isApprox(c)) {
-    std::cout << (r - c).norm() / r.norm() << "\n";
-    std::cerr << "Warning, your product is crap!\n\n";
-  }
-#else
-  // Without BLAS or OpenMP, compare against the lazy (coefficient-based) product,
-  // but only for problems small enough for that to be affordable.
-  if (1. * m * n * p < 2000. * 2000 * 2000) {
-    gemm(a, b, c);
-    r.noalias() += a.cast<Scalar>().lazyProduct(b.cast<Scalar>());
-    if (!r.isApprox(c)) {
-      std::cout << (r - c).norm() / r.norm() << "\n";
-      std::cerr << "Warning, your product is crap!\n\n";
-    }
-  }
-#endif
-
-#ifdef HAVE_BLAS
-  BenchTimer tblas;
-  c = rc;
-  BENCH(tblas, tries, rep, blas_gemm(a, b, c));
-  print_gemm_timings("blas  cpu         ", "blas  real        ", tblas, m, n, p, rep);
-#endif
-
-  // warm start
-  if (b.norm() + a.norm() == 123.554) std::cout << "\n";
-
-  BenchTimer tmt;
-  c = rc;
-  BENCH(tmt, tries, rep, gemm(a, b, c));
-  print_gemm_timings("eigen cpu         ", "eigen real        ", tmt, m, n, p, rep);
-
-#ifdef EIGEN_HAS_OPENMP
-  if (procs > 1) {
-    // Same product restricted to one thread, to report the multi-threading speed-up.
-    BenchTimer tmono;
-    omp_set_num_threads(1);
-    Eigen::setNbThreads(1);
-    c = rc;
-    BENCH(tmono, tries, rep, gemm(a, b, c));
-    print_gemm_timings("eigen mono cpu    ", "eigen mono real   ", tmono, m, n, p, rep);
-    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => "
-              << (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n";
-  }
-#endif
-
-  // The lazy product is only timed for very small problems.
-  if (1. * m * n * p < 30 * 30 * 30) {
-    BenchTimer tlazy;
-    c = rc;
-    BENCH(tlazy, tries, rep, c.noalias() += a.lazyProduct(b));
-    print_gemm_timings("lazy cpu         ", "lazy real        ", tlazy, m, n, p, rep);
-  }
-
-#ifdef DECOUPLED
-  // Complex products on split real/imaginary storage; exactly one of the three
-  // variants applies, depending on which operands are complex.
-  if ((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
-    M ar(m, p);
-    ar.setRandom();
-    M ai(m, p);
-    ai.setRandom();
-    M br(p, n);
-    br.setRandom();
-    M bi(p, n);
-    bi.setRandom();
-    M cr(m, n);
-    cr.setRandom();
-    M ci(m, n);
-    ci.setRandom();
-
-    BenchTimer t;
-    BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci));
-    print_gemm_timings("\"matlab\" cpu    ", "\"matlab\" real   ", t, m, n, p, rep);
-  }
-  if ((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
-    M a(m, p);
-    a.setRandom();
-    M br(p, n);
-    br.setRandom();
-    M bi(p, n);
-    bi.setRandom();
-    M cr(m, n);
-    cr.setRandom();
-    M ci(m, n);
-    ci.setRandom();
-
-    BenchTimer t;
-    BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci));
-    print_gemm_timings("\"matlab\" cpu    ", "\"matlab\" real   ", t, m, n, p, rep);
-  }
-  if ((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) {
-    M ar(m, p);
-    ar.setRandom();
-    M ai(m, p);
-    ai.setRandom();
-    M b(p, n);
-    b.setRandom();
-    M cr(m, n);
-    cr.setRandom();
-    M ci(m, n);
-    ci.setRandom();
-
-    BenchTimer t;
-    BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci));
-    print_gemm_timings("\"matlab\" cpu    ", "\"matlab\" real   ", t, m, n, p, rep);
-  }
-#endif
-
-  return 0;
-}
--- a/bench/bench_move_semantics.cpp
+++ b/bench/bench_move_semantics.cpp
@@ -1,52 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2020 Sebastien Boisvert <seb@boisvert.info>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "BenchTimer.h"
-#include "../test/MovableScalar.h"
-
-#include <Eigen/Core>
-
-#include <iostream>
-#include <utility>
-
-template <typename MatrixType>
-void copy_matrix(MatrixType& m) {
-  MatrixType tmp(m);
-  m = tmp;
-}
-
-template <typename MatrixType>
-void move_matrix(MatrixType&& m) {
-  MatrixType tmp(std::move(m));
-  m = std::move(tmp);
-}
-
-template <typename Scalar>
-void bench(const std::string& label) {
-  using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
-  Eigen::BenchTimer t;
-
-  int tries = 10;
-  int rep = 1000000;
-
-  MatrixType data = MatrixType::Random().eval();
-  MatrixType dest;
-
-  BENCH(t, tries, rep, copy_matrix(data));
-  std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
-
-  BENCH(t, tries, rep, move_matrix(std::move(data)));
-  std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
-}
-
-int main() {
-  bench<float>("float");
-  bench<double>("double");
-  return 0;
-}
--- a/bench/bench_multi_compilers.sh
+++ b/bench/bench_multi_compilers.sh
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-if (($# < 2)); then
-    echo "Usage: $0 compilerlist.txt benchfile.cpp"
-else
-
-compilerlist=$1
-benchfile=$2
-
-g=0
-source $compilerlist
-
-# for each compiler, compile benchfile and run the benchmark
-for (( i=0 ; i<g ; ++i )) ; do
-  # check the compiler exists
-  compiler=`echo ${CLIST[$i]} | cut -d " " -f 1`
-  if [ -e `which $compiler` ]; then
-    echo "${CLIST[$i]}"
-#     echo "${CLIST[$i]} $benchfile -I.. -o bench~"
-#     if [ -e ./.bench ] ; then rm .bench; fi
-    ${CLIST[$i]} $benchfile -I.. -o .bench && ./.bench 2> /dev/null
-    echo ""
-  else
-    echo "compiler not found: $compiler"
-  fi
-done
-
-fi
--- a/bench/bench_norm.cpp
+++ b/bench/bench_norm.cpp
@@ -1,342 +0,0 @@
-#include <typeinfo>
-#include <iostream>
-#include <Eigen/Core>
-#include "BenchTimer.h"
-using namespace Eigen;
-using namespace std;
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) {
-  return v.norm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) {
-  return v.stableNorm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) {
-  return v.hypotNorm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) {
-  return v.blueNorm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) {
-  typedef typename T::Scalar Scalar;
-  int n = v.size();
-  Scalar scale = 0;
-  Scalar ssq = 1;
-  for (int i = 0; i < n; ++i) {
-    Scalar ax = std::abs(v.coeff(i));
-    if (scale >= ax) {
-      ssq += numext::abs2(ax / scale);
-    } else {
-      ssq = Scalar(1) + ssq * numext::abs2(scale / ax);
-      scale = ax;
-    }
-  }
-  return scale * std::sqrt(ssq);
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) {
-  typedef typename T::Scalar Scalar;
-  Scalar s = v.array().abs().maxCoeff();
-  return s * (v / s).norm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) {
-  return v.stableNorm();
-}
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) {
-  int n = v.size() / 2;
-  for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1);
-  n = n / 2;
-  while (n > 0) {
-    for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1);
-    n = n / 2;
-  }
-  return std::sqrt(v(0));
-}
-
-namespace Eigen {
-namespace internal {
-#ifdef EIGEN_VECTORIZE
-Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a, b); }
-Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a, b); }
-
-Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); }
-Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); }
-#endif
-}  // namespace internal
-}  // namespace Eigen
-
-template <typename T>
-EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) {
-#ifndef EIGEN_VECTORIZE
-  return v.blueNorm();
-#else
-  typedef typename T::Scalar Scalar;
-
-  static int nmax = 0;
-  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
-  int n;
-
-  if (nmax <= 0) {
-    int nbig, ibeta, it, iemin, iemax, iexp;
-    Scalar abig, eps;
-
-    nbig = NumTraits<int>::highest();            // largest integer
-    ibeta = std::numeric_limits<Scalar>::radix;  // NumTraits<Scalar>::Base;                    // base for
-                                                 // floating-point numbers
-    it = NumTraits<Scalar>::digits();  // NumTraits<Scalar>::Mantissa;                // number of base-beta digits in
-                                       // mantissa
-    iemin = NumTraits<Scalar>::min_exponent();  // minimum exponent
-    iemax = NumTraits<Scalar>::max_exponent();  // maximum exponent
-    rbig = NumTraits<Scalar>::highest();        // largest floating-point number
-
-    // Check the basic machine-dependent constants.
-    if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) {
-      eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
-    }
-    iexp = -((1 - iemin) / 2);
-    b1 = std::pow(ibeta, iexp);  // lower boundary of midrange
-    iexp = (iemax + 1 - it) / 2;
-    b2 = std::pow(ibeta, iexp);  // upper boundary of midrange
-
-    iexp = (2 - iemin) / 2;
-    s1m = std::pow(ibeta, iexp);  // scaling factor for lower range
-    iexp = -((iemax + it) / 2);
-    s2m = std::pow(ibeta, iexp);  // scaling factor for upper range
-
-    overfl = rbig * s2m;  // overflow boundary for abig
-    eps = std::pow(ibeta, 1 - it);
-    relerr = std::sqrt(eps);  // tolerance for neglecting asml
-    abig = 1.0 / eps - 1.0;
-    if (Scalar(nbig) > abig)
-      nmax = abig;  // largest safe n
-    else
-      nmax = nbig;
-  }
-
-  typedef typename internal::packet_traits<Scalar>::type Packet;
-  const int ps = internal::packet_traits<Scalar>::size;
-  Packet pasml = internal::pset1<Packet>(Scalar(0));
-  Packet pamed = internal::pset1<Packet>(Scalar(0));
-  Packet pabig = internal::pset1<Packet>(Scalar(0));
-  Packet ps2m = internal::pset1<Packet>(s2m);
-  Packet ps1m = internal::pset1<Packet>(s1m);
-  Packet pb2 = internal::pset1<Packet>(b2);
-  Packet pb1 = internal::pset1<Packet>(b1);
-  for (int j = 0; j < v.size(); j += ps) {
-    Packet ax = internal::pabs(v.template packet<Aligned>(j));
-    Packet ax_s2m = internal::pmul(ax, ps2m);
-    Packet ax_s1m = internal::pmul(ax, ps1m);
-    Packet maskBig = internal::plt(pb2, ax);
-    Packet maskSml = internal::plt(ax, pb1);
-
-    //     Packet maskMed = internal::pand(maskSml,maskBig);
-    //     Packet scale = internal::pset1(Scalar(0));
-    //     scale = internal::por(scale, internal::pand(maskBig,ps2m));
-    //     scale = internal::por(scale, internal::pand(maskSml,ps1m));
-    //     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
-    //     ax = internal::pmul(ax,scale);
-    //     ax = internal::pmul(ax,ax);
-    //     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
-    //     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
-    //     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
-
-    pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m)));
-    pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m)));
-    pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig)));
-  }
-  Scalar abig = internal::predux(pabig);
-  Scalar asml = internal::predux(pasml);
-  Scalar amed = internal::predux(pamed);
-  if (abig > Scalar(0)) {
-    abig = std::sqrt(abig);
-    if (abig > overfl) {
-      eigen_assert(false && "overflow");
-      return rbig;
-    }
-    if (amed > Scalar(0)) {
-      abig = abig / s2m;
-      amed = std::sqrt(amed);
-    } else {
-      return abig / s2m;
-    }
-
-  } else if (asml > Scalar(0)) {
-    if (amed > Scalar(0)) {
-      abig = std::sqrt(amed);
-      amed = std::sqrt(asml) / s1m;
-    } else {
-      return std::sqrt(asml) / s1m;
-    }
-  } else {
-    return std::sqrt(amed);
-  }
-  asml = std::min(abig, amed);
-  abig = std::max(abig, amed);
-  if (asml <= abig * relerr)
-    return abig;
-  else
-    return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig));
-#endif
-}
-
-#define BENCH_PERF(NRM)                                                                              \
-  {                                                                                                  \
-    float af = 0;                                                                                    \
-    double ad = 0;                                                                                   \
-    std::complex<float> ac = 0;                                                                      \
-    Eigen::BenchTimer tf, td, tcf;                                                                   \
-    tf.reset();                                                                                      \
-    td.reset();                                                                                      \
-    tcf.reset();                                                                                     \
-    for (int k = 0; k < tries; ++k) {                                                                \
-      tf.start();                                                                                    \
-      for (int i = 0; i < iters; ++i) {                                                              \
-        af += NRM(vf);                                                                               \
-      }                                                                                              \
-      tf.stop();                                                                                     \
-    }                                                                                                \
-    for (int k = 0; k < tries; ++k) {                                                                \
-      td.start();                                                                                    \
-      for (int i = 0; i < iters; ++i) {                                                              \
-        ad += NRM(vd);                                                                               \
-      }                                                                                              \
-      td.stop();                                                                                     \
-    }                                                                                                \
-    /*for (int k=0; k<std::max(1,tries/3); ++k) {                                                    \
-      tcf.start();                                                                                   \
-      for (int i=0; i<iters; ++i) { ac += NRM(vcf); }                                                \
-      tcf.stop();                                                                                    \
-    } */                                                                                             \
-    std::cout << #NRM << "\t" << tf.value() << "   " << td.value() << "    " << tcf.value() << "\n"; \
-  }
-
-void check_accuracy(double basef, double based, int s) {
-  double yf = basef * std::abs(internal::random<double>());
-  double yd = based * std::abs(internal::random<double>());
-  VectorXf vf = VectorXf::Ones(s) * yf;
-  VectorXd vd = VectorXd::Ones(s) * yd;
-
-  std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n";
-  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
-  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
-  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
-  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
-  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
-  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
-  std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
-}
-
-void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) {
-  VectorXf vf(s);
-  VectorXd vd(s);
-  for (int i = 0; i < s; ++i) {
-    vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0, ef1));
-    vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0, ed1));
-  }
-
-  // std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
-  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>())
-            << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
-  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>())
-            << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
-  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t"
-            << blueNorm(vd.cast<long double>()) << "\n";
-  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>())
-            << "\t" << blueNorm(vd.cast<long double>()) << "\n";
-  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>())
-            << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
-  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t"
-            << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
-  //   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long
-  //   double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
-}
-
-int main(int argc, char** argv) {
-  int tries = 10;
-  int iters = 100000;
-  double y = 1.1345743233455785456788e12 * internal::random<double>();
-  VectorXf v = VectorXf::Ones(1024) * y;
-
-  // return 0;
-  int s = 10000;
-  double basef_ok = 1.1345743233455785456788e15;
-  double based_ok = 1.1345743233455785456788e95;
-
-  double basef_under = 1.1345743233455785456788e-27;
-  double based_under = 1.1345743233455785456788e-303;
-
-  double basef_over = 1.1345743233455785456788e+27;
-  double based_over = 1.1345743233455785456788e+302;
-
-  std::cout.precision(20);
-
-  std::cerr << "\nNo under/overflow:\n";
-  check_accuracy(basef_ok, based_ok, s);
-
-  std::cerr << "\nUnderflow:\n";
-  check_accuracy(basef_under, based_under, s);
-
-  std::cerr << "\nOverflow:\n";
-  check_accuracy(basef_over, based_over, s);
-
-  std::cerr << "\nVarying (over):\n";
-  for (int k = 0; k < 1; ++k) {
-    check_accuracy_var(20, 27, 190, 302, s);
-    std::cout << "\n";
-  }
-
-  std::cerr << "\nVarying (under):\n";
-  for (int k = 0; k < 1; ++k) {
-    check_accuracy_var(-27, 20, -302, -190, s);
-    std::cout << "\n";
-  }
-
-  y = 1;
-  std::cout.precision(4);
-  int s1 = 1024 * 1024 * 32;
-  std::cerr << "Performance (out of cache, " << s1 << "):\n";
-  {
-    int iters = 1;
-    VectorXf vf = VectorXf::Random(s1) * y;
-    VectorXd vd = VectorXd::Random(s1) * y;
-    VectorXcf vcf = VectorXcf::Random(s1) * y;
-    BENCH_PERF(sqsumNorm);
-    BENCH_PERF(stableNorm);
-    BENCH_PERF(blueNorm);
-    BENCH_PERF(pblueNorm);
-    BENCH_PERF(lapackNorm);
-    BENCH_PERF(hypotNorm);
-    BENCH_PERF(twopassNorm);
-    BENCH_PERF(bl2passNorm);
-  }
-
-  std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
-  {
-    int iters = 100000;
-    VectorXf vf = VectorXf::Random(512) * y;
-    VectorXd vd = VectorXd::Random(512) * y;
-    VectorXcf vcf = VectorXcf::Random(512) * y;
-    BENCH_PERF(sqsumNorm);
-    BENCH_PERF(stableNorm);
-    BENCH_PERF(blueNorm);
-    BENCH_PERF(pblueNorm);
-    BENCH_PERF(lapackNorm);
-    BENCH_PERF(hypotNorm);
-    BENCH_PERF(twopassNorm);
-    BENCH_PERF(bl2passNorm);
-  }
-}
--- a/bench/bench_reverse.cpp
+++ b/bench/bench_reverse.cpp
@@ -1,76 +0,0 @@
-
-#include <iostream>
-#include <Eigen/Core>
-#include <bench/BenchUtil.h>
-using namespace Eigen;
-
-#ifndef REPEAT
-#define REPEAT 100000
-#endif
-
-#ifndef TRIES
-#define TRIES 20
-#endif
-
-typedef double Scalar;
-
-template <typename MatrixType>
-__attribute__((noinline)) void bench_reverse(const MatrixType& m) {
-  int rows = m.rows();
-  int cols = m.cols();
-  int size = m.size();
-
-  int repeats = (REPEAT * 1000) / size;
-  MatrixType a = MatrixType::Random(rows, cols);
-  MatrixType b = MatrixType::Random(rows, cols);
-
-  BenchTimer timerB, timerH, timerV;
-
-  Scalar acc = 0;
-  int r = internal::random<int>(0, rows - 1);
-  int c = internal::random<int>(0, cols - 1);
-  for (int t = 0; t < TRIES; ++t) {
-    timerB.start();
-    for (int k = 0; k < repeats; ++k) {
-      asm("#begin foo");
-      b = a.reverse();
-      asm("#end foo");
-      acc += b.coeff(r, c);
-    }
-    timerB.stop();
-  }
-
-  if (MatrixType::RowsAtCompileTime == Dynamic)
-    std::cout << "dyn   ";
-  else
-    std::cout << "fixed ";
-  std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s "
-            << "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t";
-
-  std::cout << "\n";
-  // make sure the compiler does not optimize too much
-  if (acc == 123) std::cout << acc;
-}
-
-int main(int argc, char* argv[]) {
-  const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0};
-  std::cout << "size            no sqrt                           standard";
-  //   #ifdef BENCH_GSL
-  //   std::cout << "       GSL (standard + double + ATLAS)  ";
-  //   #endif
-  std::cout << "\n";
-  for (uint i = 0; dynsizes[i] > 0; ++i) {
-    bench_reverse(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
-    bench_reverse(Matrix<Scalar, Dynamic, 1>(dynsizes[i] * dynsizes[i]));
-  }
-  //   bench_reverse(Matrix<Scalar,2,2>());
-  //   bench_reverse(Matrix<Scalar,3,3>());
-  //   bench_reverse(Matrix<Scalar,4,4>());
-  //   bench_reverse(Matrix<Scalar,5,5>());
-  //   bench_reverse(Matrix<Scalar,6,6>());
-  //   bench_reverse(Matrix<Scalar,7,7>());
-  //   bench_reverse(Matrix<Scalar,8,8>());
-  //   bench_reverse(Matrix<Scalar,12,12>());
-  //   bench_reverse(Matrix<Scalar,16,16>());
-  return 0;
-}
--- a/bench/bench_sum.cpp
+++ b/bench/bench_sum.cpp
@@ -1,16 +0,0 @@
-#include <iostream>
-#include <Eigen/Core>
-using namespace Eigen;
-using namespace std;
-
-int main() {
-  typedef Matrix<SCALAR, Eigen::Dynamic, 1> Vec;
-  Vec v(SIZE);
-  v.setZero();
-  v[0] = 1;
-  v[1] = 2;
-  for (int i = 0; i < 1000000; i++) {
-    v.coeffRef(0) += v.sum() * SCALAR(1e-20);
-  }
-  cout << v.sum() << endl;
-}
--- a/bench/bench_unrolling
+++ b/bench/bench_unrolling
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# gcc : CXX="g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
-# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"
-CXX=${CXX-g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value
-
-for ((i=1; i<16; ++i)); do
-    echo "Matrix size: $i x $i :"
-    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
-    $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
-    echo " "
-done
--- a/bench/benchmark-blocking-sizes.cpp
+++ b/bench/benchmark-blocking-sizes.cpp
@@ -1,617 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include <iostream>
-#include <cstdint>
-#include <cstdlib>
-#include <vector>
-#include <fstream>
-#include <memory>
-#include <cstdio>
-
-bool eigen_use_specific_block_size;
-int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
-#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
-#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
-#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
-#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
-#include <Eigen/Core>
-
-#include <bench/BenchTimer.h>
-
-using namespace Eigen;
-using namespace std;
-
-static BenchTimer timer;
-
-// how many times we repeat each measurement.
-// measurements are randomly shuffled - we're not doing
-// all N identical measurements in a row.
-const int measurement_repetitions = 3;
-
-// Timings below this value are too short to be accurate,
-// we'll repeat measurements with more iterations until
-// we get a timing above that threshold.
-const float min_accurate_time = 1e-2f;
-
-// See --min-working-set-size command line parameter.
-size_t min_working_set_size = 0;
-
-float max_clock_speed = 0.0f;
-
-// range of sizes that we will benchmark (in all 3 K,M,N dimensions)
-const size_t maxsize = 2048;
-const size_t minsize = 16;
-
-typedef MatrixXf MatrixType;
-typedef MatrixType::Scalar Scalar;
-typedef internal::packet_traits<Scalar>::type Packet;
-
-static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
-static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
-static_assert(maxsize > minsize, "maxsize must be larger than minsize");
-static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
-
-// just a helper to store a triple of K,M,N sizes for matrix product
-struct size_triple_t {
-  size_t k, m, n;
-  size_triple_t() : k(0), m(0), n(0) {}
-  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
-  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
-  size_triple_t(uint16_t compact) {
-    k = 1 << ((compact & 0xf00) >> 8);
-    m = 1 << ((compact & 0x0f0) >> 4);
-    n = 1 << ((compact & 0x00f) >> 0);
-  }
-};
-
-uint8_t log2_pot(size_t x) {
-  size_t l = 0;
-  while (x >>= 1) l++;
-  return l;
-}
-
-// Convert between size tripes and a compact form fitting in 12 bits
-// where each size, which must be a POT, is encoded as its log2, on 4 bits
-// so the largest representable size is 2^15 == 32k  ... big enough.
-uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
-  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
-}
-
-uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); }
-
-// A single benchmark. Initially only contains benchmark params.
-// Then call run(), which stores the result in the gflops field.
-struct benchmark_t {
-  uint16_t compact_product_size;
-  uint16_t compact_block_size;
-  bool use_default_block_size;
-  float gflops;
-  benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {}
-  benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
-      : compact_product_size(compact_size_triple(pk, pm, pn)),
-        compact_block_size(compact_size_triple(bk, bm, bn)),
-        use_default_block_size(false),
-        gflops(0) {}
-  benchmark_t(size_t pk, size_t pm, size_t pn)
-      : compact_product_size(compact_size_triple(pk, pm, pn)),
-        compact_block_size(0),
-        use_default_block_size(true),
-        gflops(0) {}
-
-  void run();
-};
-
-ostream& operator<<(ostream& s, const benchmark_t& b) {
-  s << hex << b.compact_product_size << dec;
-  if (b.use_default_block_size) {
-    size_triple_t t(b.compact_product_size);
-    Index k = t.k, m = t.m, n = t.n;
-    internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
-    s << " default(" << k << ", " << m << ", " << n << ")";
-  } else {
-    s << " " << hex << b.compact_block_size << dec;
-  }
-  s << " " << b.gflops;
-  return s;
-}
-
-// We sort first by increasing benchmark parameters,
-// then by decreasing performance.
-bool operator<(const benchmark_t& b1, const benchmark_t& b2) {
-  return b1.compact_product_size < b2.compact_product_size ||
-         (b1.compact_product_size == b2.compact_product_size &&
-          ((b1.compact_block_size < b2.compact_block_size ||
-            (b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops))));
-}
-
-void benchmark_t::run() {
-  size_triple_t productsizes(compact_product_size);
-
-  if (use_default_block_size) {
-    eigen_use_specific_block_size = false;
-  } else {
-    // feed eigen with our custom blocking params
-    eigen_use_specific_block_size = true;
-    size_triple_t blocksizes(compact_block_size);
-    eigen_block_size_k = blocksizes.k;
-    eigen_block_size_m = blocksizes.m;
-    eigen_block_size_n = blocksizes.n;
-  }
-
-  // set up the matrix pool
-
-  const size_t combined_three_matrices_sizes =
-      sizeof(Scalar) *
-      (productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n);
-
-  // 64 M is large enough that nobody has a cache bigger than that,
-  // while still being small enough that everybody has this much RAM,
-  // so conveniently we don't need to special-case platforms here.
-  const size_t unlikely_large_cache_size = 64 << 20;
-
-  const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
-
-  const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes;
-
-  MatrixType* lhs = new MatrixType[matrix_pool_size];
-  MatrixType* rhs = new MatrixType[matrix_pool_size];
-  MatrixType* dst = new MatrixType[matrix_pool_size];
-
-  for (size_t i = 0; i < matrix_pool_size; i++) {
-    lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
-    rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
-    dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
-  }
-
-  // main benchmark loop
-
-  int iters_at_a_time = 1;
-  float time_per_iter = 0.0f;
-  size_t matrix_index = 0;
-  while (true) {
-    double starttime = timer.getCpuTime();
-    for (int i = 0; i < iters_at_a_time; i++) {
-      dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
-      matrix_index++;
-      if (matrix_index == matrix_pool_size) {
-        matrix_index = 0;
-      }
-    }
-    double endtime = timer.getCpuTime();
-
-    const float timing = float(endtime - starttime);
-
-    if (timing >= min_accurate_time) {
-      time_per_iter = timing / iters_at_a_time;
-      break;
-    }
-
-    iters_at_a_time *= 2;
-  }
-
-  delete[] lhs;
-  delete[] rhs;
-  delete[] dst;
-
-  gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
-}
-
-void print_cpuinfo() {
-#ifdef __linux__
-  cout << "contents of /proc/cpuinfo:" << endl;
-  string line;
-  ifstream cpuinfo("/proc/cpuinfo");
-  if (cpuinfo.is_open()) {
-    while (getline(cpuinfo, line)) {
-      cout << line << endl;
-    }
-    cpuinfo.close();
-  }
-  cout << endl;
-#elif defined __APPLE__
-  cout << "output of sysctl hw:" << endl;
-  system("sysctl hw");
-  cout << endl;
-#endif
-}
-
-template <typename T>
-string type_name() {
-  return "unknown";
-}
-
-template <>
-string type_name<float>() {
-  return "float";
-}
-
-template <>
-string type_name<double>() {
-  return "double";
-}
-
-struct action_t {
-  virtual const char* invokation_name() const {
-    abort();
-    return nullptr;
-  }
-  virtual void run() const { abort(); }
-  virtual ~action_t() {}
-};
-
-void show_usage_and_exit(int /*argc*/, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
-  cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
-  cerr << "available actions:" << endl << endl;
-  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
-    cerr << "  " << (*it)->invokation_name() << endl;
-  }
-  cerr << endl;
-  cerr << "options:" << endl << endl;
-  cerr << "  --min-working-set-size=N:" << endl;
-  cerr << "       Set the minimum working set size to N bytes." << endl;
-  cerr << "       This is rounded up as needed to a multiple of matrix size." << endl;
-  cerr << "       A larger working set lowers the chance of a warm cache." << endl;
-  cerr << "       The default value 0 means use a large enough working" << endl;
-  cerr << "       set to likely outsize caches." << endl;
-  cerr << "       A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
-  cerr << "       avoid warm caches." << endl;
-  exit(1);
-}
-
-float measure_clock_speed() {
-  cerr << "Measuring clock speed...                              \r" << flush;
-
-  vector<float> all_gflops;
-  for (int i = 0; i < 8; i++) {
-    benchmark_t b(1024, 1024, 1024);
-    b.run();
-    all_gflops.push_back(b.gflops);
-  }
-
-  sort(all_gflops.begin(), all_gflops.end());
-  float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
-
-  // multiply by an arbitrary constant to discourage trying doing anything with the
-  // returned values besides just comparing them with each other.
-  float result = stable_estimate * 123.456f;
-
-  return result;
-}
-
-struct human_duration_t {
-  int seconds;
-  human_duration_t(int s) : seconds(s) {}
-};
-
-ostream& operator<<(ostream& s, const human_duration_t& d) {
-  int remainder = d.seconds;
-  if (remainder > 3600) {
-    int hours = remainder / 3600;
-    s << hours << " h ";
-    remainder -= hours * 3600;
-  }
-  if (remainder > 60) {
-    int minutes = remainder / 60;
-    s << minutes << " min ";
-    remainder -= minutes * 60;
-  }
-  if (d.seconds < 600) {
-    s << remainder << " s";
-  }
-  return s;
-}
-
-const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
-
-void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) {
-  FILE* file = fopen(filename, "w");
-  if (!file) {
-    cerr << "Could not open file " << filename << " for writing." << endl;
-    cerr << "Do you have write permissions on the current working directory?" << endl;
-    exit(1);
-  }
-  size_t benchmarks_vector_size = benchmarks.size();
-  fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file);
-  fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file);
-  fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
-  fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
-  fclose(file);
-}
-
-bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) {
-  FILE* file = fopen(filename, "r");
-  if (!file) {
-    return false;
-  }
-  if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
-    return false;
-  }
-  size_t benchmarks_vector_size = 0;
-  if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
-    return false;
-  }
-  if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
-    return false;
-  }
-  benchmarks.resize(benchmarks_vector_size);
-  if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
-    return false;
-  }
-  unlink(filename);
-  return true;
-}
-
-void try_run_some_benchmarks(vector<benchmark_t>& benchmarks, double time_start, size_t& first_benchmark_to_run) {
-  if (first_benchmark_to_run == benchmarks.size()) {
-    return;
-  }
-
-  double time_last_progress_update = 0;
-  double time_last_clock_speed_measurement = 0;
-  double time_now = 0;
-
-  size_t benchmark_index = first_benchmark_to_run;
-
-  while (true) {
-    float ratio_done = float(benchmark_index) / benchmarks.size();
-    time_now = timer.getRealTime();
-
-    // We check clock speed every minute and at the end.
-    if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) {
-      time_last_clock_speed_measurement = time_now;
-
-      // Ensure that clock speed is as expected
-      float current_clock_speed = measure_clock_speed();
-
-      // The tolerance needs to be smaller than the relative difference between
-      // clock speeds that a device could operate under.
-      // It seems unlikely that a device would be throttling clock speeds by
-      // amounts smaller than 2%.
-      // With a value of 1%, I was getting within noise on a Sandy Bridge.
-      const float clock_speed_tolerance = 0.02f;
-
-      if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
-        // Clock speed is now higher than we previously measured.
-        // Either our initial measurement was inaccurate, which won't happen
-        // too many times as we are keeping the best clock speed value and
-        // and allowing some tolerance; or an unexpected condition occurred,
-        // which invalidates all benchmark results collected so far.
-        // Either way, we better restart all over again now.
-        if (benchmark_index) {
-          cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased.          " << endl;
-        }
-        max_clock_speed = current_clock_speed;
-        first_benchmark_to_run = 0;
-        return;
-      }
-
-      bool rerun_last_tests = false;
-
-      if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
-        cerr << "Measurements completed so far: " << 100.0f * ratio_done << " %                             " << endl;
-        cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be."
-             << endl;
-
-        unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
-
-        while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
-          if (seconds_to_sleep_if_lower_clock_speed > 32) {
-            cerr << "Sleeping longer probably won't make a difference." << endl;
-            cerr << "Serializing benchmarks to " << session_filename << endl;
-            serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
-            cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
-            exit(2);
-          }
-          rerun_last_tests = true;
-          cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s...                                   \r"
-               << endl;
-          sleep(seconds_to_sleep_if_lower_clock_speed);
-          current_clock_speed = measure_clock_speed();
-          seconds_to_sleep_if_lower_clock_speed *= 2;
-        }
-      }
-
-      if (rerun_last_tests) {
-        cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
-             << " % because clock speed had been low.   " << endl;
-        return;
-      }
-
-      // nothing wrong with the clock speed so far, so there won't be a need to rerun
-      // benchmarks run so far in case we later encounter a lower clock speed.
-      first_benchmark_to_run = benchmark_index;
-    }
-
-    if (benchmark_index == benchmarks.size()) {
-      // We're done!
-      first_benchmark_to_run = benchmarks.size();
-      // Erase progress info
-      cerr << "                                                            " << endl;
-      return;
-    }
-
-    // Display progress info on stderr
-    if (time_now > time_last_progress_update + 1.0f) {
-      time_last_progress_update = time_now;
-      cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA "
-           << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
-           << "                          \r" << flush;
-    }
-
-    // This is where we actually run a benchmark!
-    benchmarks[benchmark_index].run();
-    benchmark_index++;
-  }
-}
-
-void run_benchmarks(vector<benchmark_t>& benchmarks) {
-  size_t first_benchmark_to_run;
-  vector<benchmark_t> deserialized_benchmarks;
-  bool use_deserialized_benchmarks = false;
-  if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
-    cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
-         << " % already done" << endl;
-    if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 &&
-        first_benchmark_to_run < benchmarks.size()) {
-      use_deserialized_benchmarks = true;
-    }
-  }
-
-  if (use_deserialized_benchmarks) {
-    benchmarks = deserialized_benchmarks;
-  } else {
-    // not using deserialized benchmarks, starting from scratch
-    first_benchmark_to_run = 0;
-
-    // Randomly shuffling benchmarks allows us to get accurate enough progress info,
-    // as now the cheap/expensive benchmarks are randomly mixed so they average out.
-    // It also means that if data is corrupted for some time span, the odds are that
-    // not all repetitions of a given benchmark will be corrupted.
-    random_shuffle(benchmarks.begin(), benchmarks.end());
-  }
-
-  for (int i = 0; i < 4; i++) {
-    max_clock_speed = max(max_clock_speed, measure_clock_speed());
-  }
-
-  double time_start = 0.0;
-  while (first_benchmark_to_run < benchmarks.size()) {
-    if (first_benchmark_to_run == 0) {
-      time_start = timer.getRealTime();
-    }
-    try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run);
-  }
-
-  // Sort timings by increasing benchmark parameters, and decreasing gflops.
-  // The latter is very important. It means that we can ignore all but the first
-  // benchmark with given parameters.
-  sort(benchmarks.begin(), benchmarks.end());
-
-  // Collect best (i.e. now first) results for each parameter values.
-  vector<benchmark_t> best_benchmarks;
-  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
-    if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size ||
-        best_benchmarks.back().compact_block_size != it->compact_block_size) {
-      best_benchmarks.push_back(*it);
-    }
-  }
-
-  // keep and return only the best benchmarks
-  benchmarks = best_benchmarks;
-}
-
-struct measure_all_pot_sizes_action_t : action_t {
-  virtual const char* invokation_name() const { return "all-pot-sizes"; }
-  virtual void run() const {
-    vector<benchmark_t> benchmarks;
-    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
-      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
-        for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
-          for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
-            for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
-              for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
-                for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
-                  benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-
-    run_benchmarks(benchmarks);
-
-    cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
-    for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
-      cout << *it << endl;
-    }
-  }
-};
-
-struct measure_default_sizes_action_t : action_t {
-  virtual const char* invokation_name() const { return "default-sizes"; }
-  virtual void run() const {
-    vector<benchmark_t> benchmarks;
-    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
-      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
-        for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
-          for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
-            benchmarks.emplace_back(ksize, msize, nsize);
-          }
-        }
-      }
-    }
-
-    run_benchmarks(benchmarks);
-
-    cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
-    for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
-      cout << *it << endl;
-    }
-  }
-};
-
-int main(int argc, char* argv[]) {
-  double time_start = timer.getRealTime();
-  cout.precision(4);
-  cerr.precision(4);
-
-  vector<unique_ptr<action_t>> available_actions;
-  available_actions.emplace_back(new measure_all_pot_sizes_action_t);
-  available_actions.emplace_back(new measure_default_sizes_action_t);
-
-  auto action = available_actions.end();
-
-  if (argc <= 1) {
-    show_usage_and_exit(argc, argv, available_actions);
-  }
-  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
-    if (!strcmp(argv[1], (*it)->invokation_name())) {
-      action = it;
-      break;
-    }
-  }
-
-  if (action == available_actions.end()) {
-    show_usage_and_exit(argc, argv, available_actions);
-  }
-
-  for (int i = 2; i < argc; i++) {
-    if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
-      const char* equals_sign = strchr(argv[i], '=');
-      min_working_set_size = strtoul(equals_sign + 1, nullptr, 10);
-    } else {
-      cerr << "unrecognized option: " << argv[i] << endl << endl;
-      show_usage_and_exit(argc, argv, available_actions);
-    }
-  }
-
-  print_cpuinfo();
-
-  cout << "benchmark parameters:" << endl;
-  cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl;
-  cout << "scalar type: " << type_name<Scalar>() << endl;
-  cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
-  cout << "minsize = " << minsize << endl;
-  cout << "maxsize = " << maxsize << endl;
-  cout << "measurement_repetitions = " << measurement_repetitions << endl;
-  cout << "min_accurate_time = " << min_accurate_time << endl;
-  cout << "min_working_set_size = " << min_working_set_size;
-  if (min_working_set_size == 0) {
-    cout << " (try to outsize caches)";
-  }
-  cout << endl << endl;
-
-  (*action)->run();
-
-  double time_end = timer.getRealTime();
-  cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
-}
--- a/bench/benchmark.cpp
+++ b/bench/benchmark.cpp
@@ -1,36 +0,0 @@
-// g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark
-
-#include <iostream>
-
-#include <Eigen/Core>
-
-#ifndef MATSIZE
-#define MATSIZE 3
-#endif
-
-using namespace std;
-using namespace Eigen;
-
-#ifndef REPEAT
-#define REPEAT 40000000
-#endif
-
-#ifndef SCALAR
-#define SCALAR double
-#endif
-
-int main(int argc, char *argv[]) {
-  Matrix<SCALAR, MATSIZE, MATSIZE> I = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones();
-  Matrix<SCALAR, MATSIZE, MATSIZE> m;
-  for (int i = 0; i < MATSIZE; i++)
-    for (int j = 0; j < MATSIZE; j++) {
-      m(i, j) = (i + MATSIZE * j);
-    }
-  asm("#begin");
-  for (int a = 0; a < REPEAT; a++) {
-    m = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones() + 0.00005 * (m + (m * m));
-  }
-  asm("#end");
-  cout << m << endl;
-  return 0;
-}
--- a/bench/benchmarkSlice.cpp
+++ b/bench/benchmarkSlice.cpp
@@ -1,36 +0,0 @@
-// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
-
-#include <iostream>
-
-#include <Eigen/Core>
-
-using namespace std;
-using namespace Eigen;
-
-#ifndef REPEAT
-#define REPEAT 10000
-#endif
-
-#ifndef SCALAR
-#define SCALAR float
-#endif
-
-int main(int argc, char *argv[]) {
-  typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
-  Mat m(100, 100);
-  m.setRandom();
-
-  for (int a = 0; a < REPEAT; a++) {
-    int r, c, nr, nc;
-    r = Eigen::internal::random<int>(0, 10);
-    c = Eigen::internal::random<int>(0, 10);
-    nr = Eigen::internal::random<int>(50, 80);
-    nc = Eigen::internal::random<int>(50, 80);
-    m.block(r, c, nr, nc) += Mat::Ones(nr, nc);
-    m.block(r, c, nr, nc) *= SCALAR(10);
-    m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10);
-    m.block(r, c, nr, nc) /= SCALAR(10);
-  }
-  cout << m[0] << endl;
-  return 0;
-}
--- a/bench/benchmarkX.cpp
+++ b/bench/benchmarkX.cpp
@@ -1,34 +0,0 @@
-// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b
-
-#include <iostream>
-
-#include <Eigen/Core>
-
-using namespace std;
-using namespace Eigen;
-
-#ifndef MATTYPE
-#define MATTYPE MatrixXLd
-#endif
-
-#ifndef MATSIZE
-#define MATSIZE 400
-#endif
-
-#ifndef REPEAT
-#define REPEAT 100
-#endif
-
-int main(int argc, char *argv[]) {
-  MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE);
-  MATTYPE m(MATSIZE, MATSIZE);
-  for (int i = 0; i < MATSIZE; i++)
-    for (int j = 0; j < MATSIZE; j++) {
-      m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE);
-    }
-  for (int a = 0; a < REPEAT; a++) {
-    m = I + 0.0001 * (m + m * m);
-  }
-  cout << m(0, 0) << endl;
-  return 0;
-}
--- a/bench/benchmarkXcwise.cpp
+++ b/bench/benchmarkXcwise.cpp
@@ -1,32 +0,0 @@
-// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
-
-#include <iostream>
-#include <Eigen/Core>
-
-using namespace std;
-using namespace Eigen;
-
-#ifndef VECTYPE
-#define VECTYPE VectorXLd
-#endif
-
-#ifndef VECSIZE
-#define VECSIZE 1000000
-#endif
-
-#ifndef REPEAT
-#define REPEAT 1000
-#endif
-
-int main(int argc, char *argv[]) {
-  VECTYPE I = VECTYPE::Ones(VECSIZE);
-  VECTYPE m(VECSIZE, 1);
-  for (int i = 0; i < VECSIZE; i++) {
-    m[i] = 0.1 * i / VECSIZE;
-  }
-  for (int a = 0; a < REPEAT; a++) {
-    m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4);
-  }
-  cout << m[0] << endl;
-  return 0;
-}
--- a/bench/benchmark_aocl.cpp
+++ b/bench/benchmark_aocl.cpp
@@ -1,362 +0,0 @@
-/*
- * benchmark_aocl.cpp - AOCL Performance Benchmark Suite for Eigen
- *
- * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- *
- * Description:
- * ------------
- * This benchmark suite evaluates the performance of Eigen mathematical
- * operations when integrated with AMD Optimizing CPU Libraries (AOCL). It
- * tests:
- *
- * 1. Vector Math Operations: Transcendental functions (exp, sin, cos, sqrt,
- * log, etc.) using AOCL Vector Math Library (VML) for optimized
- * double-precision operations
- *
- * 2. Matrix Operations: BLAS Level-3 operations (DGEMM) using AOCL BLAS library
- *    with support for both single-threaded and multithreaded execution
- *
- * 3. Linear Algebra: LAPACK operations (eigenvalue decomposition) using
- * libflame
- *
- * 4. Real-world Scenarios: Financial risk computation simulating covariance
- * matrix calculations and eigenvalue analysis for portfolio optimization
- *
- * The benchmark automatically detects AOCL configuration and adjusts test
- * execution accordingly, providing performance comparisons between standard
- * Eigen operations and AOCL-accelerated implementations.
- *
- * Compilation:
- * ------------
- * # Using AOCC compiler (recommended for best AOCL compatibility):
- * clang++ -O3 -g -DEIGEN_USE_AOCL_ALL -I<PATH_TO_EIGEN_INCLUDE>
- * -I${AOCL_ROOT}/include \
- *         -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \
- *         -lamdlibm -lm -lblis -lflame -lpthread -lrt -pthread \
- *         -o build/eigen_aocl_benchmark
- *
- * # Alternative: Using GCC with proper library paths:
- * g++ -O3 -g -DEIGEN_USE_AOCL_ALL -I<PATH_TO_EIGEN_INCLUDE>
- * -I${AOCL_ROOT}/include \
- *     -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \
- *     -lamdlibm -lm -lblis -lflame -lpthread -lrt \
- *     -o build/eigen_aocl_benchmark
- *
- * # For multithreaded BLIS support:
- * clang++ -O3 -g -fopenmp -DEIGEN_USE_AOCL_MT -I<PATH_TO_EIGEN_INCLUDE> \
- *         -I${AOCL_ROOT}/include -Wno-parentheses src/benchmark_aocl.cpp \
- *         -L${AOCL_ROOT}/lib -lamdlibm -lm -lblis-mt -lflame -lpthread -lrt \
- *         -o build/eigen_aocl_benchmark_mt
- *
- * Usage:
- * ------
- * export AOCL_ROOT=/path/to/aocl/installation
- * export LD_LIBRARY_PATH=$AOCL_ROOT/lib:$LD_LIBRARY_PATH
- * ./build/eigen_aocl_benchmark
- *
- * Developer:
- * ----------
- * Name: Sharad Saurabh Bhaskar
- * Email: shbhaska@amd.com
- * Organization: Advanced Micro Devices, Inc.
- */
-
-#include <chrono>
-#include <cstdlib>
-#include <iostream>
-#include <thread>
-#include <vector>
-
-// Simple - just include Eigen headers
-#include <Eigen/Core>
-#include <Eigen/Dense>
-#include <Eigen/Eigenvalues>
-
-// Only include CBLAS if AOCL BLIS is available
-#ifdef EIGEN_USE_AOCL_ALL
-#include <cblas.h>
-#endif
-
-using namespace std;
-using namespace std::chrono;
-using namespace Eigen;
-
-void benchmarkVectorMath(int size) {
-  VectorXd v = VectorXd::LinSpaced(size, 0.1, 10.0);
-  VectorXd result(size);
-  double elapsed_ms = 0;
-
-  cout << "\n--- Vector Math Benchmark (size = " << size << ") ---" << endl;
-
-  auto start = high_resolution_clock::now();
-  result = v.array().exp();
-  auto end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "exp() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().sin();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "sin() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().cos();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "cos() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().sqrt();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "sqrt() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().cbrt();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "cbrt() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().abs();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "abs() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().log();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "log() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().log10();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "log10() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().exp2();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "exp2() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().asin();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "asin() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().sinh();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "sinh() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().acos();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "acos() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().cosh();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "cosh() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().tan();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "tan() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().atan();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "atan() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().tanh();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "tanh() time: " << elapsed_ms << " ms" << endl;
-
-  VectorXd v2 = VectorXd::Random(size);
-  start = high_resolution_clock::now();
-  result = v.array() + v2.array();
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "add() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().pow(2.0);
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "pow() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().max(v2.array());
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "max() time: " << elapsed_ms << " ms" << endl;
-
-  start = high_resolution_clock::now();
-  result = v.array().min(v2.array());
-  end = high_resolution_clock::now();
-  elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "min() time: " << elapsed_ms << " ms" << endl;
-}
-
-// Function to benchmark BLAS operation: Matrix multiplication.
-void benchmarkMatrixMultiplication(int matSize) {
-  cout << "\n--- BLIS-st DGEMM Benchmark (" << matSize << " x " << matSize
-       << ") ---" << endl;
-
-  MatrixXd A = MatrixXd::Random(matSize, matSize);
-  MatrixXd B = MatrixXd::Random(matSize, matSize);
-  MatrixXd C(matSize, matSize);
-
-  auto start = high_resolution_clock::now();
-  C = A * B;
-  auto end = high_resolution_clock::now();
-  double elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "Matrix multiplication time: " << elapsed_ms << " ms" << endl;
-}
-
-// Benchmark BLIS directly using its CBLAS interface if available.
-void benchmarkBlisMultithreaded(int matSize, int numThreads) {
-#if defined(EIGEN_AOCL_USE_BLIS_MT)
-  cout << "\n--- BLIS-mt DGEMM Benchmark (" << matSize << " x " << matSize
-       << ", threads=" << numThreads << ") ---" << endl;
-  vector<double> A(matSize * matSize);
-  vector<double> B(matSize * matSize);
-  vector<double> C(matSize * matSize);
-  for (auto &v : A)
-    v = static_cast<double>(rand()) / RAND_MAX;
-  for (auto &v : B)
-    v = static_cast<double>(rand()) / RAND_MAX;
-  double alpha = 1.0, beta = 0.0;
-  string th = to_string(numThreads);
-  setenv("BLIS_NUM_THREADS", th.c_str(), 1);
-  auto start = high_resolution_clock::now();
-  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, matSize, matSize,
-              matSize, alpha, A.data(), matSize, B.data(), matSize, beta,
-              C.data(), matSize);
-  auto end = high_resolution_clock::now();
-  double elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  cout << "BLIS dgemm time: " << elapsed_ms << " ms" << endl;
-#else
-  (void)matSize;
-  (void)numThreads;
-  cout << "\nBLIS multithreaded support not enabled." << endl;
-#endif
-}
-
-// Function to benchmark LAPACK operation: Eigenvalue decomposition.
-void benchmarkEigenDecomposition(int matSize) {
-  cout << "\n--- Eigenvalue Decomposition Benchmark (Matrix Size: " << matSize
-       << " x " << matSize << ") ---" << endl;
-  MatrixXd M = MatrixXd::Random(matSize, matSize);
-  // Make matrix symmetric (necessary for eigenvalue decomposition of
-  // self-adjoint matrices)
-  M = (M + M.transpose()) * 0.5;
-
-  SelfAdjointEigenSolver<MatrixXd> eigensolver;
-  auto start = high_resolution_clock::now();
-  eigensolver.compute(M);
-  auto end = high_resolution_clock::now();
-  double elapsed_ms = duration_cast<milliseconds>(end - start).count();
-  if (eigensolver.info() == Success) {
-    cout << "Eigenvalue decomposition time: " << elapsed_ms << " ms" << endl;
-  } else {
-    cout << "Eigenvalue decomposition failed." << endl;
-  }
-}
-
-// Function simulating a real-world FSI risk computation scenario.
-// Example: Compute covariance matrix from simulated asset returns, then perform
-// eigenvalue decomposition.
-void benchmarkFSIRiskComputation(int numPeriods, int numAssets) {
-  cout << "\n--- FSI Risk Computation Benchmark ---" << endl;
-  cout << "Simulating " << numPeriods << " periods for " << numAssets
-       << " assets." << endl;
-
-  // Simulate asset returns: each column represents an asset's returns.
-  MatrixXd returns = MatrixXd::Random(numPeriods, numAssets);
-
-  // Compute covariance matrix: cov = (returns^T * returns) / (numPeriods - 1)
-  auto start = high_resolution_clock::now();
-  MatrixXd cov = (returns.transpose() * returns) / (numPeriods - 1);
-  auto end = high_resolution_clock::now();
-  double cov_time = duration_cast<milliseconds>(end - start).count();
-  cout << "Covariance matrix computation time: " << cov_time << " ms" << endl;
-
-  // Eigenvalue decomposition on covariance matrix.
-  SelfAdjointEigenSolver<MatrixXd> eigensolver;
-  start = high_resolution_clock::now();
-  eigensolver.compute(cov);
-  end = high_resolution_clock::now();
-  double eig_time = duration_cast<milliseconds>(end - start).count();
-  if (eigensolver.info() == Success) {
-    cout << "Eigenvalue decomposition (covariance) time: " << eig_time << " ms"
-         << endl;
-    cout << "Top 3 Eigenvalues: "
-         << eigensolver.eigenvalues().tail(3).transpose() << endl;
-  } else {
-    cout << "Eigenvalue decomposition failed." << endl;
-  }
-}
-
-int main() {
-  cout << "=== AOCL Benchmark for Eigen on AMD Platforms ===" << endl;
-  cout << "Developer: Sharad Saurabh Bhaskar (shbhaska@amd.com)" << endl;
-  cout << "Organization: Advanced Micro Devices, Inc." << endl;
-  cout << "License: Mozilla Public License 2.0" << endl << endl;
-
-  // Print AOCL configuration
-#ifdef EIGEN_USE_AOCL_MT
-  cout << "AOCL Mode: MULTITHREADED (MT)" << endl;
-  cout << "Features: Multithreaded BLIS, AOCL VML, LAPACK" << endl;
-#elif defined(EIGEN_USE_AOCL_ALL)
-  cout << "AOCL Mode: SINGLE-THREADED (ALL)" << endl;
-  cout << "Features: Single-threaded BLIS, AOCL VML, LAPACK" << endl;
-#else
-  cout << "AOCL Mode: DISABLED" << endl;
-  cout << "Using standard Eigen implementation" << endl;
-#endif
-  cout << "Hardware threads available: " << thread::hardware_concurrency() << endl << endl;
-
-  // Benchmark vector math functions with varying vector sizes.
-  vector<int> vectorSizes = {5000000, 10000000, 50000000};
-  for (int size : vectorSizes) {
-    benchmarkVectorMath(size);
-  }
-
-  // Benchmark matrix multiplication for varying sizes.
-  vector<int> matrixSizes = {1024};
-  for (int msize : matrixSizes) {
-    benchmarkMatrixMultiplication(msize);
-#if defined(EIGEN_AOCL_USE_BLIS_MT)
-    benchmarkBlisMultithreaded(msize, thread::hardware_concurrency());
-#endif
-  }
-
-  // Benchmark LAPACK: Eigenvalue Decomposition.
-  for (int msize : matrixSizes) {
-    benchmarkEigenDecomposition(msize);
-  }
-
-  // Benchmark a complex FSI risk computation scenario.
-  // For example, simulate 10,000 time periods (days) for 500 assets.
-  benchmarkFSIRiskComputation(10000, 500);
-
-  cout << "\n=== Benchmark Complete ===" << endl;
-  return 0;
-}
--- a/bench/benchmark_suite
+++ b/bench/benchmark_suite
@@ -1,18 +0,0 @@
-#!/bin/bash
-CXX=${CXX-g++} # default value unless caller has defined CXX
-echo "Fixed size 3x3, column-major, -DNDEBUG"
-$CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
-echo "Fixed size 3x3, column-major, with asserts"
-$CXX -O3 -I .. benchmark.cpp -o benchmark && time ./benchmark >/dev/null
-echo "Fixed size 3x3, row-major, -DNDEBUG"
-$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
-echo "Fixed size 3x3, row-major, with asserts"
-$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmark.cpp -o benchmark && time ./benchmark >/dev/null
-echo "Dynamic size 20x20, column-major, -DNDEBUG"
-$CXX -O3 -I .. -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
-echo "Dynamic size 20x20, column-major, with asserts"
-$CXX -O3 -I .. benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
-echo "Dynamic size 20x20, row-major, -DNDEBUG"
-$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
-echo "Dynamic size 20x20, row-major, with asserts"
-$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
--- a/bench/btl/CMakeLists.txt
+++ b/bench/btl/CMakeLists.txt
@@ -1,107 +0,0 @@
-project(BTL)
-
-cmake_minimum_required(VERSION 2.6.2)
-
-set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake)
-include(MacroOptionalAddSubdirectory)
-
-option(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)
-
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
-
-string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER})
-if(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
-  set(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}")
-  set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}")
-  if(BTL_NOVEC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
-  endif(BTL_NOVEC)
-endif(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
-
-if(MSVC)
-  set(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG")
-#   set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
-  if(BTL_NOVEC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
-  endif(BTL_NOVEC)
-endif(MSVC)
-
-if(IS_ICPC)
-  set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}")
-  set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}")
-endif()
-
-include_directories(
-  ${PROJECT_SOURCE_DIR}/actions
-  ${PROJECT_SOURCE_DIR}/generic_bench
-  ${PROJECT_SOURCE_DIR}/generic_bench/utils
-  ${PROJECT_SOURCE_DIR}/libs/STL)
-
-# find_package(MKL)
-# if (MKL_FOUND)
-#   add_definitions(-DHAVE_MKL)
-#   set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
-# endif ()
-
-find_library(EIGEN_BTL_RT_LIBRARY rt)
-# if we cannot find it easily, then we don't need it!
-if(NOT EIGEN_BTL_RT_LIBRARY)
-  set(EIGEN_BTL_RT_LIBRARY "")
-endif()
-
-macro(BTL_ADD_BENCH targetname)
-
-  foreach(_current_var ${ARGN})
-    set(_last_var ${_current_var})
-  endforeach()
-
-  set(_sources ${ARGN})
-  list(LENGTH _sources _argn_length)
-
-  list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
-
-  list(LENGTH _sources _src_length)
-
-  if (${_argn_length} EQUAL ${_src_length})
-    set(_last_var ON)
-  endif ()
-
-  option(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})
-
-  if(BUILD_${targetname})
-    add_executable(${targetname} ${_sources})
-    add_test(${targetname} "${targetname}")
-    target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY})
-  endif(BUILD_${targetname})
-
-endmacro(BTL_ADD_BENCH)
-
-macro(btl_add_target_property target prop value)
-
-  if(BUILD_${target})
-    get_target_property(previous ${target} ${prop})
-    if(NOT previous)
-      set(previous "")
-    endif()
-    set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
-  endif()
-
-endmacro()
-
-enable_testing()
-
-add_subdirectory(libs/eigen3)
-add_subdirectory(libs/eigen2)
-add_subdirectory(libs/tensors)
-add_subdirectory(libs/BLAS)
-add_subdirectory(libs/ublas)
-add_subdirectory(libs/gmm)
-add_subdirectory(libs/mtl4)
-add_subdirectory(libs/blitz)
-add_subdirectory(libs/tvmet)
-add_subdirectory(libs/STL)
-add_subdirectory(libs/blaze)
-
-add_subdirectory(data)
-
-
--- a/bench/btl/COPYING
+++ b/bench/btl/COPYING
@@ -1,340 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-                            NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) year name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Library General
-Public License instead of this License.
--- a/bench/btl/README
+++ b/bench/btl/README
@@ -1,154 +0,0 @@
-Bench Template Library
-
-****************************************
-Introduction :
-
-The aim of this project is to compare the performance
-of available numerical libraries. The code is designed
-as generic and modular as possible. Thus, adding new
-numerical libraries or new numerical tests should
-require minimal effort.
-
-
-*****************************************
-
-Installation :
-
-BTL uses cmake / ctest:
-
-1 - create a build directory:
-
-  $ mkdir build
-  $ cd build
-
-2 - configure:
-
-  $ ccmake ..
-
-3 - run the bench using ctest:
-
-  $ ctest -V
-
-You can run the benchmarks only on libraries matching a given regular expression:
-  ctest -V -R <regexp>
-For instance:
-  ctest -V -R eigen2
-
-You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
-  BTL_CONFIG="-a action1{:action2}*" ctest -V
-An example:
-  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
-
-Finally, if bench results already exist (the bench*.dat files), the new results are merged with them, keeping the best for each matrix size. If you want to overwrite the previous ones instead, simply add the "--overwrite" option:
-  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
-
-4 : Analyze the results. Data files (.dat) are produced in each of the libs directories.
- If gnuplot is available, choose a directory name in the data directory to store the results and type:
-        $ cd data
-        $ mkdir my_directory
-        $ cp ../libs/*/*.dat my_directory
- Build the data utilities in this (data) directory
-        make
- Then you can look at the raw data,
-        go_mean my_directory
- or smooth the data first :
-	smooth_all.sh my_directory
-	go_mean my_directory_smooth
-
-
-*************************************************
-
-Files and directories :
-
- generic_bench : all the bench sources common to all libraries
-
- actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested.
-
- libs/* : bench sources specific to each tested library.
-
- machine_dep : directory used to store machine specific Makefile.in
-
- data : directory used to store gnuplot scripts and data analysis utilities
-
-**************************************************
-
-Principles : the code modularity is achieved by defining two concepts :
-
- ****** Action concept : This is a class defining which kind
-  of test must be performed (e.g. a matrix_vector_product).
-	An Action should define the following methods :
-
-        *** Ctor using the size of the problem (matrix or vector size) as an argument
-	    Action action(size);
-        *** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments)
-	    action.initialize();
-	*** calculate : this method actually launches the calculation to be benchmarked
-	    action.calculate();
-	*** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation)
-        *** name() : this method returns the name of the action (std::string)
-
- ****** Interface concept : This is a class or namespace defining how to use a given library and
-  its specific containers (matrix and vector). Up to now, an interface should define the following types
-
-	*** real_type : kind of float to be used (float or double)
-	*** stl_vector : must correspond to std::vector<real_type>
-	*** stl_matrix : must correspond to std::vector<stl_vector>
-	*** gene_vector : the vector type for this interface        --> e.g. (real_type *) for the C_interface
-	*** gene_matrix : the matrix type for this interface        --> e.g. (gene_vector *) for the C_interface
-
-	+ the following common methods
-
-        *** free_matrix(gene_matrix & A, int N)  deallocation of an N-sized gene_matrix A
-        *** free_vector(gene_vector & B)  deallocation of an N-sized gene_vector B
-        *** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A.
-	     The allocation of A is done in this function.
-	*** vector_from_stl(gene_vector & B, stl_vector & B_stl)  copy the content of an stl_vector B_stl into a gene_vector B.
-	     The allocation of B is done in this function.
-        *** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of a gene_matrix A into an stl_matrix A_stl.
-             The size of A_stl must correspond to the size of A.
-        *** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of a gene_vector A into an stl_vector A_stl.
-             The size of A_stl must correspond to the size of A.
-	*** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source
-		and cible must be sized NxN.
-	*** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source
- 		and cible must be sized N.
-
-	and the following methods, one for each action to be benchmarked :
-
-	***  matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
-	***  matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
-        ***  ata_product(const gene_matrix & A, gene_matrix & X, int N)
-	***  aat_product(const gene_matrix & A, gene_matrix & X, int N)
-        ***  axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
-
- The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with
- an interface. A typical main.cpp source stored in a given library directory libs/A_LIB
- looks like :
-
- bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
-
- this function will produce an XY data file containing the measured mflops as a function of the size for 50
- sizes between 10 and 1000.
-
- This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time
- measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides
- a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer
- so
-
- bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
-
- is equivalent to
-
- bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
-
- If your system supports it, we suggest using a mixed implementation (X86_Perf_Analyzer+Portable_Perf_Analyzer).
- replace
-     bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
- with
-     bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
- in generic_bench/bench.hh
-
-.
-
-
-
--- a/bench/btl/actions/action_aat_product.hh
+++ b/bench/btl/actions/action_aat_product.hh
@@ -1,118 +0,0 @@
-//=====================================================
-// File   :  action_aat_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_AAT_PRODUCT
-#define ACTION_AAT_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_aat_product {  // BTL action that benchmarks Interface::aat_product on size x size matrices
- public:
-  // Ctor: builds the STL inputs, then allocates the interface matrices from them.
-  // `size` is forwarded as the matrix dimension to every init/copy/free call of this class.
-  Action_aat_product(int size) : _size(size) {
-    MESSAGE("Action_aat_product Ctor");
-
-    // STL matrix initialization: A is filled by pseudo_random, X and the result
-    // buffer by null_function (presumably zeros - confirm in init/init_function.hh).
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_matrix<null_function>(X_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // Interface-side matrices: the *_ref objects are the sources that initialize()
-    // copies into the working matrices A and X.
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(X, X_stl);
-  }
-
-  // Copying is not supported: the copy ctor only logs a message and terminates the process.
-  // NOTE(review): exit(0) reports success to the caller, while check_result() uses exit(1) - confirm intent.
-  Action_aat_product(const Action_aat_product&) {
-    INFOS("illegal call to Action_aat_product Copy Ctor");
-    exit(0);
-  }
-
-  // Dtor: releases the four interface matrices allocated in the ctor.
-
-  ~Action_aat_product(void) {
-    MESSAGE("Action_aat_product Dtor");
-
-    // deallocation of the working matrices, then of the reference matrices
-
-    Interface::free_matrix(A, _size);
-    Interface::free_matrix(X, _size);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_matrix(X_ref, _size);
-  }
-
-  // Action name: "aat_" followed by the name reported by the interface.
-
-  static inline std::string name(void) { return "aat_" + Interface::name(); }
-  // Operation count behind the mflops figure: size^3. NOTE(review): Action_ata_product reports 2*size^3 - confirm.
-  double nb_op_base(void) { return double(_size) * double(_size) * double(_size); }
-  // Reset the working matrices A and X from the reference matrices.
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_matrix(X_ref, X, _size);
-  }
-  // The calculation to be benchmarked.
-  inline void calculate(void) { Interface::aat_product(A, X, _size); }
-  // Compare the interface result with the STL reference implementation; exits with status 1 on mismatch.
-  void check_result(void) {
-    if (_size > 128) return;  // the check is skipped for sizes above 128
-    // calculation check: X -> resu_stl, reference product -> X_stl, then compare both
-
-    Interface::matrix_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::aat_product(A_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(1);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;     // STL copy of the input matrix
-  typename Interface::stl_matrix X_stl;     // reference result written by STL_interface in check_result()
-  typename Interface::stl_matrix resu_stl;  // interface result converted back to STL for the comparison
-
-  typename Interface::gene_matrix A_ref;  // sources copied into A and X by initialize()
-  typename Interface::gene_matrix X_ref;
-
-  typename Interface::gene_matrix A;  // working matrices passed to Interface::aat_product
-  typename Interface::gene_matrix X;
-
-  int _size;  // matrix dimension given to the ctor
-};
-
-#endif
--- a/bench/btl/actions/action_ata_product.hh
+++ b/bench/btl/actions/action_ata_product.hh
@@ -1,118 +0,0 @@
-//=====================================================
-// File   :  action_ata_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_ATA_PRODUCT
-#define ACTION_ATA_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_ata_product {  // BTL action that benchmarks Interface::ata_product on size x size matrices
- public:
-  // Ctor: builds the STL inputs, then allocates the interface matrices from them.
-  // `size` is forwarded as the matrix dimension to every init/copy/free call of this class.
-  Action_ata_product(int size) : _size(size) {
-    MESSAGE("Action_ata_product Ctor");
-
-    // STL matrix initialization: A is filled by pseudo_random, X and the result
-    // buffer by null_function (presumably zeros - confirm in init/init_function.hh).
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_matrix<null_function>(X_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // Interface-side matrices: the *_ref objects are the sources that initialize()
-    // copies into the working matrices A and X.
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(X, X_stl);
-  }
-
-  // Copying is not supported: the copy ctor only logs a message and terminates the process.
-  // NOTE(review): exit(0) reports success to the caller, while check_result() uses exit(1) - confirm intent.
-  Action_ata_product(const Action_ata_product&) {
-    INFOS("illegal call to Action_ata_product Copy Ctor");
-    exit(0);
-  }
-
-  // Dtor: releases the four interface matrices allocated in the ctor.
-
-  ~Action_ata_product(void) {
-    MESSAGE("Action_ata_product Dtor");
-
-    // deallocation of the working matrices, then of the reference matrices
-
-    Interface::free_matrix(A, _size);
-    Interface::free_matrix(X, _size);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_matrix(X_ref, _size);
-  }
-
-  // Action name: "ata_" followed by the name reported by the interface.
-
-  static inline std::string name(void) { return "ata_" + Interface::name(); }
-  // Operation count behind the mflops figure: 2*size^3. NOTE(review): Action_aat_product reports size^3 - confirm.
-  double nb_op_base(void) { return 2.0 * _size * _size * _size; }
-  // Reset the working matrices A and X from the reference matrices.
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_matrix(X_ref, X, _size);
-  }
-  // The calculation to be benchmarked.
-  inline void calculate(void) { Interface::ata_product(A, X, _size); }
-  // Compare the interface result with the STL reference implementation; exits with status 1 on mismatch.
-  void check_result(void) {
-    if (_size > 128) return;  // the check is skipped for sizes above 128
-    // calculation check: X -> resu_stl, reference product -> X_stl, then compare both
-
-    Interface::matrix_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::ata_product(A_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(1);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;     // STL copy of the input matrix
-  typename Interface::stl_matrix X_stl;     // reference result written by STL_interface in check_result()
-  typename Interface::stl_matrix resu_stl;  // interface result converted back to STL for the comparison
-
-  typename Interface::gene_matrix A_ref;  // sources copied into A and X by initialize()
-  typename Interface::gene_matrix X_ref;
-
-  typename Interface::gene_matrix A;  // working matrices passed to Interface::ata_product
-  typename Interface::gene_matrix X;
-
-  int _size;  // matrix dimension given to the ctor
-};
-
-#endif
--- a/bench/btl/actions/action_atv_product.hh
+++ b/bench/btl/actions/action_atv_product.hh
@@ -1,120 +0,0 @@
-//=====================================================
-// File   :  action_atv_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_ATV_PRODUCT
-#define ACTION_ATV_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_atv_product {
- public:
-  Action_atv_product(int size) : _size(size) {
-    MESSAGE("Action_atv_product Ctor");
-
-    // STL matrix and vector initialization
-
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<null_function>(X_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-  Action_atv_product(const Action_atv_product&) {
-    INFOS("illegal call to Action_atv_product Copy Ctor");
-    exit(1);
-  }
-
-  ~Action_atv_product(void) {
-    MESSAGE("Action_atv_product Dtor");
-
-    Interface::free_matrix(A, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_vector(B_ref);
-    Interface::free_vector(X_ref);
-  }
-
-  static inline std::string name() { return "atv_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size; }
-
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-    Interface::copy_vector(X_ref, X, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("begin atv");
-    Interface::atv_product(A, B, X, _size);
-    BTL_ASM_COMMENT("end atv");
-  }
-
-  void check_result(void) {
-    if (_size > 128) return;
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::atv_product(A_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(1);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_vector B_ref;
-  typename Interface::gene_vector X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_vector B;
-  typename Interface::gene_vector X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_axpby.hh
+++ b/bench/btl/actions/action_axpby.hh
@@ -1,116 +0,0 @@
-//=====================================================
-// File   :  action_axpby.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_AXPBY
-#define ACTION_AXPBY
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_axpby {
- public:
-  // Ctor
-  Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) {
-    MESSAGE("Action_axpby Ctor");
-
-    // STL vector initialization
-    init_vector<pseudo_random>(X_stl, _size);
-    init_vector<pseudo_random>(Y_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(Y_ref, Y_stl);
-
-    Interface::vector_from_stl(X, X_stl);
-    Interface::vector_from_stl(Y, Y_stl);
-  }
-
-  // invalidate copy ctor
-  Action_axpby(const Action_axpby&) {
-    INFOS("illegal call to Action_axpby Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-  ~Action_axpby(void) {
-    MESSAGE("Action_axpby Dtor");
-
-    // deallocation
-    Interface::free_vector(X_ref);
-    Interface::free_vector(Y_ref);
-
-    Interface::free_vector(X);
-    Interface::free_vector(Y);
-  }
-
-  // action name
-  static inline std::string name(void) { return "axpby_" + Interface::name(); }
-
-  double nb_op_base(void) { return 3.0 * _size; }
-
-  inline void initialize(void) {
-    Interface::copy_vector(X_ref, X, _size);
-    Interface::copy_vector(Y_ref, Y, _size);
-  }
-
-  inline void calculate(void) {
-    BTL_ASM_COMMENT("mybegin axpby");
-    Interface::axpby(_alpha, X, _beta, Y, _size);
-    BTL_ASM_COMMENT("myend axpby");
-  }
-
-  void check_result(void) {
-    if (_size > 128) return;
-    // calculation check
-    Interface::vector_to_stl(Y, resu_stl);
-
-    STL_interface<typename Interface::real_type>::axpby(_alpha, X_stl, _beta, Y_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(2);
-    }
-  }
-
- private:
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector Y_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_vector X_ref;
-  typename Interface::gene_vector Y_ref;
-
-  typename Interface::gene_vector X;
-  typename Interface::gene_vector Y;
-
-  typename Interface::real_type _alpha;
-  typename Interface::real_type _beta;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_axpy.hh
+++ b/bench/btl/actions/action_axpy.hh
@@ -1,124 +0,0 @@
-//=====================================================
-// File   :  action_axpy.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_AXPY
-#define ACTION_AXPY
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_axpy {
- public:
-  // Ctor
-
-  Action_axpy(int size) : _coef(1.0), _size(size) {
-    MESSAGE("Action_axpy Ctor");
-
-    // STL vector initialization
-
-    init_vector<pseudo_random>(X_stl, _size);
-    init_vector<pseudo_random>(Y_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(Y_ref, Y_stl);
-
-    Interface::vector_from_stl(X, X_stl);
-    Interface::vector_from_stl(Y, Y_stl);
-  }
-
-  // invalidate copy ctor
-
-  Action_axpy(const Action_axpy&) {
-    INFOS("illegal call to Action_axpy Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_axpy(void) {
-    MESSAGE("Action_axpy Dtor");
-
-    // deallocation
-
-    Interface::free_vector(X_ref);
-    Interface::free_vector(Y_ref);
-
-    Interface::free_vector(X);
-    Interface::free_vector(Y);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "axpy_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size; }
-
-  inline void initialize(void) {
-    Interface::copy_vector(X_ref, X, _size);
-    Interface::copy_vector(Y_ref, Y, _size);
-  }
-
-  inline void calculate(void) {
-    BTL_ASM_COMMENT("mybegin axpy");
-    Interface::axpy(_coef, X, Y, _size);
-    BTL_ASM_COMMENT("myend axpy");
-  }
-
-  void check_result(void) {
-    if (_size > 128) return;
-    // calculation check
-
-    Interface::vector_to_stl(Y, resu_stl);
-
-    STL_interface<typename Interface::real_type>::axpy(_coef, X_stl, Y_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(0);
-    }
-  }
-
- private:
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector Y_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_vector X_ref;
-  typename Interface::gene_vector Y_ref;
-
-  typename Interface::gene_vector X;
-  typename Interface::gene_vector Y;
-
-  typename Interface::real_type _coef;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_cholesky.hh
+++ b/bench/btl/actions/action_cholesky.hh
@@ -1,110 +0,0 @@
-//=====================================================
-// File   :  action_cholesky.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_CHOLESKY
-#define ACTION_CHOLESKY
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_cholesky {
- public:
-  // Ctor
-
-  Action_cholesky(int size) : _size(size) {
-    MESSAGE("Action_cholesky Ctor");
-
-    // STL mat/vec initialization
-    init_matrix_symm<pseudo_random>(X_stl, _size);
-    init_matrix<null_function>(C_stl, _size);
-
-    // make sure X is invertible
-    for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref, X_stl);
-    Interface::matrix_from_stl(X, X_stl);
-    Interface::matrix_from_stl(C, C_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size; ++j) {
-      double r = std::max(_size - j - 1, 0);
-      _cost += 2 * (r * j + r + j);
-    }
-  }
-
-  // invalidate copy ctor
-
-  Action_cholesky(const Action_cholesky&) {
-    INFOS("illegal call to Action_cholesky Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_cholesky(void) {
-    MESSAGE("Action_cholesky Dtor");
-
-    // deallocation
-    Interface::free_matrix(X_ref, _size);
-    Interface::free_matrix(X, _size);
-    Interface::free_matrix(C, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "cholesky_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
-
-  inline void calculate(void) { Interface::cholesky(X, C, _size); }
-
-  void check_result(void) {
-    // calculation check
-    //     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix C_stl;
-
-  typename Interface::gene_matrix X_ref;
-  typename Interface::gene_matrix X;
-  typename Interface::gene_matrix C;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_ger.hh
+++ b/bench/btl/actions/action_ger.hh
@@ -1,114 +0,0 @@
-
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_GER
-#define ACTION_GER
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_ger {
- public:
-  // Ctor
-  BTL_DONT_INLINE Action_ger(int size) : _size(size) {
-    MESSAGE("Action_ger Ctor");
-
-    // STL matrix and vector initialization
-    typename Interface::stl_matrix tmp;
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<pseudo_random>(X_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-  Action_ger(const Action_ger&) {
-    INFOS("illegal call to Action_ger Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-  BTL_DONT_INLINE ~Action_ger(void) {
-    MESSAGE("Action_ger Dtor");
-    Interface::free_matrix(A, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_vector(B_ref);
-    Interface::free_vector(X_ref);
-  }
-
-  // action name
-  static inline std::string name(void) { return "ger_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size; }
-
-  BTL_DONT_INLINE void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-    Interface::copy_vector(X_ref, X, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("#begin ger");
-    Interface::ger(A, B, X, _size);
-    BTL_ASM_COMMENT("end ger");
-  }
-
-  BTL_DONT_INLINE void check_result(void) {
-    // calculation check
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::ger(A_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-3) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      //       exit(0);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_vector B_ref;
-  typename Interface::gene_vector X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_vector B;
-  typename Interface::gene_vector X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_hessenberg.hh
+++ b/bench/btl/actions/action_hessenberg.hh
@@ -1,200 +0,0 @@
-//=====================================================
-// File   :  action_hessenberg.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_HESSENBERG
-#define ACTION_HESSENBERG
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_hessenberg {
- public:
-  // Ctor
-
-  Action_hessenberg(int size) : _size(size) {
-    MESSAGE("Action_hessenberg Ctor");
-
-    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl, _size);
-
-    init_matrix<null_function>(C_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref, X_stl);
-    Interface::matrix_from_stl(X, X_stl);
-    Interface::matrix_from_stl(C, C_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size - 2; ++j) {
-      double r = std::max(0, _size - j - 1);
-      double b = std::max(0, _size - j - 2);
-      _cost += 6 + 3 * b + r * r * 4 + r * _size * 4;
-    }
-  }
-
-  // invalidate copy ctor
-
-  Action_hessenberg(const Action_hessenberg&) {
-    INFOS("illegal call to Action_hessenberg Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_hessenberg(void) {
-    MESSAGE("Action_hessenberg Dtor");
-
-    // deallocation
-    Interface::free_matrix(X_ref, _size);
-    Interface::free_matrix(X, _size);
-    Interface::free_matrix(C, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "hessenberg_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
-
-  inline void calculate(void) { Interface::hessenberg(X, C, _size); }
-
-  void check_result(void) {
-    // calculation check
-    Interface::matrix_to_stl(C, resu_stl);
-
-    //     STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix C_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix X_ref;
-  typename Interface::gene_matrix X;
-  typename Interface::gene_matrix C;
-
-  int _size;
-  double _cost;
-};
-
-template <class Interface>
-class Action_tridiagonalization {
- public:
-  // Ctor
-
-  Action_tridiagonalization(int size) : _size(size) {
-    MESSAGE("Action_tridiagonalization Ctor");
-
-    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl, _size);
-
-    for (int i = 0; i < _size; ++i) {
-      for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i];
-    }
-
-    init_matrix<null_function>(C_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref, X_stl);
-    Interface::matrix_from_stl(X, X_stl);
-    Interface::matrix_from_stl(C, C_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size - 2; ++j) {
-      double r = std::max(0, _size - j - 1);
-      double b = std::max(0, _size - j - 2);
-      _cost += 6. + 3. * b + r * r * 8.;
-    }
-  }
-
-  // invalidate copy ctor
-
-  Action_tridiagonalization(const Action_tridiagonalization&) {
-    INFOS("illegal call to Action_tridiagonalization Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_tridiagonalization(void) {
-    MESSAGE("Action_tridiagonalization Dtor");
-
-    // deallocation
-    Interface::free_matrix(X_ref, _size);
-    Interface::free_matrix(X, _size);
-    Interface::free_matrix(C, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
-
-  inline void calculate(void) { Interface::tridiagonalization(X, C, _size); }
-
-  void check_result(void) {
-    // calculation check
-    Interface::matrix_to_stl(C, resu_stl);
-
-    //     STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix C_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix X_ref;
-  typename Interface::gene_matrix X;
-  typename Interface::gene_matrix C;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_lu_decomp.hh
+++ b/bench/btl/actions/action_lu_decomp.hh
@@ -1,108 +0,0 @@
-//=====================================================
-// File   :  action_lu_decomp.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_LU_DECOMP
-#define ACTION_LU_DECOMP
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_lu_decomp {
- public:
-  // Ctor
-
-  Action_lu_decomp(int size) : _size(size) {
-    MESSAGE("Action_lu_decomp Ctor");
-
-    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl, _size);
-
-    init_matrix<null_function>(C_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref, X_stl);
-    Interface::matrix_from_stl(X, X_stl);
-    Interface::matrix_from_stl(C, C_stl);
-
-    _cost = 2.0 * size * size * size / 3.0 + size * size;
-  }
-
-  // invalidate copy ctor
-
-  Action_lu_decomp(const Action_lu_decomp&) {
-    INFOS("illegal call to Action_lu_decomp Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_lu_decomp(void) {
-    MESSAGE("Action_lu_decomp Dtor");
-
-    // deallocation
-    Interface::free_matrix(X_ref, _size);
-    Interface::free_matrix(X, _size);
-    Interface::free_matrix(C, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
-
-  inline void calculate(void) { Interface::lu_decomp(X, C, _size); }
-
-  void check_result(void) {
-    // calculation check
-    Interface::matrix_to_stl(C, resu_stl);
-
-    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix C_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix X_ref;
-  typename Interface::gene_matrix X;
-  typename Interface::gene_matrix C;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_lu_solve.hh
+++ b/bench/btl/actions/action_lu_solve.hh
@@ -1,120 +0,0 @@
-//=====================================================
-// File   :  action_lu_solve.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_LU_SOLVE
-#define ACTION_LU_SOLVE
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_lu_solve {
- public:
-  static inline std::string name(void) { return "lu_solve_" + Interface::name(); }
-
-  static double nb_op_base(int size) {
-    return 2.0 * size * size * size / 3.0;  // questionable but not really important
-  }
-
-  static double calculate(int nb_calc, int size) {
-    // STL matrix and vector initialization
-
-    typename Interface::stl_matrix A_stl;
-    typename Interface::stl_vector B_stl;
-    typename Interface::stl_vector X_stl;
-
-    init_matrix<pseudo_random>(A_stl, size);
-    init_vector<pseudo_random>(B_stl, size);
-    init_vector<null_function>(X_stl, size);
-
-    // generic matrix and vector initialization
-
-    typename Interface::gene_matrix A;
-    typename Interface::gene_vector B;
-    typename Interface::gene_vector X;
-
-    typename Interface::gene_matrix LU;
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X, X_stl);
-    Interface::matrix_from_stl(LU, A_stl);
-
-    // local variable :
-
-    typename Interface::Pivot_Vector pivot;  // pivot vector
-    Interface::new_Pivot_Vector(pivot, size);
-
-    // timer utilities
-
-    Portable_Timer chronos;
-
-    // time measurement
-
-    chronos.start();
-
-    for (int ii = 0; ii < nb_calc; ii++) {
-      // LU factorization
-      Interface::copy_matrix(A, LU, size);
-      Interface::LU_factor(LU, pivot, size);
-
-      // LU solve
-
-      Interface::LU_solve(LU, pivot, B, X, size);
-    }
-
-    // Time stop
-
-    chronos.stop();
-
-    double time = chronos.user_time();
-
-    // check result :
-
-    typename Interface::stl_vector B_new_stl(size);
-    Interface::vector_to_stl(X, X_stl);
-
-    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, X_stl, B_new_stl, size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(B_stl, B_new_stl);
-
-    if (error > 1.e-5) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      STL_interface<typename Interface::real_type>::display_vector(B_stl);
-      STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
-      exit(0);
-    }
-
-    // deallocation and return time
-
-    Interface::free_matrix(A, size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-    Interface::free_Pivot_Vector(pivot);
-
-    return time;
-  }
-};
-
-#endif
--- a/bench/btl/actions/action_matrix_matrix_product.hh
+++ b/bench/btl/actions/action_matrix_matrix_product.hh
@@ -1,124 +0,0 @@
-//=====================================================
-// File   :  action_matrix_matrix_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_MATRIX_MATRIX_PRODUCT
-#define ACTION_MATRIX_MATRIX_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_matrix_matrix_product {
- public:
-  // Ctor
-
-  Action_matrix_matrix_product(int size) : _size(size) {
-    MESSAGE("Action_matrix_matrix_product Ctor");
-
-    // STL matrix and vector initialization
-
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_matrix<pseudo_random>(B_stl, _size);
-    init_matrix<null_function>(X_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(B_ref, B_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(B, B_stl);
-    Interface::matrix_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-
-  Action_matrix_matrix_product(const Action_matrix_matrix_product&) {
-    INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
-    exit(0);
-  }
-
-  // Dtor
-
-  ~Action_matrix_matrix_product(void) {
-    MESSAGE("Action_matrix_matrix_product Dtor");
-
-    // deallocation
-
-    Interface::free_matrix(A, _size);
-    Interface::free_matrix(B, _size);
-    Interface::free_matrix(X, _size);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_matrix(B_ref, _size);
-    Interface::free_matrix(X_ref, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size * _size; }
-
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_matrix(B_ref, B, _size);
-    Interface::copy_matrix(X_ref, X, _size);
-  }
-
-  inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); }
-
-  void check_result(void) {
-    // calculation check
-    if (_size < 200) {
-      Interface::matrix_to_stl(X, resu_stl);
-      STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, _size);
-      typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-      if (error > 1.e-6) {
-        INFOS("WRONG CALCULATION...residual=" << error);
-        exit(1);
-      }
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_matrix B_stl;
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_matrix B_ref;
-  typename Interface::gene_matrix X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_matrix B;
-  typename Interface::gene_matrix X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_matrix_matrix_product_bis.hh
+++ b/bench/btl/actions/action_matrix_matrix_product_bis.hh
@@ -1,131 +0,0 @@
-//=====================================================
-// File   :  action_matrix_matrix_product_bis.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS
-#define ACTION_MATRIX_MATRIX_PRODUCT_BIS
-#include "utilities.h"
-#include "STL_interface.hh"
-#include "STL_timer.hh"
-#include <string>
-#include "init_function.hh"
-#include "init_vector.hh"
-#include "init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_matrix_matrix_product_bis {
- public:
-  static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
-
-  static double nb_op_base(int size) { return 2.0 * size * size * size; }
-
-  static double calculate(int nb_calc, int size) {
-    // STL matrix and vector initialization
-
-    typename Interface::stl_matrix A_stl;
-    typename Interface::stl_matrix B_stl;
-    typename Interface::stl_matrix X_stl;
-
-    init_matrix<pseudo_random>(A_stl, size);
-    init_matrix<pseudo_random>(B_stl, size);
-    init_matrix<null_function>(X_stl, size);
-
-    // generic matrix and vector initialization
-
-    typename Interface::gene_matrix A_ref;
-    typename Interface::gene_matrix B_ref;
-    typename Interface::gene_matrix X_ref;
-
-    typename Interface::gene_matrix A;
-    typename Interface::gene_matrix B;
-    typename Interface::gene_matrix X;
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(B_ref, B_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(B, B_stl);
-    Interface::matrix_from_stl(X, X_stl);
-
-    // STL_timer utilities
-
-    STL_timer chronos;
-
-    // Baseline evaluation
-
-    chronos.start_baseline(nb_calc);
-
-    do {
-      Interface::copy_matrix(A_ref, A, size);
-      Interface::copy_matrix(B_ref, B, size);
-      Interface::copy_matrix(X_ref, X, size);
-
-      //      Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
-    } while (chronos.check());
-
-    chronos.report(true);
-
-    // Time measurement
-
-    chronos.start(nb_calc);
-
-    do {
-      Interface::copy_matrix(A_ref, A, size);
-      Interface::copy_matrix(B_ref, B, size);
-      Interface::copy_matrix(X_ref, X, size);
-
-      Interface::matrix_matrix_product(A, B, X, size);  // here it is not commented !!!!
-    } while (chronos.check());
-
-    chronos.report(true);
-
-    double time = chronos.calculated_time / 2000.0;
-
-    // calculation check
-
-    typename Interface::stl_matrix resu_stl(size);
-
-    Interface::matrix_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-6) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(1);
-    }
-
-    // deallocation and return time
-
-    Interface::free_matrix(A, size);
-    Interface::free_matrix(B, size);
-    Interface::free_matrix(X, size);
-
-    Interface::free_matrix(A_ref, size);
-    Interface::free_matrix(B_ref, size);
-    Interface::free_matrix(X_ref, size);
-
-    return time;
-  }
-};
-
-#endif
--- a/bench/btl/actions/action_matrix_vector_product.hh
+++ b/bench/btl/actions/action_matrix_vector_product.hh
@@ -1,129 +0,0 @@
-//=====================================================
-// File   :  action_matrix_vector_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_MATRIX_VECTOR_PRODUCT
-#define ACTION_MATRIX_VECTOR_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_matrix_vector_product {
- public:
-  // Ctor
-
-  BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) {
-    MESSAGE("Action_matrix_vector_product Ctor");
-
-    // STL matrix and vector initialization
-
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<null_function>(X_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-
-  Action_matrix_vector_product(const Action_matrix_vector_product&) {
-    INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  BTL_DONT_INLINE ~Action_matrix_vector_product(void) {
-    MESSAGE("Action_matrix_vector_product Dtor");
-
-    // deallocation
-
-    Interface::free_matrix(A, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_vector(B_ref);
-    Interface::free_vector(X_ref);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "matrix_vector_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size; }
-
-  BTL_DONT_INLINE void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-    Interface::copy_vector(X_ref, X, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("#begin matrix_vector_product");
-    Interface::matrix_vector_product(A, B, X, _size);
-    BTL_ASM_COMMENT("end matrix_vector_product");
-  }
-
-  BTL_DONT_INLINE void check_result(void) {
-    // calculation check
-
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-5) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(0);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_vector B_ref;
-  typename Interface::gene_vector X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_vector B;
-  typename Interface::gene_vector X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_partial_lu.hh
+++ b/bench/btl/actions/action_partial_lu.hh
@@ -1,108 +0,0 @@
-//=====================================================
-// File   :  action_lu_decomp.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_PARTIAL_LU
-#define ACTION_PARTIAL_LU
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_partial_lu {
- public:
-  // Ctor
-
-  Action_partial_lu(int size) : _size(size) {
-    MESSAGE("Action_partial_lu Ctor");
-
-    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl, _size);
-    init_matrix<null_function>(C_stl, _size);
-
-    // make sure X is invertible
-    for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref, X_stl);
-    Interface::matrix_from_stl(X, X_stl);
-    Interface::matrix_from_stl(C, C_stl);
-
-    _cost = 2.0 * size * size * size / 3.0 + size * size;
-  }
-
-  // invalidate copy ctor
-
-  Action_partial_lu(const Action_partial_lu&) {
-    INFOS("illegal call to Action_partial_lu Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_partial_lu(void) {
-    MESSAGE("Action_partial_lu Dtor");
-
-    // deallocation
-    Interface::free_matrix(X_ref, _size);
-    Interface::free_matrix(X, _size);
-    Interface::free_matrix(C, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
-
-  inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); }
-
-  void check_result(void) {
-    // calculation check
-    //     Interface::matrix_to_stl(C,resu_stl);
-
-    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix C_stl;
-
-  typename Interface::gene_matrix X_ref;
-  typename Interface::gene_matrix X;
-  typename Interface::gene_matrix C;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_rot.hh
+++ b/bench/btl/actions/action_rot.hh
@@ -1,104 +0,0 @@
-
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_ROT
-#define ACTION_ROT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_rot {
- public:
-  // Ctor
-  BTL_DONT_INLINE Action_rot(int size) : _size(size) {
-    MESSAGE("Action_rot Ctor");
-
-    // STL matrix and vector initialization
-    typename Interface::stl_matrix tmp;
-    init_vector<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::vector_from_stl(A_ref, A_stl);
-    Interface::vector_from_stl(A, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(B, B_stl);
-  }
-
-  // invalidate copy ctor
-  Action_rot(const Action_rot&) {
-    INFOS("illegal call to Action_rot Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-  BTL_DONT_INLINE ~Action_rot(void) {
-    MESSAGE("Action_rot Dtor");
-    Interface::free_vector(A);
-    Interface::free_vector(B);
-    Interface::free_vector(A_ref);
-    Interface::free_vector(B_ref);
-  }
-
-  // action name
-  static inline std::string name(void) { return "rot_" + Interface::name(); }
-
-  double nb_op_base(void) { return 6.0 * _size; }
-
-  BTL_DONT_INLINE void initialize(void) {
-    Interface::copy_vector(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("#begin rot");
-    Interface::rot(A, B, 0.5, 0.6, _size);
-    BTL_ASM_COMMENT("end rot");
-  }
-
-  BTL_DONT_INLINE void check_result(void) {
-    // calculation check
-    //     Interface::vector_to_stl(X,resu_stl);
-
-    //     STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
-
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-
-    //     if (error>1.e-3){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    //       exit(0);
-    //     }
-  }
-
- private:
-  typename Interface::stl_vector A_stl;
-  typename Interface::stl_vector B_stl;
-
-  typename Interface::gene_vector A_ref;
-  typename Interface::gene_vector B_ref;
-
-  typename Interface::gene_vector A;
-  typename Interface::gene_vector B;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_symv.hh
+++ b/bench/btl/actions/action_symv.hh
@@ -1,121 +0,0 @@
-//=====================================================
-// File   :  action_symv.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_SYMV
-#define ACTION_SYMV
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_symv {
- public:
-  // Ctor
-
-  BTL_DONT_INLINE Action_symv(int size) : _size(size) {
-    MESSAGE("Action_symv Ctor");
-
-    // STL matrix and vector initialization
-    init_matrix_symm<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<null_function>(X_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-
-  Action_symv(const Action_symv&) {
-    INFOS("illegal call to Action_symv Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-  BTL_DONT_INLINE ~Action_symv(void) {
-    Interface::free_matrix(A, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_vector(B_ref);
-    Interface::free_vector(X_ref);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "symv_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size; }
-
-  BTL_DONT_INLINE void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-    Interface::copy_vector(X_ref, X, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("#begin symv");
-    Interface::symv(A, B, X, _size);
-    BTL_ASM_COMMENT("end symv");
-  }
-
-  BTL_DONT_INLINE void check_result(void) {
-    if (_size > 128) return;
-    // calculation check
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::symv(A_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-5) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(0);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_vector B_ref;
-  typename Interface::gene_vector X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_vector B;
-  typename Interface::gene_vector X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_syr2.hh
+++ b/bench/btl/actions/action_syr2.hh
@@ -1,118 +0,0 @@
-//=====================================================
-// File   :  action_syr2.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_SYR2
-#define ACTION_SYR2
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_syr2 {
- public:
-  // Ctor
-
-  BTL_DONT_INLINE Action_syr2(int size) : _size(size) {
-    // STL matrix and vector initialization
-    typename Interface::stl_matrix tmp;
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<pseudo_random>(X_stl, _size);
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::vector_from_stl(B_ref, B_stl);
-    Interface::vector_from_stl(B, B_stl);
-    Interface::vector_from_stl(X_ref, X_stl);
-    Interface::vector_from_stl(X, X_stl);
-  }
-
-  // invalidate copy ctor
-  Action_syr2(const Action_syr2&) {
-    INFOS("illegal call to Action_syr2 Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-  BTL_DONT_INLINE ~Action_syr2(void) {
-    Interface::free_matrix(A, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_vector(B_ref);
-    Interface::free_vector(X_ref);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "syr2_" + Interface::name(); }
-
-  double nb_op_base(void) { return 2.0 * _size * _size; }
-
-  BTL_DONT_INLINE void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_vector(B_ref, B, _size);
-    Interface::copy_vector(X_ref, X, _size);
-  }
-
-  BTL_DONT_INLINE void calculate(void) {
-    BTL_ASM_COMMENT("#begin syr2");
-    Interface::syr2(A, B, X, _size);
-    BTL_ASM_COMMENT("end syr2");
-  }
-
-  BTL_DONT_INLINE void check_result(void) {
-    // calculation check
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::syr2(A_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-3) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      //       exit(0);
-    }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_vector B_ref;
-  typename Interface::gene_vector X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_vector B;
-  typename Interface::gene_vector X;
-
-  int _size;
-};
-
-#endif
--- a/bench/btl/actions/action_trisolve.hh
+++ b/bench/btl/actions/action_trisolve.hh
@@ -1,119 +0,0 @@
-//=====================================================
-// File   :  action_trisolve.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_TRISOLVE
-#define ACTION_TRISOLVE
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_trisolve {
- public:
-  // Ctor
-
-  Action_trisolve(int size) : _size(size) {
-    MESSAGE("Action_trisolve Ctor");
-
-    // STL vector initialization
-    init_matrix<pseudo_random>(L_stl, _size);
-    init_vector<pseudo_random>(B_stl, _size);
-    init_vector<null_function>(X_stl, _size);
-    for (int j = 0; j < _size; ++j) {
-      for (int i = 0; i < j; ++i) L_stl[j][i] = 0;
-      L_stl[j][j] += 3;
-    }
-
-    init_vector<null_function>(resu_stl, _size);
-
-    // generic matrix and vector initialization
-    Interface::matrix_from_stl(L, L_stl);
-    Interface::vector_from_stl(X, X_stl);
-    Interface::vector_from_stl(B, B_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size; ++j) {
-      _cost += 2 * j + 1;
-    }
-  }
-
-  // invalidate copy ctor
-
-  Action_trisolve(const Action_trisolve&) {
-    INFOS("illegal call to Action_trisolve Copy Ctor");
-    exit(1);
-  }
-
-  // Dtor
-
-  ~Action_trisolve(void) {
-    MESSAGE("Action_trisolve Dtor");
-
-    // deallocation
-    Interface::free_matrix(L, _size);
-    Interface::free_vector(B);
-    Interface::free_vector(X);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) {
-    // Interface::copy_vector(X_ref,X,_size);
-  }
-
-  inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); }
-
-  void check_result() {
-    if (_size > 128) return;
-    // calculation check
-    Interface::vector_to_stl(X, resu_stl);
-
-    STL_interface<typename Interface::real_type>::trisolve_lower(L_stl, B_stl, X_stl, _size);
-
-    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
-
-    if (error > 1.e-4) {
-      INFOS("WRONG CALCULATION...residual=" << error);
-      exit(2);
-    }  // else INFOS("CALCULATION OK...residual=" << error);
-  }
-
- private:
-  typename Interface::stl_matrix L_stl;
-  typename Interface::stl_vector X_stl;
-  typename Interface::stl_vector B_stl;
-  typename Interface::stl_vector resu_stl;
-
-  typename Interface::gene_matrix L;
-  typename Interface::gene_vector X;
-  typename Interface::gene_vector B;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_trisolve_matrix.hh
+++ b/bench/btl/actions/action_trisolve_matrix.hh
@@ -1,139 +0,0 @@
-//=====================================================
-// File   :  action_matrix_matrix_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT
-#define ACTION_TRISOLVE_MATRIX_PRODUCT
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_trisolve_matrix {
- public:
-  // Ctor
-
-  Action_trisolve_matrix(int size) : _size(size) {
-    MESSAGE("Action_trisolve_matrix Ctor");
-
-    // STL matrix and vector initialization
-
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_matrix<pseudo_random>(B_stl, _size);
-    init_matrix<null_function>(X_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    for (int j = 0; j < _size; ++j) {
-      for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
-      A_stl[j][j] += 3;
-    }
-
-    // generic matrix and vector initialization
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(B_ref, B_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(B, B_stl);
-    Interface::matrix_from_stl(X, X_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size; ++j) {
-      _cost += 2 * j + 1;
-    }
-    _cost *= _size;
-  }
-
-  // invalidate copy ctor
-
-  Action_trisolve_matrix(const Action_trisolve_matrix&) {
-    INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
-    exit(0);
-  }
-
-  // Dtor
-
-  ~Action_trisolve_matrix(void) {
-    MESSAGE("Action_trisolve_matrix Dtor");
-
-    // deallocation
-
-    Interface::free_matrix(A, _size);
-    Interface::free_matrix(B, _size);
-    Interface::free_matrix(X, _size);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_matrix(B_ref, _size);
-    Interface::free_matrix(X_ref, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "trisolve_matrix_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_matrix(B_ref, B, _size);
-    Interface::copy_matrix(X_ref, X, _size);
-  }
-
-  inline void calculate(void) { Interface::trisolve_lower_matrix(A, B, X, _size); }
-
-  void check_result(void) {
-    // calculation check
-
-    //     Interface::matrix_to_stl(X,resu_stl);
-    //
-    //     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    // //       exit(1);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_matrix B_stl;
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_matrix B_ref;
-  typename Interface::gene_matrix X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_matrix B;
-  typename Interface::gene_matrix X;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/action_trmm.hh
+++ b/bench/btl/actions/action_trmm.hh
@@ -1,139 +0,0 @@
-//=====================================================
-// File   :  action_matrix_matrix_product.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef ACTION_TRMM
-#define ACTION_TRMM
-#include "utilities.h"
-#include "STL_interface.hh"
-#include <string>
-#include "init/init_function.hh"
-#include "init/init_vector.hh"
-#include "init/init_matrix.hh"
-
-using namespace std;
-
-template <class Interface>
-class Action_trmm {
- public:
-  // Ctor
-
-  Action_trmm(int size) : _size(size) {
-    MESSAGE("Action_trmm Ctor");
-
-    // STL matrix and vector initialization
-
-    init_matrix<pseudo_random>(A_stl, _size);
-    init_matrix<pseudo_random>(B_stl, _size);
-    init_matrix<null_function>(X_stl, _size);
-    init_matrix<null_function>(resu_stl, _size);
-
-    for (int j = 0; j < _size; ++j) {
-      for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
-      A_stl[j][j] += 3;
-    }
-
-    // generic matrix and vector initialization
-
-    Interface::matrix_from_stl(A_ref, A_stl);
-    Interface::matrix_from_stl(B_ref, B_stl);
-    Interface::matrix_from_stl(X_ref, X_stl);
-
-    Interface::matrix_from_stl(A, A_stl);
-    Interface::matrix_from_stl(B, B_stl);
-    Interface::matrix_from_stl(X, X_stl);
-
-    _cost = 0;
-    for (int j = 0; j < _size; ++j) {
-      _cost += 2 * j + 1;
-    }
-    _cost *= _size;
-  }
-
-  // invalidate copy ctor
-
-  Action_trmm(const Action_trmm&) {
-    INFOS("illegal call to Action_trmm Copy Ctor");
-    exit(0);
-  }
-
-  // Dtor
-
-  ~Action_trmm(void) {
-    MESSAGE("Action_trmm Dtor");
-
-    // deallocation
-
-    Interface::free_matrix(A, _size);
-    Interface::free_matrix(B, _size);
-    Interface::free_matrix(X, _size);
-
-    Interface::free_matrix(A_ref, _size);
-    Interface::free_matrix(B_ref, _size);
-    Interface::free_matrix(X_ref, _size);
-  }
-
-  // action name
-
-  static inline std::string name(void) { return "trmm_" + Interface::name(); }
-
-  double nb_op_base(void) { return _cost; }
-
-  inline void initialize(void) {
-    Interface::copy_matrix(A_ref, A, _size);
-    Interface::copy_matrix(B_ref, B, _size);
-    Interface::copy_matrix(X_ref, X, _size);
-  }
-
-  inline void calculate(void) { Interface::trmm(A, B, X, _size); }
-
-  void check_result(void) {
-    // calculation check
-
-    //     Interface::matrix_to_stl(X,resu_stl);
-    //
-    //     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
-    //
-    //     typename Interface::real_type error=
-    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-    //
-    //     if (error>1.e-6){
-    //       INFOS("WRONG CALCULATION...residual=" << error);
-    // //       exit(1);
-    //     }
-  }
-
- private:
-  typename Interface::stl_matrix A_stl;
-  typename Interface::stl_matrix B_stl;
-  typename Interface::stl_matrix X_stl;
-  typename Interface::stl_matrix resu_stl;
-
-  typename Interface::gene_matrix A_ref;
-  typename Interface::gene_matrix B_ref;
-  typename Interface::gene_matrix X_ref;
-
-  typename Interface::gene_matrix A;
-  typename Interface::gene_matrix B;
-  typename Interface::gene_matrix X;
-
-  int _size;
-  double _cost;
-};
-
-#endif
--- a/bench/btl/actions/basic_actions.hh
+++ b/bench/btl/actions/basic_actions.hh
@@ -1,20 +0,0 @@
-
-#include "action_axpy.hh"
-#include "action_axpby.hh"
-
-#include "action_matrix_vector_product.hh"
-#include "action_atv_product.hh"
-
-#include "action_matrix_matrix_product.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-
-#include "action_trisolve.hh"
-#include "action_trmm.hh"
-#include "action_symv.hh"
-// #include "action_symm.hh"
-#include "action_syr2.hh"
-#include "action_ger.hh"
-#include "action_rot.hh"
-
-// #include "action_lu_solve.hh"
--- a/bench/btl/cmake/FindACML.cmake
+++ b/bench/btl/cmake/FindACML.cmake
@@ -1,51 +0,0 @@
-
-if (ACML_LIBRARIES)
-  set(ACML_FIND_QUIETLY TRUE)
-endif ()
-
-find_library(ACML_LIBRARIES
-  NAMES
-  acml_mp acml_mv
-  PATHS
-  $ENV{ACMLDIR}/lib
-  $ENV{ACML_DIR}/lib
-  ${LIB_INSTALL_DIR}
-)
-
-find_file(ACML_LIBRARIES
-  NAMES
-  libacml_mp.so
-  PATHS
-  /usr/lib
-  /usr/lib64
-  $ENV{ACMLDIR}/lib
-  ${LIB_INSTALL_DIR}
-)
-
-if(NOT ACML_LIBRARIES)
-    message(STATUS "Multi-threaded library not found, looking for single-threaded")
-    find_library(ACML_LIBRARIES
-        NAMES
-        acml acml_mv
-        PATHS
-        $ENV{ACMLDIR}/lib
-        $ENV{ACML_DIR}/lib
-        ${LIB_INSTALL_DIR}
-        )
-    find_file(ACML_LIBRARIES
-        libacml.so libacml_mv.so
-        PATHS
-        /usr/lib
-        /usr/lib64
-        $ENV{ACMLDIR}/lib
-        ${LIB_INSTALL_DIR}
-        )
-endif()
-
-
-
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ACML DEFAULT_MSG ACML_LIBRARIES)
-
-mark_as_advanced(ACML_LIBRARIES)
--- a/bench/btl/cmake/FindATLAS.cmake
+++ b/bench/btl/cmake/FindATLAS.cmake
@@ -1,31 +0,0 @@
-
-if (ATLAS_LIBRARIES)
-  set(ATLAS_FIND_QUIETLY TRUE)
-endif ()
-
-find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-
-find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-
-find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-
-if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
-
-  set(ATLAS_LIBRARIES ${ATLAS_LAPACK}  ${ATLAS_LIB})
-  
-  # search the default lapack lib link to it
-  find_file(ATLAS_REFERENCE_LAPACK liblapack.so.3 PATHS /usr/lib /usr/lib64)
-  find_library(ATLAS_REFERENCE_LAPACK NAMES lapack)
-#   if(ATLAS_REFERENCE_LAPACK)
-#     set(ATLAS_LIBRARIES ${ATLAS_LIBRARIES} ${ATLAS_REFERENCE_LAPACK})
-#   endif()
-  
-endif()
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(ATLAS DEFAULT_MSG ATLAS_LIBRARIES)
-
-mark_as_advanced(ATLAS_LIBRARIES)
--- a/bench/btl/cmake/FindBLAZE.cmake
+++ b/bench/btl/cmake/FindBLAZE.cmake
@@ -1,31 +0,0 @@
-# - Try to find eigen2 headers
-# Once done this will define
-#
-#  BLAZE_FOUND - system has blaze lib
-#  BLAZE_INCLUDE_DIR - the blaze include directory
-#
-# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-# Adapted from FindEigen.cmake:
-# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-if (BLAZE_INCLUDE_DIR)
-
-  # in cache already
-  set(BLAZE_FOUND TRUE)
-
-else ()
-
-find_path(BLAZE_INCLUDE_DIR NAMES blaze/Blaze.h
-     PATHS
-     ${INCLUDE_INSTALL_DIR}
-   )
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(BLAZE DEFAULT_MSG BLAZE_INCLUDE_DIR)
-
-mark_as_advanced(BLAZE_INCLUDE_DIR)
-
-endif()
-
--- a/bench/btl/cmake/FindBlitz.cmake
+++ b/bench/btl/cmake/FindBlitz.cmake
@@ -1,40 +0,0 @@
-# - Try to find blitz lib
-# Once done this will define
-#
-#  BLITZ_FOUND - system has blitz lib
-#  BLITZ_INCLUDES - the blitz include directory
-#  BLITZ_LIBRARIES - The libraries needed to use blitz
-
-# Copyright (c) 2006, Montel Laurent, <montel@kde.org>
-# Copyright (c) 2007, Allen Winter, <winter@kde.org>
-# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-# include(FindLibraryWithDebug)
-
-if (BLITZ_INCLUDES AND BLITZ_LIBRARIES)
-  set(Blitz_FIND_QUIETLY TRUE)
-endif ()
-
-find_path(BLITZ_INCLUDES
-  NAMES
-  blitz/array.h
-  PATH_SUFFIXES blitz*
-  PATHS
-  $ENV{BLITZDIR}/include
-  ${INCLUDE_INSTALL_DIR}
-)
-
-find_library(BLITZ_LIBRARIES
-  blitz
-  PATHS
-  $ENV{BLITZDIR}/lib
-  ${LIB_INSTALL_DIR}
-)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(Blitz DEFAULT_MSG
-                                  BLITZ_INCLUDES BLITZ_LIBRARIES)
-
-mark_as_advanced(BLITZ_INCLUDES BLITZ_LIBRARIES)
--- a/bench/btl/cmake/FindCBLAS.cmake
+++ b/bench/btl/cmake/FindCBLAS.cmake
@@ -1,35 +0,0 @@
-# include(FindLibraryWithDebug)
-
-if (CBLAS_INCLUDES AND CBLAS_LIBRARIES)
-  set(CBLAS_FIND_QUIETLY TRUE)
-endif ()
-
-find_path(CBLAS_INCLUDES
-  NAMES
-  cblas.h
-  PATHS
-  $ENV{CBLASDIR}/include
-  ${INCLUDE_INSTALL_DIR}
-)
-
-find_library(CBLAS_LIBRARIES
-  cblas
-  PATHS
-  $ENV{CBLASDIR}/lib
-  ${LIB_INSTALL_DIR}
-)
-
-find_file(CBLAS_LIBRARIES
-  libcblas.so.3
-  PATHS
-  /usr/lib
-  /usr/lib64
-  $ENV{CBLASDIR}/lib
-  ${LIB_INSTALL_DIR}
-)
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(CBLAS DEFAULT_MSG
-                                  CBLAS_INCLUDES CBLAS_LIBRARIES)
-
-mark_as_advanced(CBLAS_INCLUDES CBLAS_LIBRARIES)
--- a/bench/btl/cmake/FindGMM.cmake
+++ b/bench/btl/cmake/FindGMM.cmake
@@ -1,17 +0,0 @@
-if (GMM_INCLUDE_DIR)
-  # in cache already
-  set(GMM_FOUND TRUE)
-else ()
-
-find_path(GMM_INCLUDE_DIR NAMES gmm/gmm.h
-     PATHS
-     ${INCLUDE_INSTALL_DIR}
-     ${GMM_INCLUDE_PATH}
-   )
-
-include(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(GMM DEFAULT_MSG GMM_INCLUDE_DIR )
-
-mark_as_advanced(GMM_INCLUDE_DIR)
-
-endif()
--- a/bench/btl/cmake/FindMKL.cmake
+++ b/bench/btl/cmake/FindMKL.cmake
@@ -1,65 +0,0 @@
-
-if (MKL_LIBRARIES)
-  set(MKL_FIND_QUIETLY TRUE)
-endif ()
-
-if(CMAKE_MINOR_VERSION GREATER 4)
-
-if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
-
-find_library(MKL_LIBRARIES
-  mkl_core
-  PATHS
-  $ENV{MKLLIB}
-  /opt/intel/mkl/*/lib/em64t
-  /opt/intel/Compiler/*/*/mkl/lib/em64t
-  ${LIB_INSTALL_DIR}
-)
-
-find_library(MKL_GUIDE
-  guide
-  PATHS
-  $ENV{MKLLIB}
-  /opt/intel/mkl/*/lib/em64t
-  /opt/intel/Compiler/*/*/mkl/lib/em64t
-  /opt/intel/Compiler/*/*/lib/intel64
-  ${LIB_INSTALL_DIR}
-)
-
-if(MKL_LIBRARIES AND MKL_GUIDE)
-  set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64 mkl_sequential ${MKL_GUIDE} pthread)
-endif()
-
-else()
-
-find_library(MKL_LIBRARIES
-  mkl_core
-  PATHS
-  $ENV{MKLLIB}
-  /opt/intel/mkl/*/lib/32
-  /opt/intel/Compiler/*/*/mkl/lib/32
-  ${LIB_INSTALL_DIR}
-)
-
-find_library(MKL_GUIDE
-  guide
-  PATHS
-  $ENV{MKLLIB}
-  /opt/intel/mkl/*/lib/32
-  /opt/intel/Compiler/*/*/mkl/lib/32
-  /opt/intel/Compiler/*/*/lib/intel32
-  ${LIB_INSTALL_DIR}
-)
-
-if(MKL_LIBRARIES AND MKL_GUIDE)
-  set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel mkl_sequential ${MKL_GUIDE} pthread)
-endif()
-
-endif()
-
-endif()
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARIES)
-
-mark_as_advanced(MKL_LIBRARIES)
--- a/bench/btl/cmake/FindMTL4.cmake
+++ b/bench/btl/cmake/FindMTL4.cmake
@@ -1,31 +0,0 @@
-# - Try to find eigen2 headers
-# Once done this will define
-#
-#  MTL4_FOUND - system has eigen2 lib
-#  MTL4_INCLUDE_DIR - the eigen2 include directory
-#
-# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-# Adapted from FindEigen.cmake:
-# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-if (MTL4_INCLUDE_DIR)
-
-  # in cache already
-  set(MTL4_FOUND TRUE)
-
-else ()
-
-find_path(MTL4_INCLUDE_DIR NAMES boost/numeric/mtl/mtl.hpp
-     PATHS
-     ${INCLUDE_INSTALL_DIR}
-   )
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(MTL4 DEFAULT_MSG MTL4_INCLUDE_DIR)
-
-mark_as_advanced(MTL4_INCLUDE_DIR)
-
-endif()
-
--- a/bench/btl/cmake/FindOPENBLAS.cmake
+++ b/bench/btl/cmake/FindOPENBLAS.cmake
@@ -1,17 +0,0 @@
-
-if (OPENBLAS_LIBRARIES)
-  set(OPENBLAS_FIND_QUIETLY TRUE)
-endif ()
-
-find_file(OPENBLAS_LIBRARIES NAMES libopenblas.so libopenblas.so.0 PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR})
-find_library(OPENBLAS_LIBRARIES openblas PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR})
-
-if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
-  set(OPENBLAS_LIBRARIES ${OPENBLAS_LIBRARIES} "-lpthread -lgfortran")
-endif()
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(OPENBLAS DEFAULT_MSG
-                                  OPENBLAS_LIBRARIES)
-
-mark_as_advanced(OPENBLAS_LIBRARIES)
--- a/bench/btl/cmake/FindPackageHandleStandardArgs.cmake
+++ b/bench/btl/cmake/FindPackageHandleStandardArgs.cmake
@@ -1,60 +0,0 @@
-# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
-#
-# This macro is intended to be used in FindXXX.cmake modules files.
-# It handles the REQUIRED and QUIET argument to find_package() and
-# it also sets the <UPPERCASED_NAME>_FOUND variable.
-# The package is found if all variables listed are TRUE.
-# Example:
-#
-#    FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
-#
-# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and 
-# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
-# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, 
-# independent whether QUIET was used or not.
-#
-# If it is found, the location is reported using the VAR1 argument, so 
-# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
-# If the second argument is DEFAULT_MSG, the message in the failure case will 
-# be "Could NOT find LibXml2", if you don't like this message you can specify
-# your own custom failure message there.
-
-macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
-
-  if("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
-    if (${_NAME}_FIND_REQUIRED)
-      set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
-    else (${_NAME}_FIND_REQUIRED)
-      set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
-    endif (${_NAME}_FIND_REQUIRED)
-  else("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
-    set(_FAIL_MESSAGE "${_FAIL_MSG}")
-  endif("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
-
-  string(TOUPPER ${_NAME} _NAME_UPPER)
-
-  set(${_NAME_UPPER}_FOUND TRUE)
-  if(NOT ${_VAR1})
-    set(${_NAME_UPPER}_FOUND FALSE)
-  endif(NOT ${_VAR1})
-
-  foreach(_CURRENT_VAR ${ARGN})
-    if(NOT ${_CURRENT_VAR})
-      set(${_NAME_UPPER}_FOUND FALSE)
-    endif(NOT ${_CURRENT_VAR})
-  endforeach(_CURRENT_VAR)
-
-  if (${_NAME_UPPER}_FOUND)
-    if (NOT ${_NAME}_FIND_QUIETLY)
-        message(STATUS "Found ${_NAME}: ${${_VAR1}}")
-    endif (NOT ${_NAME}_FIND_QUIETLY)
-  else (${_NAME_UPPER}_FOUND)
-    if (${_NAME}_FIND_REQUIRED)
-        message(FATAL_ERROR "${_FAIL_MESSAGE}")
-    else (${_NAME}_FIND_REQUIRED)
-      if (NOT ${_NAME}_FIND_QUIETLY)
-        message(STATUS "${_FAIL_MESSAGE}")
-      endif (NOT ${_NAME}_FIND_QUIETLY)
-    endif (${_NAME}_FIND_REQUIRED)
-  endif (${_NAME_UPPER}_FOUND)
-endmacro(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
--- a/bench/btl/cmake/FindTvmet.cmake
+++ b/bench/btl/cmake/FindTvmet.cmake
@@ -1,32 +0,0 @@
-# - Try to find tvmet headers
-# Once done this will define
-#
-#  TVMET_FOUND - system has tvmet lib
-#  TVMET_INCLUDE_DIR - the tvmet include directory
-#
-# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-# Adapted from FindEigen.cmake:
-# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-if (TVMET_INCLUDE_DIR)
-
-  # in cache already
-  set(TVMET_FOUND TRUE)
-
-else ()
-
-find_path(TVMET_INCLUDE_DIR NAMES tvmet/tvmet.h
-     PATHS
-     ${TVMETDIR}/
-     ${INCLUDE_INSTALL_DIR}
-   )
-
-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(Tvmet DEFAULT_MSG TVMET_INCLUDE_DIR)
-
-mark_as_advanced(TVMET_INCLUDE_DIR)
-
-endif()
-
--- a/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake
+++ b/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake
@@ -1,31 +0,0 @@
-# - MACRO_OPTIONAL_ADD_SUBDIRECTORY() combines add_subdirectory() with an option()
-# MACRO_OPTIONAL_ADD_SUBDIRECTORY( <dir> )
-# If you use MACRO_OPTIONAL_ADD_SUBDIRECTORY() instead of add_subdirectory(),
-# this will have two effects
-# 1 - CMake will not complain if the directory doesn't exist
-#     This makes sense if you want to distribute just one of the subdirs
-#     in a source package, e.g. just one of the subdirs in kdeextragear.
-# 2 - If the directory exists, it will offer an option to skip the 
-#     subdirectory.
-#     This is useful if you want to compile only a subset of all
-#     directories.
-
-# Copyright (c) 2007, Alexander Neundorf, <neundorf@kde.org>
-#
-# Redistribution and use is allowed according to the terms of the BSD license.
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
-
-
-macro (MACRO_OPTIONAL_ADD_SUBDIRECTORY _dir )
-   get_filename_component(_fullPath ${_dir} ABSOLUTE)
-   if(EXISTS ${_fullPath})
-      if(${ARGC} EQUAL 2)
-        option(BUILD_${_dir} "Build directory ${_dir}" ${ARGV1})
-      else(${ARGC} EQUAL 2)
-        option(BUILD_${_dir} "Build directory ${_dir}" TRUE)
-      endif(${ARGC} EQUAL 2)
-      if(BUILD_${_dir})
-         add_subdirectory(${_dir})
-      endif(BUILD_${_dir})
-   endif(EXISTS ${_fullPath})
-endmacro (MACRO_OPTIONAL_ADD_SUBDIRECTORY)
--- a/bench/btl/data/CMakeLists.txt
+++ b/bench/btl/data/CMakeLists.txt
@@ -1,32 +0,0 @@
-
-add_custom_target(copy_scripts)
-
-set(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh
-    perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh )
-
-foreach(script_file ${script_files})
-add_custom_command(
-  TARGET copy_scripts
-  POST_BUILD
-  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/
-  ARGS
-)
-endforeach(script_file)
-
-add_custom_command(
-  TARGET copy_scripts
-  POST_BUILD
-  COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt
-  ARGS
-)
-add_custom_command(
-  TARGET copy_scripts
-  POST_BUILD
-  COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt
-  ARGS
-)
-
-add_executable(smooth smooth.cxx)
-add_executable(regularize regularize.cxx)
-add_executable(main mean.cxx)
-add_dependencies(main copy_scripts)
--- a/bench/btl/data/action_settings.txt
+++ b/bench/btl/data/action_settings.txt
@@ -1,19 +0,0 @@
-aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000
-ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000
-atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000
-axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
-axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
-matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000
-matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000
-trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000
-trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000
-trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000
-cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000
-complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000
-partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000
-tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000
-hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000
-symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000
-syr2 ; "{/*1.5 symmretric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:5000
-ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000
-rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000
--- a/bench/btl/data/gnuplot_common_settings.hh
+++ b/bench/btl/data/gnuplot_common_settings.hh
@@ -1,87 +0,0 @@
-set noclip points
-set clip one
-set noclip two
-set bar 1.000000
-set border 31 lt -1 lw 1.000
-set xdata
-set ydata
-set zdata
-set x2data
-set y2data
-set boxwidth
-set dummy x,y
-set format x "%g"
-set format y "%g"
-set format x2 "%g"
-set format y2 "%g"
-set format z "%g"
-set angles radians
-set nogrid
-set key title ""
-set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0
-set nolabel
-set noarrow
-# set nolinestyle # deprecated
-set nologscale
-set logscale x 10
-set offsets 0, 0, 0, 0
-set pointsize 1
-set encoding default
-set nopolar
-set noparametric
-set view 60, 30, 1, 1
-set samples 100, 100
-set isosamples 10, 10
-set surface
-set nocontour
-set clabel '%8.3g'
-set mapping cartesian
-set nohidden3d
-set cntrparam order 4
-set cntrparam linear
-set cntrparam levels auto 5
-set cntrparam points 5
-set size ratio 0 1,1
-set origin 0,0
-# set data style lines
-# set function style lines
-set xzeroaxis lt -2 lw 1.000
-set x2zeroaxis lt -2 lw 1.000
-set yzeroaxis lt -2 lw 1.000
-set y2zeroaxis lt -2 lw 1.000
-set tics in
-set ticslevel 0.5
-set tics scale 1, 0.5
-set mxtics default
-set mytics default
-set mx2tics default
-set my2tics default
-set xtics border mirror norotate autofreq
-set ytics border mirror norotate autofreq
-set ztics border nomirror norotate autofreq
-set nox2tics
-set noy2tics
-set timestamp "" bottom norotate offset 0,0
-set rrange [ * : * ] noreverse nowriteback  # (currently [-0:10] )
-set trange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
-set urange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
-set vrange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
-set xlabel "matrix size" offset 0,0
-set x2label "" offset 0,0
-set timefmt "%d/%m/%y\n%H:%M"
-set xrange [ 10 : 1000 ] noreverse nowriteback
-set x2range [ * : * ] noreverse nowriteback  # (currently [-10:10] )
-set ylabel "MFLOPS" offset 0,0
-set y2label "" offset 0,0
-set yrange [ * : * ] noreverse nowriteback  # (currently [-10:10] )
-set y2range [ * : * ] noreverse nowriteback  # (currently [-10:10] )
-set zlabel "" offset 0,0
-set zrange [ * : * ] noreverse nowriteback  # (currently [-10:10] )
-set zero 1e-08
-set lmargin -1
-set bmargin -1
-set rmargin -1
-set tmargin -1
-set locale "C"
-set xrange [4:1024]
-
--- a/bench/btl/data/go_mean
+++ b/bench/btl/data/go_mean
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-if [ $# < 1 ]; then
-  echo "Usage: $0 working_directory [tiny|large [prefix]]"
-else
-
-mkdir -p $1
-##cp ../libs/*/*.dat $1
-
-mode=large
-if [ $# > 2 ]; then
-  mode=$2
-fi
-if [ $# > 3 ]; then
-  prefix=$3
-fi
-
-EIGENDIR=`cat eigen_root_dir.txt`
-
-webpagefilename=$1/index.html
-meanstatsfilename=$1/mean.html
-
-echo ''  > $meanstatsfilename
-echo ''  > $webpagefilename
-echo '<p><strong>Configuration</strong>'  >> $webpagefilename
-echo '<ul>'\
-  '<li>' `cat /proc/cpuinfo | grep "model name" | head -n 1`\
-  '  (' `uname -m` ')</li>'\
-  '<li> compiler: ' `cat compiler_version.txt` '</li>'\
-  '<li> eigen3: ' `git ls-remote --refs  -q $EIGENDIR HEAD | cut  -f 1` '</li>'\
-  '</ul>' \
-  '</p>'  >> $webpagefilename
-
-source mk_mean_script.sh axpy $1 11 2500 100000 250000  $mode $prefix
-source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
-source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
-source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
-source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
-# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh partial_lu_decomp $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
-source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
-source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
-source mk_mean_script.sh ger $1 11 50 300 1000 $mode $prefix
-source mk_mean_script.sh rot $1 11 2500 100000 250000 $mode $prefix
-source mk_mean_script.sh complete_lu_decomp $1 11 100 300 1000 $mode $prefix
-
-fi
-
-## compile the web page ##
-
-#echo `cat footer.html` >> $webpagefilename
--- a/bench/btl/data/mean.cxx
+++ b/bench/btl/data/mean.cxx
@@ -1,165 +0,0 @@
-//=====================================================
-// File   :  mean.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#include "utilities.h"
-#include <vector>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include "bench_parameter.hh"
-#include "utils/xy_file.hh"
-#include <set>
-
-using namespace std;
-
-double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
-                 const int size_max);
-
-class Lib_Mean {
- public:
-  Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() {
-    MESSAGE("Lib_mean Default Ctor");
-    MESSAGE("!!! should not be used");
-    exit(0);
-  }
-  Lib_Mean(const string &name, const double &mic, const double &moc)
-      : _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) {
-    MESSAGE("Lib_mean Ctor");
-  }
-  Lib_Mean(const Lib_Mean &lm)
-      : _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) {
-    MESSAGE("Lib_mean Copy Ctor");
-  }
-  ~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); }
-
-  double _mean_in_cache;
-  double _mean_out_of_cache;
-  string _lib_name;
-
-  bool operator<(const Lib_Mean &right) const {
-    // return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
-    return (this->_mean_in_cache > right._mean_in_cache);
-  }
-};
-
-int main(int argc, char *argv[]) {
-  if (argc < 6) {
-    INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
-    exit(0);
-  }
-  INFOS(argc);
-
-  int min_in_cache = atoi(argv[2]);
-  int max_in_cache = atoi(argv[3]);
-  int min_out_of_cache = atoi(argv[4]);
-  int max_out_of_cache = atoi(argv[5]);
-
-  multiset<Lib_Mean> s_lib_mean;
-
-  for (int i = 6; i < argc; i++) {
-    string filename = argv[i];
-
-    INFOS(filename);
-
-    double mic = 0;
-    double moc = 0;
-
-    {
-      vector<int> tab_sizes;
-      vector<double> tab_mflops;
-
-      read_xy_file(filename, tab_sizes, tab_mflops);
-
-      mic = mean_calc(tab_sizes, tab_mflops, min_in_cache, max_in_cache);
-      moc = mean_calc(tab_sizes, tab_mflops, min_out_of_cache, max_out_of_cache);
-
-      Lib_Mean cur_lib_mean(filename, mic, moc);
-
-      s_lib_mean.insert(cur_lib_mean);
-    }
-  }
-
-  cout << "<TABLE BORDER CELLPADDING=2>" << endl;
-  cout << "  <TR>" << endl;
-  cout << "    <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl;
-  cout << "    <TH ALIGN=CENTER> <a href="
-          "#mean_marker"
-          "> in cache <BR> mean perf <BR> Mflops </a></TH>"
-       << endl;
-  cout << "    <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl;
-  cout << "    <TH ALIGN=CENTER> <a href="
-          "#mean_marker"
-          "> out of cache <BR> mean perf <BR> Mflops </a></TH>"
-       << endl;
-  cout << "    <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl;
-  cout << "    <TH ALIGN=CENTER> details </TH>" << endl;
-  cout << "    <TH ALIGN=CENTER> comments </TH>" << endl;
-  cout << "  </TR>" << endl;
-
-  multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
-  Lib_Mean best(*is);
-
-  for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
-    cout << "  <TR>" << endl;
-    cout << "     <TD> " << is->_lib_name << " </TD>" << endl;
-    cout << "     <TD> " << is->_mean_in_cache << " </TD>" << endl;
-    cout << "     <TD> " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " </TD>" << endl;
-    cout << "     <TD> " << is->_mean_out_of_cache << " </TD>" << endl;
-    cout << "     <TD> " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " </TD>" << endl;
-    cout << "     <TD> "
-         << "<a href=\"#" << is->_lib_name << "_" << argv[1]
-         << "\">snippet</a>/"
-            "<a href=\"#"
-         << is->_lib_name << "_flags\">flags</a>  </TD>" << endl;
-    cout << "     <TD> "
-         << "<a href=\"#" << is->_lib_name << "_comments\">click here</a>  </TD>" << endl;
-    cout << "  </TR>" << endl;
-  }
-
-  cout << "</TABLE>" << endl;
-
-  ofstream output_file("../order_lib", ios::out);
-
-  for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
-    output_file << is->_lib_name << endl;
-  }
-
-  output_file.close();
-}
-
-double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
-                 const int size_max) {
-  int size = tab_sizes.size();
-  int nb_sample = 0;
-  double mean = 0.0;
-
-  for (int i = 0; i < size; i++) {
-    if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) {
-      nb_sample++;
-      mean += tab_mflops[i];
-    }
-  }
-
-  if (nb_sample == 0) {
-    INFOS("no data for mean calculation");
-    return 0.0;
-  }
-
-  return mean / nb_sample;
-}
--- a/bench/btl/data/mk_gnuplot_script.sh
+++ b/bench/btl/data/mk_gnuplot_script.sh
@@ -1,68 +0,0 @@
-#! /bin/bash
-WHAT=$1
-DIR=$2
-echo $WHAT script generation
-cat $WHAT.hh > $WHAT.gnuplot
-
-DATA_FILE=`find $DIR -name "*.dat" | grep $WHAT`
-
-echo plot \\ >> $WHAT.gnuplot
-
-for FILE in $DATA_FILE
-do
-    LAST=$FILE
-done
-
-echo LAST=$LAST
-
-for FILE in $DATA_FILE
-do
-     if [ $FILE != $LAST ]
-     then
-	BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-	echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >>  $WHAT.gnuplot
-     fi
-done
-BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-echo "'"$LAST"'" title "'"$TITLE"'" >>  $WHAT.gnuplot
-
-#echo set term postscript color >> $WHAT.gnuplot
-#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot
-echo set term pbm small color >> $WHAT.gnuplot
-echo set output "'"$WHAT.ppm"'" >> $WHAT.gnuplot
-echo plot \\ >> $WHAT.gnuplot
-
-for FILE in $DATA_FILE
-do
-     if [ $FILE != $LAST ]
-     then
-	BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-	echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >>  $WHAT.gnuplot
-     fi
-done
-BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-echo "'"$LAST"'" title "'"$TITLE"'" >>  $WHAT.gnuplot
-
-echo set term jpeg large >> $WHAT.gnuplot
-echo set output "'"$WHAT.jpg"'" >> $WHAT.gnuplot
-echo plot \\ >> $WHAT.gnuplot
-
-for FILE in $DATA_FILE
-do
-     if [ $FILE != $LAST ]
-     then
-	BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-	echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >>  $WHAT.gnuplot
-     fi
-done
-BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-echo "'"$LAST"'" title "'"$TITLE"'" >>  $WHAT.gnuplot
-
-
-gnuplot -persist < $WHAT.gnuplot
-
-rm $WHAT.gnuplot
-
-
-
-
--- a/bench/btl/data/mk_mean_script.sh
+++ b/bench/btl/data/mk_mean_script.sh
@@ -1,52 +0,0 @@
-#! /bin/bash
-WHAT=$1
-DIR=$2
-MINIC=$3
-MAXIC=$4
-MINOC=$5
-MAXOC=$6
-prefix=$8
-
-meanstatsfilename=$2/mean.html
-
-WORK_DIR=tmp
-mkdir $WORK_DIR
-
-DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`
-
-if [ -n "$DATA_FILE" ]; then
-
-  echo ""
-  echo "$1..."
-  for FILE in $DATA_FILE
-  do
-          ##echo hello world
-          ##echo "mk_mean_script1" ${FILE}
-    BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-
-    ##echo "mk_mean_script1" ${TITLE}
-    cp $FILE ${WORK_DIR}/${TITLE}
-
-  done
-
-  cd $WORK_DIR
-  ../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename
-  ../mk_new_gnuplot.sh $1 $2 $7
-  rm -f *.gnuplot
-  cd ..
-
-  echo '<br/>' >> $meanstatsfilename
-
-  webpagefilename=$2/index.html
-  # echo '<h3>'${WHAT}'</h3>'  >> $webpagefilename
-  echo '<hr/><a href="'$prefix$1'.pdf"><img src="'$prefix$1'.png" alt="'${WHAT}'" /></a><br/>'  >> $webpagefilename
-
-fi
-
-rm -R $WORK_DIR
-
-
-
-
-
-
--- a/bench/btl/data/mk_new_gnuplot.sh
+++ b/bench/btl/data/mk_new_gnuplot.sh
@@ -1,54 +0,0 @@
-#!/bin/bash
-WHAT=$1
-DIR=$2
-
-cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot
-
-echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot
-echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` " offset 0,0" >> $WHAT.gnuplot
-echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot
-
-if [ $# > 3 ]; then
-  if [ "$3" == "tiny" ]; then
-    echo "set xrange [2:16]" >> $WHAT.gnuplot
-    echo "set nologscale" >> $WHAT.gnuplot
-  fi
-fi
-
-
-
-DATA_FILE=`cat ../order_lib`
-echo set term postscript color rounded enhanced >> $WHAT.gnuplot
-echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot
-
-# echo set term svg color rounded enhanced >> $WHAT.gnuplot
-# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
-# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot
-
-echo plot \\ >> $WHAT.gnuplot
-
-for FILE in $DATA_FILE
-do
-    LAST=$FILE
-done
-
-for FILE in $DATA_FILE
-do
-    BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
-
-    echo "'"$FILE"'" `grep $TITLE ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2` "\\" >>  $WHAT.gnuplot
-    if [ $FILE != $LAST ]
-    then
-      echo ", \\" >>  $WHAT.gnuplot
-    fi
-done
-echo " " >>  $WHAT.gnuplot
-
-gnuplot -persist < $WHAT.gnuplot
-
-rm $WHAT.gnuplot
-
-ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
-convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten  ../${DIR}/$WHAT.png
-
-# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100  -f plot-svg aat.ps  aat2.svg
--- a/bench/btl/data/perlib_plot_settings.txt
+++ b/bench/btl/data/perlib_plot_settings.txt
@@ -1,16 +0,0 @@
-eigen3 ;          with lines lw 4 lt 1 lc rgbcolor "black"
-eigen2 ;          with lines lw 3 lt 1 lc rgbcolor "#999999"
-EigenBLAS ;       with lines lw 3 lt 3 lc rgbcolor "#999999"
-eigen3_novec ;    with lines lw 2 lt 1 lc rgbcolor "#999999"
-eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010"
-INTEL_MKL ;       with lines lw 3 lt 1 lc rgbcolor "#ff0000"
-ATLAS ;           with lines lw 3 lt 1 lc rgbcolor "#008000"
-gmm ;             with lines lw 3 lt 1 lc rgbcolor "#0000ff"
-ublas ;           with lines lw 3 lt 1 lc rgbcolor "#00b7ff"
-mtl4 ;            with lines lw 3 lt 1 lc rgbcolor "#d18847"
-blitz ;           with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
-F77 ;             with lines lw 3 lt 3 lc rgbcolor "#e6e64c"
-OPENBLAS ;        with lines lw 3 lt 1 lc rgbcolor "#C05600"
-C ;               with lines lw 3 lt 3 lc rgbcolor "#e6bd96"
-ACML ;            with lines lw 2 lt 3 lc rgbcolor "#e6e64c"
-blaze ;           with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
--- a/bench/btl/data/regularize.cxx
+++ b/bench/btl/data/regularize.cxx
@@ -1,113 +0,0 @@
-//=====================================================
-// File   :  regularize.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#include "utilities.h"
-#include <vector>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include "bench_parameter.hh"
-#include <set>
-
-using namespace std;
-
-void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
-void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
-                      int start_cut_size, int stop_cut_size);
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-int main(int argc, char *argv[]) {
-  // input data
-
-  if (argc < 4) {
-    INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename");
-    exit(0);
-  }
-  INFOS(argc);
-
-  int start_cut_size = atoi(argv[2]);
-  int stop_cut_size = atoi(argv[3]);
-
-  string filename = argv[1];
-  string regularize_filename = argv[4];
-
-  INFOS(filename);
-  INFOS("start_cut_size=" << start_cut_size);
-
-  vector<int> tab_sizes;
-  vector<double> tab_mflops;
-
-  read_xy_file(filename, tab_sizes, tab_mflops);
-
-  // regularizeing
-
-  regularize_curve(regularize_filename, tab_mflops, tab_sizes, start_cut_size, stop_cut_size);
-}
-
-//////////////////////////////////////////////////////////////////////////////////////
-
-void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
-                      int start_cut_size, int stop_cut_size) {
-  int size = tab_mflops.size();
-  ofstream output_file(filename.c_str(), ios::out);
-
-  int i = 0;
-
-  while (tab_sizes[i] < start_cut_size) {
-    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
-    i++;
-  }
-
-  output_file << endl;
-
-  while (tab_sizes[i] < stop_cut_size) {
-    i++;
-  }
-
-  while (i < size) {
-    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
-    i++;
-  }
-
-  output_file.close();
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
-  ifstream input_file(filename.c_str(), ios::in);
-
-  if (!input_file) {
-    INFOS("!!! Error opening " << filename);
-    exit(0);
-  }
-
-  int nb_point = 0;
-  int size = 0;
-  double mflops = 0;
-
-  while (input_file >> size >> mflops) {
-    nb_point++;
-    tab_sizes.push_back(size);
-    tab_mflops.push_back(mflops);
-  }
-  SCRUTE(nb_point);
-
-  input_file.close();
-}
--- a/bench/btl/data/smooth.cxx
+++ b/bench/btl/data/smooth.cxx
@@ -1,165 +0,0 @@
-//=====================================================
-// File   :  smooth.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#include "utilities.h"
-#include <vector>
-#include <deque>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include "bench_parameter.hh"
-#include <set>
-
-using namespace std;
-
-void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
-void write_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
-void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);
-void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-
-int main(int argc, char *argv[]) {
-  // input data
-
-  if (argc < 3) {
-    INFOS("!!! Error ... usage : main filename window_half_width smooth_filename");
-    exit(0);
-  }
-  INFOS(argc);
-
-  int window_half_width = atoi(argv[2]);
-
-  string filename = argv[1];
-  string smooth_filename = argv[3];
-
-  INFOS(filename);
-  INFOS("window_half_width=" << window_half_width);
-
-  vector<int> tab_sizes;
-  vector<double> tab_mflops;
-
-  read_xy_file(filename, tab_sizes, tab_mflops);
-
-  // smoothing
-
-  vector<double> smooth_tab_mflops;
-
-  // smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
-  centered_smooth_curve(tab_mflops, smooth_tab_mflops, window_half_width);
-
-  // output result
-
-  write_xy_file(smooth_filename, tab_sizes, smooth_tab_mflops);
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-template <class VECTOR>
-double weighted_mean(const VECTOR &data) {
-  double mean = 0.0;
-
-  for (int i = 0; i < data.size(); i++) {
-    mean += data[i];
-  }
-
-  return mean / double(data.size());
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
-  int window_width = 2 * window_half_width + 1;
-
-  int size = tab_mflops.size();
-
-  vector<double> sample(window_width);
-
-  for (int i = 0; i < size; i++) {
-    for (int j = 0; j < window_width; j++) {
-      int shifted_index = i + j - window_half_width;
-      if (shifted_index < 0) shifted_index = 0;
-      if (shifted_index > size - 1) shifted_index = size - 1;
-      sample[j] = tab_mflops[shifted_index];
-    }
-
-    smooth_tab_mflops.push_back(weighted_mean(sample));
-  }
-}
-
-void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
-  int max_window_width = 2 * window_half_width + 1;
-
-  int size = tab_mflops.size();
-
-  for (int i = 0; i < size; i++) {
-    deque<double> sample;
-
-    sample.push_back(tab_mflops[i]);
-
-    for (int j = 1; j <= window_half_width; j++) {
-      int before = i - j;
-      int after = i + j;
-
-      if ((before >= 0) && (after < size))  // inside of the vector
-      {
-        sample.push_front(tab_mflops[before]);
-        sample.push_back(tab_mflops[after]);
-      }
-    }
-
-    smooth_tab_mflops.push_back(weighted_mean(sample));
-  }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void write_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
-  ofstream output_file(filename.c_str(), ios::out);
-
-  for (int i = 0; i < tab_sizes.size(); i++) {
-    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
-  }
-
-  output_file.close();
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
-  ifstream input_file(filename.c_str(), ios::in);
-
-  if (!input_file) {
-    INFOS("!!! Error opening " << filename);
-    exit(0);
-  }
-
-  int nb_point = 0;
-  int size = 0;
-  double mflops = 0;
-
-  while (input_file >> size >> mflops) {
-    nb_point++;
-    tab_sizes.push_back(size);
-    tab_mflops.push_back(mflops);
-  }
-  SCRUTE(nb_point);
-
-  input_file.close();
-}
--- a/bench/btl/data/smooth_all.sh
+++ b/bench/btl/data/smooth_all.sh
@@ -1,68 +0,0 @@
-#! /bin/bash
-ORIG_DIR=$1
-SMOOTH_DIR=${ORIG_DIR}_smooth
-mkdir ${SMOOTH_DIR}
-
-AXPY_FILE=`find ${ORIG_DIR} -name "*.dat" | grep axpy`
-for FILE in ${AXPY_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp
-    ./regularize ${SMOOTH_DIR}/${BASE}_tmp 2500 15000 ${SMOOTH_DIR}/${BASE}
-    rm -f  ${SMOOTH_DIR}/${BASE}_tmp
-done
-
-
-MATRIX_VECTOR_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_vector`
-for FILE in ${MATRIX_VECTOR_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp
-    ./regularize ${SMOOTH_DIR}/${BASE}_tmp 50 180 ${SMOOTH_DIR}/${BASE}
-    rm -f  ${SMOOTH_DIR}/${BASE}_tmp
-done
-
-MATRIX_MATRIX_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_matrix`
-for FILE in ${MATRIX_MATRIX_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}
-done
-
-AAT_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _aat`
-for FILE in ${AAT_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}
-done
-
-
-ATA_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _ata`
-for FILE in ${ATA_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}
-done
-
-### no smoothing for tinyvector and matrices libs
-
-TINY_BLITZ_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tiny_blitz`
-for FILE in ${TINY_BLITZ_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE}
-done
-
-TVMET_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tvmet`
-for FILE in ${TVMET_FILE}
-do
-    echo $FILE
-    BASE=${FILE##*/}
-    cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE}
-done
--- a/bench/btl/generic_bench/bench.hh
+++ b/bench/btl/generic_bench/bench.hh
@@ -1,149 +0,0 @@
-//=====================================================
-// File   :  bench.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef BENCH_HH
-#define BENCH_HH
-
-#include "btl.hh"
-#include "bench_parameter.hh"
-#include <iostream>
-#include "utilities.h"
-#include "size_lin_log.hh"
-#include "xy_file.hh"
-#include <vector>
-#include <string>
-#include "timers/portable_perf_analyzer.hh"
-// #include "timers/mixed_perf_analyzer.hh"
-// #include "timers/x86_perf_analyzer.hh"
-// #include "timers/STL_perf_analyzer.hh"
-#ifdef HAVE_MKL
-extern "C" void cblas_saxpy(const int, const float, const float *, const int, float *, const int);
-#endif
-using namespace std;
-
-template <template <class> class Perf_Analyzer, class Action>
-BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
-  if (BtlConfig::skipAction(Action::name())) return;
-
-  string filename = "bench_" + Action::name() + ".dat";
-
-  INFOS("starting " << filename);
-
-  // utilities
-
-  std::vector<double> tab_mflops(nb_point);
-  std::vector<int> tab_sizes(nb_point);
-
-  // matrices and vector size calculations
-  size_lin_log(nb_point, size_min, size_max, tab_sizes);
-
-  std::vector<int> oldSizes;
-  std::vector<double> oldFlops;
-  bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
-  int oldi = oldSizes.size() - 1;
-
-  // loop on matrix size
-  Perf_Analyzer<Action> perf_action;
-  for (int i = nb_point - 1; i >= 0; i--) {
-    // INFOS("size=" <<tab_sizes[i]<<"   ("<<nb_point-i<<"/"<<nb_point<<")");
-    std::cout << " "
-              << "size = " << tab_sizes[i] << "  " << std::flush;
-
-    BTL_DISABLE_SSE_EXCEPTIONS();
-#ifdef HAVE_MKL
-    {
-      float dummy;
-      cblas_saxpy(1, 0, &dummy, 1, &dummy, 1);
-    }
-#endif
-
-    tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
-    std::cout << tab_mflops[i];
-
-    if (hasOldResults) {
-      while (oldi >= 0 && oldSizes[oldi] > tab_sizes[i]) --oldi;
-      if (oldi >= 0 && oldSizes[oldi] == tab_sizes[i]) {
-        if (oldFlops[oldi] < tab_mflops[i])
-          std::cout << "\t > ";
-        else
-          std::cout << "\t < ";
-        std::cout << oldFlops[oldi];
-      }
-      --oldi;
-    }
-    std::cout << " MFlops    (" << nb_point - i << "/" << nb_point << ")" << std::endl;
-  }
-
-  if (!BtlConfig::Instance.overwriteResults) {
-    if (hasOldResults) {
-      // merge the two data
-      std::vector<int> newSizes;
-      std::vector<double> newFlops;
-      unsigned int i = 0;
-      unsigned int j = 0;
-      while (i < tab_sizes.size() && j < oldSizes.size()) {
-        if (tab_sizes[i] == oldSizes[j]) {
-          newSizes.push_back(tab_sizes[i]);
-          newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
-          ++i;
-          ++j;
-        } else if (tab_sizes[i] < oldSizes[j]) {
-          newSizes.push_back(tab_sizes[i]);
-          newFlops.push_back(tab_mflops[i]);
-          ++i;
-        } else {
-          newSizes.push_back(oldSizes[j]);
-          newFlops.push_back(oldFlops[j]);
-          ++j;
-        }
-      }
-      while (i < tab_sizes.size()) {
-        newSizes.push_back(tab_sizes[i]);
-        newFlops.push_back(tab_mflops[i]);
-        ++i;
-      }
-      while (j < oldSizes.size()) {
-        newSizes.push_back(oldSizes[j]);
-        newFlops.push_back(oldFlops[j]);
-        ++j;
-      }
-      tab_mflops = newFlops;
-      tab_sizes = newSizes;
-    }
-  }
-
-  // dump the result in a file  :
-  dump_xy_file(tab_sizes, tab_mflops, filename);
-}
-
-// default Perf Analyzer
-
-template <class Action>
-BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
-  // if the rdtsc is not available :
-  bench<Portable_Perf_Analyzer, Action>(size_min, size_max, nb_point);
-  // if the rdtsc is available :
-  //    bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-
-  // Only for small problem size. Otherwise it will be too long
-  //   bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-  //   bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-}
-
-#endif
--- a/bench/btl/generic_bench/bench_parameter.hh
+++ b/bench/btl/generic_bench/bench_parameter.hh
@@ -1,53 +0,0 @@
-//=====================================================
-// File   :  bench_parameter.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef BENCH_PARAMETER_HH
-#define BENCH_PARAMETER_HH
-
-// minimal time for each measurement
-#define REAL_TYPE float
-// minimal time for each measurement
-#define MIN_TIME 0.2
-// nb of point on bench curves
-#define NB_POINT 100
-// min vector size for axpy bench
-#define MIN_AXPY 5
-// max vector size for axpy bench
-#define MAX_AXPY 3000000
-// min matrix size for matrix vector product bench
-#define MIN_MV 5
-// max matrix size for matrix vector product bench
-#define MAX_MV 5000
-// min matrix size for matrix matrix product bench
-#define MIN_MM 5
-// max matrix size for matrix matrix product bench
-#define MAX_MM MAX_MV
-// min matrix size for LU bench
-#define MIN_LU 5
-// max matrix size for LU bench
-#define MAX_LU 3000
-// max size for tiny vector and matrix
-#define TINY_MV_MAX_SIZE 16
-// default nb_sample for x86 timer
-#define DEFAULT_NB_SAMPLE 1000
-
-// how many times we run a single bench (keep the best perf)
-#define DEFAULT_NB_TRIES 3
-
-#endif
--- a/bench/btl/generic_bench/btl.hh
+++ b/bench/btl/generic_bench/btl.hh
@@ -1,205 +0,0 @@
-//=====================================================
-// File   :  btl.hh
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef BTL_HH
-#define BTL_HH
-
-#include "bench_parameter.hh"
-#include <iostream>
-#include <algorithm>
-#include <vector>
-#include <string>
-#include "utilities.h"
-
-#if (defined __GNUC__)
-#define BTL_ALWAYS_INLINE __attribute__((always_inline)) inline
-#else
-#define BTL_ALWAYS_INLINE inline
-#endif
-
-#if (defined __GNUC__)
-#define BTL_DONT_INLINE __attribute__((noinline))
-#else
-#define BTL_DONT_INLINE
-#endif
-
-#if (defined __GNUC__)
-#define BTL_ASM_COMMENT(X) asm("#" X)
-#else
-#define BTL_ASM_COMMENT(X)
-#endif
-
-#ifdef __SSE__
-#include "xmmintrin.h"
-// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
-#define BTL_DISABLE_SSE_EXCEPTIONS() \
-  { _mm_setcsr(_mm_getcsr() | 0x8040); }
-#else
-#define BTL_DISABLE_SSE_EXCEPTIONS()
-#endif
-
-/** Enhanced std::string
- */
-class BtlString : public std::string {
- public:
-  BtlString() : std::string() {}
-  BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
-  BtlString(const std::string& str) : std::string(str) {}
-  BtlString(const char* str) : std::string(str) {}
-
-  operator const char*() const { return c_str(); }
-
-  void trim(bool left = true, bool right = true) {
-    int lspaces, rspaces, len = length(), i;
-    lspaces = rspaces = 0;
-
-    if (left)
-      for (i = 0; i < len && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); ++lspaces, ++i)
-        ;
-
-    if (right && lspaces < len)
-      for (i = len - 1; i >= 0 && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); rspaces++, i--)
-        ;
-
-    *this = substr(lspaces, len - lspaces - rspaces);
-  }
-
-  std::vector<BtlString> split(const BtlString& delims = "\t\n ") const {
-    std::vector<BtlString> ret;
-    unsigned int numSplits = 0;
-    size_t start, pos;
-    start = 0;
-    do {
-      pos = find_first_of(delims, start);
-      if (pos == start) {
-        ret.push_back("");
-        start = pos + 1;
-      } else if (pos == npos)
-        ret.push_back(substr(start));
-      else {
-        ret.push_back(substr(start, pos - start));
-        start = pos + 1;
-      }
-      // start = find_first_not_of(delims, start);
-      ++numSplits;
-    } while (pos != npos);
-    return ret;
-  }
-
-  bool endsWith(const BtlString& str) const {
-    if (str.size() > this->size()) return false;
-    return this->substr(this->size() - str.size(), str.size()) == str;
-  }
-  bool contains(const BtlString& str) const { return this->find(str) < this->size(); }
-  bool beginsWith(const BtlString& str) const {
-    if (str.size() > this->size()) return false;
-    return this->substr(0, str.size()) == str;
-  }
-
-  BtlString toLowerCase(void) {
-    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::tolower));
-    return *this;
-  }
-  BtlString toUpperCase(void) {
-    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::toupper));
-    return *this;
-  }
-
-  /** Case insensitive comparison.
-   */
-  bool isEquiv(const BtlString& str) const {
-    BtlString str0 = *this;
-    str0.toLowerCase();
-    BtlString str1 = str;
-    str1.toLowerCase();
-    return str0 == str1;
-  }
-
-  /** Decompose the current string as a path and a file.
-      For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext"
-  */
-  void decomposePathAndFile(BtlString& path, BtlString& filename) const {
-    std::vector<BtlString> elements = this->split("/\\");
-    path = "";
-    filename = elements.back();
-    elements.pop_back();
-    if (this->at(0) == '/') path = "/";
-    for (unsigned int i = 0; i < elements.size(); ++i) path += elements[i] + "/";
-  }
-};
-
-class BtlConfig {
- public:
-  BtlConfig() : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES) {
-    char* _config;
-    _config = getenv("BTL_CONFIG");
-    if (_config != NULL) {
-      std::vector<BtlString> config = BtlString(_config).split(" \t\n");
-      for (unsigned int i = 0; i < config.size(); i++) {
-        if (config[i].beginsWith("-a")) {
-          if (i + 1 == config.size()) {
-            std::cerr << "error processing option: " << config[i] << "\n";
-            exit(2);
-          }
-          Instance.m_selectedActionNames = config[i + 1].split(":");
-
-          i += 1;
-        } else if (config[i].beginsWith("-t")) {
-          if (i + 1 == config.size()) {
-            std::cerr << "error processing option: " << config[i] << "\n";
-            exit(2);
-          }
-          Instance.tries = atoi(config[i + 1].c_str());
-
-          i += 1;
-        } else if (config[i].beginsWith("--overwrite")) {
-          Instance.overwriteResults = true;
-        } else if (config[i].beginsWith("--nocheck")) {
-          Instance.checkResults = false;
-        } else if (config[i].beginsWith("--real")) {
-          Instance.realclock = true;
-        }
-      }
-    }
-
-    BTL_DISABLE_SSE_EXCEPTIONS();
-  }
-
-  BTL_DONT_INLINE static bool skipAction(const std::string& _name) {
-    if (Instance.m_selectedActionNames.empty()) return false;
-
-    BtlString name(_name);
-    for (unsigned int i = 0; i < Instance.m_selectedActionNames.size(); ++i)
-      if (name.contains(Instance.m_selectedActionNames[i])) return false;
-
-    return true;
-  }
-
-  static BtlConfig Instance;
-  bool overwriteResults;
-  bool checkResults;
-  bool realclock;
-  int tries;
-
- protected:
-  std::vector<BtlString> m_selectedActionNames;
-};
-
-#define BTL_MAIN BtlConfig BtlConfig::Instance
-
-#endif  // BTL_HH
--- a/bench/btl/generic_bench/init/init_function.hh
+++ b/bench/btl/generic_bench/init/init_function.hh
@@ -1,35 +0,0 @@
-//=====================================================
-// File   :  init_function.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef INIT_FUNCTION_HH
-#define INIT_FUNCTION_HH
-
-double simple_function(int index) { return index; }
-
-double simple_function(int index_i, int index_j) { return index_i + index_j; }
-
-double pseudo_random(int /*index*/) { return std::rand() / double(RAND_MAX); }
-
-double pseudo_random(int /*index_i*/, int /*index_j*/) { return std::rand() / double(RAND_MAX); }
-
-double null_function(int /*index*/) { return 0.0; }
-
-double null_function(int /*index_i*/, int /*index_j*/) { return 0.0; }
-
-#endif
--- a/bench/btl/generic_bench/init/init_matrix.hh
+++ b/bench/btl/generic_bench/init/init_matrix.hh
@@ -1,61 +0,0 @@
-//=====================================================
-// File   :  init_matrix.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef INIT_MATRIX_HH
-#define INIT_MATRIX_HH
-
-// The Vector class must satisfy the following part of STL vector concept :
-//            resize() method
-//            [] operator for setting element
-//            value_type defined
-template <double init_function(int, int), class Vector>
-BTL_DONT_INLINE void init_row(Vector& X, int size, int row) {
-  X.resize(size);
-
-  for (unsigned int j = 0; j < X.size(); j++) {
-    X[j] = typename Vector::value_type(init_function(row, j));
-  }
-}
-
-// Matrix is a Vector of Vector
-// The Matrix class must satisfy the following part of STL vector concept :
-//            resize() method
-//            [] operator for setting rows
-template <double init_function(int, int), class Vector>
-BTL_DONT_INLINE void init_matrix(Vector& A, int size) {
-  A.resize(size);
-  for (unsigned int row = 0; row < A.size(); row++) {
-    init_row<init_function>(A[row], size, row);
-  }
-}
-
-template <double init_function(int, int), class Matrix>
-BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size) {
-  A.resize(size);
-  for (unsigned int row = 0; row < A.size(); row++) A[row].resize(size);
-  for (unsigned int row = 0; row < A.size(); row++) {
-    A[row][row] = init_function(row, row);
-    for (unsigned int col = 0; col < row; col++) {
-      double x = init_function(row, col);
-      A[row][col] = A[col][row] = x;
-    }
-  }
-}
-
-#endif
--- a/bench/btl/generic_bench/init/init_vector.hh
+++ b/bench/btl/generic_bench/init/init_vector.hh
@@ -1,36 +0,0 @@
-//=====================================================
-// File   :  init_vector.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef INIT_VECTOR_HH
-#define INIT_VECTOR_HH
-
-// The Vector class must satisfy the following part of STL vector concept :
-//            resize() method
-//            [] operator for setting element
-//            value_type defined
-template <double init_function(int), class Vector>
-void init_vector(Vector& X, int size) {
-  X.resize(size);
-
-  for (unsigned int i = 0; i < X.size(); i++) {
-    X[i] = typename Vector::value_type(init_function(i));
-  }
-}
-
-#endif
--- a/bench/btl/generic_bench/static/bench_static.hh
+++ b/bench/btl/generic_bench/static/bench_static.hh
@@ -1,61 +0,0 @@
-//=====================================================
-// File   :  bench_static.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef BENCH_STATIC_HH
-#define BENCH_STATIC_HH
-
-#include "btl.hh"
-#include "bench_parameter.hh"
-#include <iostream>
-#include "utilities.h"
-#include "xy_file.hh"
-#include "static/static_size_generator.hh"
-#include "timers/portable_perf_analyzer.hh"
-// #include "timers/mixed_perf_analyzer.hh"
-// #include "timers/x86_perf_analyzer.hh"
-
-using namespace std;
-
-template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
-BTL_DONT_INLINE void bench_static(void) {
-  if (BtlConfig::skipAction(Action<Interface<REAL_TYPE, 10> >::name())) return;
-
-  string filename = "bench_" + Action<Interface<REAL_TYPE, 10> >::name() + ".dat";
-
-  INFOS("starting " << filename);
-
-  const int max_size = TINY_MV_MAX_SIZE;
-
-  std::vector<double> tab_mflops;
-  std::vector<double> tab_sizes;
-
-  static_size_generator<max_size, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);
-
-  dump_xy_file(tab_sizes, tab_mflops, filename);
-}
-
-// default Perf Analyzer
-template <template <class> class Action, template <class, int> class Interface>
-BTL_DONT_INLINE void bench_static(void) {
-  bench_static<Portable_Perf_Analyzer, Action, Interface>();
-  // bench_static<Mixed_Perf_Analyzer,Action,Interface>();
-  // bench_static<X86_Perf_Analyzer,Action,Interface>();
-}
-
-#endif
--- a/bench/btl/generic_bench/static/intel_bench_fixed_size.hh
+++ b/bench/btl/generic_bench/static/intel_bench_fixed_size.hh
@@ -1,60 +0,0 @@
-//=====================================================
-// File   :  intel_bench_fixed_size.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:37 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _BENCH_FIXED_SIZE_HH_
-#define _BENCH_FIXED_SIZE_HH_
-
-#include "utilities.h"
-#include "function_time.hh"
-
-template <class Action>
-double bench_fixed_size(int size, unsigned long long& nb_calc, unsigned long long& nb_init) {
-  Action action(size);
-
-  double time_baseline = time_init(nb_init, action);
-
-  while (time_baseline < MIN_TIME) {
-    // INFOS("nb_init="<<nb_init);
-    // INFOS("time_baseline="<<time_baseline);
-    nb_init *= 2;
-    time_baseline = time_init(nb_init, action);
-  }
-
-  time_baseline = time_baseline / (double(nb_init));
-
-  double time_action = time_calculate(nb_calc, action);
-
-  while (time_action < MIN_TIME) {
-    nb_calc *= 2;
-    time_action = time_calculate(nb_calc, action);
-  }
-
-  INFOS("nb_init=" << nb_init);
-  INFOS("nb_calc=" << nb_calc);
-
-  time_action = time_action / (double(nb_calc));
-
-  action.check_result();
-
-  time_action = time_action - time_baseline;
-
-  return action.nb_op_base() / (time_action * 1000000.0);
-}
-
-#endif
--- a/bench/btl/generic_bench/static/static_size_generator.hh
+++ b/bench/btl/generic_bench/static/static_size_generator.hh
@@ -1,52 +0,0 @@
-//=====================================================
-// File   :  static_size_generator.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:36 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _STATIC_SIZE_GENERATOR_HH
-#define _STATIC_SIZE_GENERATOR_HH
-#include <vector>
-
-using namespace std;
-
-// recursive generation of statically defined matrix and vector sizes
-
-template <int SIZE, template <class> class Perf_Analyzer, template <class> class Action,
-          template <class, int> class Interface>
-struct static_size_generator {
-  static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
-    tab_sizes.push_back(SIZE);
-    std::cout << tab_sizes.back() << " \t" << std::flush;
-    Perf_Analyzer<Action<Interface<REAL_TYPE, SIZE> > > perf_action;
-    tab_mflops.push_back(perf_action.eval_mflops(SIZE));
-    std::cout << tab_mflops.back() << " MFlops" << std::endl;
-    static_size_generator<SIZE - 1, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);
-  };
-};
-
-// recursion end
-
-template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
-struct static_size_generator<1, Perf_Analyzer, Action, Interface> {
-  static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
-    tab_sizes.push_back(1);
-    Perf_Analyzer<Action<Interface<REAL_TYPE, 1> > > perf_action;
-    tab_mflops.push_back(perf_action.eval_mflops(1));
-  };
-};
-
-#endif
--- a/bench/btl/generic_bench/timers/STL_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/STL_perf_analyzer.hh
@@ -1,70 +0,0 @@
-//=====================================================
-// File   :  STL_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _STL_PERF_ANALYSER_HH
-#define _STL_PERF_ANALYSER_HH
-
-#include "STL_timer.hh"
-#include "bench_parameter.hh"
-
-template <class ACTION>
-class STL_Perf_Analyzer {
- public:
-  STL_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
-    MESSAGE("STL_Perf_Analyzer Ctor");
-  };
-  STL_Perf_Analyzer(const STL_Perf_Analyzer&) {
-    INFOS("Copy Ctor not implemented");
-    exit(0);
-  };
-  ~STL_Perf_Analyzer(void) { MESSAGE("STL_Perf_Analyzer Dtor"); };
-
-  inline double eval_mflops(int size) {
-    ACTION action(size);
-
-    _chronos.start_baseline(_nb_sample);
-
-    do {
-      action.initialize();
-    } while (_chronos.check());
-
-    double baseline_time = _chronos.get_time();
-
-    _chronos.start(_nb_sample);
-    do {
-      action.initialize();
-      action.calculate();
-    } while (_chronos.check());
-
-    double calculate_time = _chronos.get_time();
-
-    double corrected_time = calculate_time - baseline_time;
-
-    //    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
-
-    return action.nb_op_base() / (corrected_time * 1000000.0);
-    // return action.nb_op_base()/(calculate_time*1000000.0);
-  }
-
- private:
-  STL_Timer _chronos;
-  unsigned long long _nb_sample;
-};
-
-#endif
--- a/bench/btl/generic_bench/timers/STL_timer.hh
+++ b/bench/btl/generic_bench/timers/STL_timer.hh
@@ -1,75 +0,0 @@
-//=====================================================
-// File   :  STL_Timer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-// STL Timer Class. Adapted (L.P.) from the timer class by Musser et Al
-// described int the Book : STL Tutorial and reference guide.
-// Define a timer class for analyzing algorithm performance.
-#include <iostream>
-#include <iomanip>
-#include <vector>
-#include <map>
-#include <algorithm>
-using namespace std;
-
-class STL_Timer {
- public:
-  STL_Timer() { baseline = false; };  // Default constructor
-  // Start a series of r trials:
-  void start(unsigned int r) {
-    reps = r;
-    count = 0;
-    iterations.clear();
-    iterations.reserve(reps);
-    initial = time(0);
-  };
-  // Start a series of r trials to determine baseline time:
-  void start_baseline(unsigned int r) {
-    baseline = true;
-    start(r);
-  }
-  // Returns true if the trials have been completed, else false
-  bool check() {
-    ++count;
-    final = time(0);
-    if (initial < final) {
-      iterations.push_back(count);
-      initial = final;
-      count = 0;
-    }
-    return (iterations.size() < reps);
-  };
-  // Returns the results for external use
-  double get_time(void) {
-    sort(iterations.begin(), iterations.end());
-    return 1.0 / iterations[reps / 2];
-  };
-
- private:
-  unsigned int reps;  // Number of trials
-  // For storing loop iterations of a trial
-  vector<long> iterations;
-  // For saving initial and final times of a trial
-  time_t initial, final;
-  // For counting loop iterations of a trial
-  unsigned long count;
-  // true if this is a baseline computation, false otherwise
-  bool baseline;
-  // For recording the baseline time
-  double baseline_time;
-};
--- a/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh
@@ -1,58 +0,0 @@
-//=====================================================
-// File   :  mixed_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:36 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _MIXED_PERF_ANALYSER_HH
-#define _MIXED_PERF_ANALYSER_HH
-
-#include "x86_perf_analyzer.hh"
-#include "portable_perf_analyzer.hh"
-
-// choose portable perf analyzer for long calculations and x86 analyser for short ones
-
-template <class Action>
-class Mixed_Perf_Analyzer {
- public:
-  Mixed_Perf_Analyzer(void) : _x86pa(), _ppa(), _use_ppa(true) { MESSAGE("Mixed_Perf_Analyzer Ctor"); };
-  Mixed_Perf_Analyzer(const Mixed_Perf_Analyzer&) {
-    INFOS("Copy Ctor not implemented");
-    exit(0);
-  };
-  ~Mixed_Perf_Analyzer(void) { MESSAGE("Mixed_Perf_Analyzer Dtor"); };
-
-  inline double eval_mflops(int size) {
-    double result = 0.0;
-    if (_use_ppa) {
-      result = _ppa.eval_mflops(size);
-      if (_ppa.get_nb_calc() > DEFAULT_NB_SAMPLE) {
-        _use_ppa = false;
-      }
-    } else {
-      result = _x86pa.eval_mflops(size);
-    }
-
-    return result;
-  }
-
- private:
-  Portable_Perf_Analyzer<Action> _ppa;
-  X86_Perf_Analyzer<Action> _x86pa;
-  bool _use_ppa;
-};
-
-#endif
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -1,89 +0,0 @@
-//=====================================================
-// File   :  portable_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _PORTABLE_PERF_ANALYZER_HH
-#define _PORTABLE_PERF_ANALYZER_HH
-
-#include "utilities.h"
-#include "timers/portable_timer.hh"
-
-template <class Action>
-class Portable_Perf_Analyzer {
- public:
-  Portable_Perf_Analyzer() : _nb_calc(0), m_time_action(0), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
-  Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
-    INFOS("Copy Ctor not implemented");
-    exit(0);
-  };
-  ~Portable_Perf_Analyzer() { MESSAGE("Portable_Perf_Analyzer Dtor"); };
-
-  BTL_DONT_INLINE double eval_mflops(int size) {
-    Action action(size);
-
-    //     action.initialize();
-    //     time_action = time_calculate(action);
-    while (m_time_action < MIN_TIME) {
-      if (_nb_calc == 0)
-        _nb_calc = 1;
-      else
-        _nb_calc *= 2;
-      action.initialize();
-      m_time_action = time_calculate(action);
-    }
-
-    // optimize
-    for (int i = 1; i < BtlConfig::Instance.tries; ++i) {
-      Action _action(size);
-      std::cout << " " << _action.nb_op_base() * _nb_calc / (m_time_action * 1e6) << " ";
-      _action.initialize();
-      m_time_action = std::min(m_time_action, time_calculate(_action));
-    }
-
-    double time_action = m_time_action / (double(_nb_calc));
-
-    // check
-    if (BtlConfig::Instance.checkResults && size < 128) {
-      action.initialize();
-      action.calculate();
-      action.check_result();
-    }
-    return action.nb_op_base() / (time_action * 1e6);
-  }
-
-  BTL_DONT_INLINE double time_calculate(Action& action) {
-    // time measurement
-    action.calculate();
-    _chronos.start();
-    for (unsigned int ii = 0; ii < _nb_calc; ii++) {
-      action.calculate();
-    }
-    _chronos.stop();
-    return _chronos.user_time();
-  }
-
-  unsigned long long get_nb_calc() { return _nb_calc; }
-
- private:
-  unsigned long long _nb_calc;
-  double m_time_action;
-  Portable_Timer _chronos;
-};
-
-#endif  //_PORTABLE_PERF_ANALYZER_HH
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer_old.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer_old.hh
@@ -1,110 +0,0 @@
-//=====================================================
-// File   :  portable_perf_analyzer_old.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _PORTABLE_PERF_ANALYZER_HH
-#define _PORTABLE_PERF_ANALYZER_HH
-
-#include "utilities.h"
-#include "timers/portable_timer.hh"
-
-/// Older benchmark driver: times batches of initialize()+calculate(), then
-/// subtracts the separately measured cost of initialize() alone.
-///
-/// Action is expected to offer a constructor taking the problem size plus
-/// initialize(), calculate(), check_result() and nb_op_base().
-template <class Action>
-class Portable_Perf_Analyzer {
- public:
-  Portable_Perf_Analyzer() : _nb_calc(1), _nb_init(1), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); }
-
-  /// Copying is not supported: logs a message and terminates the process.
-  Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
-    INFOS("Copy Ctor not implemented");
-    exit(0);
-  }
-
-  ~Portable_Perf_Analyzer() { MESSAGE("Portable_Perf_Analyzer Dtor"); }
-
-  /// Benchmarks Action at the given size and returns nb_op_base() divided by
-  /// the baseline-corrected time of one iteration expressed in microseconds.
-  inline double eval_mflops(int size) {
-    Action action(size);
-
-    // Calibration: double the repetition count until one batch lasts at least MIN_TIME.
-    double time_action = time_calculate(action);
-    while (time_action < MIN_TIME) {
-      _nb_calc *= 2;
-      time_action = time_calculate(action);
-    }
-
-    // Take the minimum over NB_TRIES - 1 further batches.
-    for (int i = 1; i < NB_TRIES; ++i) time_action = std::min(time_action, time_calculate(action));
-
-    // Time of a single initialize()+calculate() iteration.
-    time_action = time_action / (double(_nb_calc));
-
-    action.check_result();
-
-    // Baseline: shortest time of initialize() alone, per call.
-    double time_baseline = time_init(action);
-    for (int i = 1; i < NB_TRIES; ++i) time_baseline = std::min(time_baseline, time_init(action));
-    time_baseline = time_baseline / (double(_nb_init));
-
-    // NOTE(review): nothing guards against the corrected time being zero or negative.
-    time_action = time_action - time_baseline;
-
-    return action.nb_op_base() / (time_action * 1000000.0);
-  }
-
-  /// Returns the timer reading for _nb_init consecutive initialize() calls.
-  inline double time_init(Action& action) {
-    _chronos.start();
-    // Counter type matches the unsigned long long bound (was a signed int).
-    for (unsigned long long ii = 0; ii < _nb_init; ii++) action.initialize();
-    _chronos.stop();
-    return _chronos.user_time();
-  }
-
-  /// Returns the timer reading for _nb_calc consecutive initialize()+calculate() pairs.
-  inline double time_calculate(Action& action) {
-    _chronos.start();
-    // Counter type matches the unsigned long long bound (was a signed int,
-    // which overflows once the doubled bound passes INT_MAX).
-    for (unsigned long long ii = 0; ii < _nb_calc; ii++) {
-      action.initialize();
-      action.calculate();
-    }
-    _chronos.stop();
-    return _chronos.user_time();
-  }
-
-  /// Number of iterations per timed batch after the last calibration.
-  unsigned long long get_nb_calc() { return _nb_calc; }
-
- private:
-  unsigned long long _nb_calc;  // iterations per timed calculate batch
-  unsigned long long _nb_init;  // iterations per timed initialize batch
-  Portable_Timer _chronos;
-};
-
-#endif  //_PORTABLE_PERF_ANALYZER_HH
--- a/bench/btl/generic_bench/timers/portable_timer.hh
+++ b/bench/btl/generic_bench/timers/portable_timer.hh
@@ -1,143 +0,0 @@
-//=====================================================
-// File   :  portable_timer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)> from boost lib
-// Copyright (C) EDF R&D,  lun sep 30 14:23:17 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-//  simple_time extracted from the boost library
-//
-#ifndef _PORTABLE_TIMER_HH
-#define _PORTABLE_TIMER_HH
-
-#include <ctime>
-#include <cstdlib>
-
-#include <time.h>
-
-#define USEC_IN_SEC 1000000
-
-//  timer  -------------------------------------------------------------------//
-
-//  A timer object measures CPU time.
-#if defined(_MSC_VER)
-
-#define NOMINMAX
-#include <windows.h>
-
-/*#ifndef hr_timer
-#include "hr_time.h"
-#define hr_timer
-#endif*/
-
-/// Stopwatch backed by QueryPerformanceCounter / QueryPerformanceFrequency.
-class Portable_Timer {
- public:
-  typedef struct {
-    LARGE_INTEGER start;
-    LARGE_INTEGER stop;
-  } stopWatch;
-
-  /// Zeroes both readings and queries the counter frequency once.
-  Portable_Timer() {
-    stopVal.QuadPart = 0;
-    startVal.QuadPart = 0;
-    QueryPerformanceFrequency(&frequency);
-  }
-
-  /// Records the counter value at the start of the measured interval.
-  void start() { QueryPerformanceCounter(&startVal); }
-
-  /// Records the counter value at the end of the measured interval.
-  void stop() { QueryPerformanceCounter(&stopVal); }
-
-  /// Counter ticks between the last start() and stop(), divided by the counter frequency.
-  double elapsed() {
-    LARGE_INTEGER delta;
-    delta.QuadPart = stopVal.QuadPart - startVal.QuadPart;
-    return ticksToSeconds(delta);
-  }
-
-  /// Same value as elapsed(); this implementation has no separate user-time measurement.
-  double user_time() { return elapsed(); }
-
- private:
-  double ticksToSeconds(LARGE_INTEGER& ticks) {
-    const double count = (double)ticks.QuadPart;
-    const double per_second = (double)frequency.QuadPart;
-    return count / per_second;
-  }
-
-  LARGE_INTEGER startVal;
-  LARGE_INTEGER stopVal;
-  LARGE_INTEGER frequency;
-
-};  // Portable_Timer
-
-#elif defined(__APPLE__)
-#include <CoreServices/CoreServices.h>
-#include <mach/mach_time.h>
-
-/// Stopwatch backed by mach_absolute_time().
-///
-/// mach_absolute_time() counts in ticks whose length is given by
-/// mach_timebase_info(); the tick is not guaranteed to be one nanosecond, so
-/// the conversion factor is queried once at construction.
-class Portable_Timer {
- public:
-  Portable_Timer() : m_stop_time(0.0), m_start_time(0.0), m_seconds_per_tick(1e-9) {
-    mach_timebase_info_data_t timebase;
-    // Keep the 1 ns-per-tick fallback if the query fails or reports a zero denominator.
-    if (mach_timebase_info(&timebase) == KERN_SUCCESS && timebase.denom != 0) {
-      m_seconds_per_tick = 1e-9 * double(timebase.numer) / double(timebase.denom);
-    }
-  }
-
-  /// Records the current time as the start of the measured interval.
-  void start() { m_start_time = now(); }
-
-  /// Records the current time as the end of the measured interval.
-  void stop() { m_stop_time = now(); }
-
-  /// Same value as user_time().
-  double elapsed() { return user_time(); }
-
-  /// Seconds between the last start() and stop(); 0 before either was called.
-  double user_time() { return m_stop_time - m_start_time; }
-
- private:
-  /// Current mach_absolute_time() reading converted to seconds.
-  double now() const { return double(mach_absolute_time()) * m_seconds_per_tick; }
-
-  double m_stop_time, m_start_time;
-  double m_seconds_per_tick;  // length of one mach_absolute_time() tick, in seconds
-
-};  // Portable_Timer (Apple)
-
-#else
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <unistd.h>
-#include <sys/times.h>
-
-/// Stopwatch backed by clock_gettime(); the clock id is fixed at construction.
-class Portable_Timer {
- public:
-  /// Uses CLOCK_REALTIME when BtlConfig::Instance.realclock is set and
-  /// CLOCK_PROCESS_CPUTIME_ID otherwise.
-  Portable_Timer()
-      : m_clkid(BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID),
-        m_stop_time(0.0),
-        m_start_time(0.0) {}
-
-  /// Uses the caller-supplied clock id.
-  Portable_Timer(int clkid) : m_clkid(clkid), m_stop_time(0.0), m_start_time(0.0) {}
-
-  /// Records the current clock reading as the start of the measured interval.
-  void start() { m_start_time = now(); }
-
-  /// Records the current clock reading as the end of the measured interval.
-  void stop() { m_stop_time = now(); }
-
-  /// Same value as user_time().
-  double elapsed() { return user_time(); }
-
-  /// Seconds between the last start() and stop(); 0 before either was called
-  /// (both readings are zero-initialised instead of being left indeterminate).
-  double user_time() { return m_stop_time - m_start_time; }
-
- private:
-  /// Reads the selected clock and returns it as seconds in a double.
-  double now() const {
-    timespec ts;
-    clock_gettime(m_clkid, &ts);
-    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
-  }
-
-  int m_clkid;
-  double m_stop_time, m_start_time;
-
-};  // Portable_Timer (Linux)
-
-#endif
-
-#endif  // _PORTABLE_TIMER_HH
--- a/bench/btl/generic_bench/timers/x86_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/x86_perf_analyzer.hh
@@ -1,91 +0,0 @@
-//=====================================================
-// File   :  x86_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _X86_PERF_ANALYSER_HH
-#define _X86_PERF_ANALYSER_HH
-
-#include "x86_timer.hh"
-#include "bench_parameter.hh"
-
-/// Benchmark driver based on X86_Timer cycle counts.
-///
-/// ACTION is expected to offer a constructor taking the problem size plus
-/// initialize(), calculate(), check_result() and nb_op_base().
-template <class ACTION>
-class X86_Perf_Analyzer {
- public:
-  /// Stores the number of samples per round and calibrates the timer frequency.
-  /// Members are initialised in declaration order (_chronos, then _nb_sample).
-  X86_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _chronos(), _nb_sample(nb_sample) {
-    MESSAGE("X86_Perf_Analyzer Ctor");
-    _chronos.find_frequency();
-  }
-
-  /// Copying is not supported: logs a message and terminates the process.
-  X86_Perf_Analyzer(const X86_Perf_Analyzer&) {
-    INFOS("Copy Ctor not implemented");
-    exit(0);
-  }
-
-  ~X86_Perf_Analyzer() { MESSAGE("X86_Perf_Analyzer Dtor"); }
-
-  /// Benchmarks ACTION at the given size and returns nb_op_base() divided by
-  /// the baseline-corrected time expressed in microseconds.
-  inline double eval_mflops(int size) {
-    ACTION action(size);
-
-    const int nb_loop = 5;
-    double calculate_time = 0.0;
-    double baseline_time = 0.0;
-
-    for (int j = 0; j < nb_loop; j++) {
-      // Round part 1: shortest initialize()+calculate() sample.
-      _chronos.clear();
-
-      // Counter type matches the unsigned long long bound (was a signed int).
-      for (unsigned long long i = 0; i < _nb_sample; i++) {
-        _chronos.start();
-        action.initialize();
-        action.calculate();
-        _chronos.stop();
-        _chronos.add_get_click();
-      }
-
-      calculate_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
-
-      // The result is verified once, after the first round of calculations.
-      if (j == 0) action.check_result();
-
-      // Round part 2: shortest initialize()-only sample, used as baseline.
-      _chronos.clear();
-
-      for (unsigned long long i = 0; i < _nb_sample; i++) {
-        _chronos.start();
-        action.initialize();
-        _chronos.stop();
-        _chronos.add_get_click();
-      }
-
-      baseline_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
-    }
-
-    // Average over the rounds of (calculation time - baseline time).
-    // NOTE(review): nothing guards against this being zero or negative.
-    double corrected_time = (calculate_time - baseline_time) / double(nb_loop);
-
-    return action.nb_op_base() / (corrected_time * 1000000.0);
-  }
-
- private:
-  X86_Timer _chronos;
-  unsigned long long _nb_sample;  // samples taken per round
-};
-
-#endif
--- a/bench/btl/generic_bench/timers/x86_timer.hh
+++ b/bench/btl/generic_bench/timers/x86_timer.hh
@@ -1,176 +0,0 @@
-//=====================================================
-// File   :  x86_timer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef _X86_TIMER_HH
-#define _X86_TIMER_HH
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <unistd.h>
-#include <sys/times.h>
-// #include "system_time.h"
-#define u32 unsigned int
-#include <asm/msr.h>
-#include "utilities.h"
-#include <ctime>
-#include <fstream>
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-// CPU frequency in Hz
-// #define FREQUENCY 648000000
-// #define FREQUENCY 1400000000
-#define FREQUENCY 1695000000
-
-using namespace std;
-
-/// Cycle-counter stopwatch built on the rdtsc macro, plus a histogram of the
-/// recorded samples ("clicks" are counter ticks between start() and stop()).
-class X86_Timer {
- public:
-  X86_Timer() : _frequency(FREQUENCY), _nb_sample(0) {
-    _click_start.n64 = 0;
-    _click_stop.n64 = 0;
-    MESSAGE("X86_Timer Default Ctor");
-  }
-
-  /// Stores the current counter value as the start of the interval.
-  inline void start() { rdtsc(_click_start.n32[0], _click_start.n32[1]); }
-
-  /// Stores the current counter value as the end of the interval.
-  inline void stop() { rdtsc(_click_stop.n32[0], _click_stop.n32[1]); }
-
-  /// Frequency in Hz: FREQUENCY until find_frequency() has run, the measured value afterwards.
-  inline double frequency() { return _frequency; }
-
-  /// Last interval converted to seconds.
-  /// NOTE(review): converts with the compile-time FREQUENCY, not the calibrated _frequency.
-  double get_elapsed_time_in_second() { return (_click_stop.n64 - _click_start.n64) / double(FREQUENCY); }
-
-  /// Counter ticks between the last start() and stop().
-  unsigned long long get_click() { return (_click_stop.n64 - _click_start.n64); }
-
-  /// Measures the counter frequency against the one-second resolution of time().
-  ///
-  /// The result is computed from the raw tick count, so calling this more than
-  /// once gives a fresh measurement instead of compounding the previous one.
-  inline void find_frequency() {
-    // Wait for the wall clock to move to its next second...
-    time_t initial = time(0);
-    while (time(0) == initial) {
-    }
-    // ...then count the ticks spent until the second after that.
-    initial = time(0);
-    start();
-    while (time(0) == initial) {
-    }
-    const time_t last = time(0);
-    stop();
-    _frequency = double(get_click()) / double(last - initial);
-  }
-
-  /// Adds the last interval to the histogram and appends the running statistics to the histories.
-  void add_get_click() {
-    _nb_sample++;
-    _counted_clicks[get_click()]++;
-    fill_history_clicks();
-  }
-
-  /// Writes the histogram as "clicks  occurrences" lines to the given file.
-  void dump_statistics(std::string filename) {
-    std::ofstream outfile(filename.c_str(), std::ios::out);
-
-    std::map<unsigned long long, unsigned long long>::iterator itr;
-    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
-      outfile << (*itr).first << "  " << (*itr).second << std::endl;
-    }
-
-    outfile.close();
-  }
-
-  /// Writes one "index mean shortest most_occured" line per recorded sample.
-  void dump_history(std::string filename) {
-    std::ofstream outfile(filename.c_str(), std::ios::out);
-
-    for (std::size_t i = 0; i < _history_mean_clicks.size(); i++) {
-      outfile << i << " " << _history_mean_clicks[i] << " " << _history_shortest_clicks[i] << " "
-              << _history_most_occured_clicks[i] << std::endl;
-    }
-
-    outfile.close();
-  }
-
-  /// Mean clicks per sample; 0 when no sample has been recorded.
-  double get_mean_clicks() {
-    if (_nb_sample == 0) return 0.0;
-
-    unsigned long long total_clicks = 0;
-
-    std::map<unsigned long long, unsigned long long>::iterator itr;
-    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
-      total_clicks += (*itr).second * (*itr).first;
-    }
-
-    return total_clicks / double(_nb_sample);
-  }
-
-  /// Smallest recorded click count; 0 when the histogram is empty
-  /// (dereferencing begin() of an empty map is undefined).
-  double get_shortest_clicks() {
-    if (_counted_clicks.empty()) return 0.0;
-    return double((*_counted_clicks.begin()).first);
-  }
-
-  /// Appends the current mean / shortest / most frequent values to the histories.
-  void fill_history_clicks() {
-    _history_mean_clicks.push_back(get_mean_clicks());
-    _history_shortest_clicks.push_back(get_shortest_clicks());
-    _history_most_occured_clicks.push_back(get_most_occured_clicks());
-  }
-
-  /// Click count with the most occurrences; on ties the largest click count wins.
-  double get_most_occured_clicks() {
-    unsigned long long moc = 0;
-    unsigned long long max_occurence = 0;
-
-    std::map<unsigned long long, unsigned long long>::iterator itr;
-
-    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
-      if (max_occurence <= (*itr).second) {
-        max_occurence = (*itr).second;
-        moc = (*itr).first;
-      }
-    }
-
-    return double(moc);
-  }
-
-  /// Drops the histogram, the histories and the sample count.
-  void clear() {
-    _counted_clicks.clear();
-
-    _history_mean_clicks.clear();
-    _history_shortest_clicks.clear();
-    _history_most_occured_clicks.clear();
-
-    _nb_sample = 0;
-  }
-
- private:
-  // The two 32-bit halves handed to rdtsc overlay the 64-bit tick count
-  // (assumes rdtsc stores the low word first and a little-endian layout).
-  // The halves must be `unsigned int`: `unsigned long` is 64 bits wide on LP64
-  // targets, where n32[1] would fall outside n64 and the high word be lost.
-  union {
-    unsigned int n32[2];
-    unsigned long long n64;
-  } _click_start;
-
-  union {
-    unsigned int n32[2];
-    unsigned long long n64;
-  } _click_stop;
-
-  double _frequency;
-
-  // click count -> number of samples with that count
-  std::map<unsigned long long, unsigned long long> _counted_clicks;
-
-  std::vector<double> _history_mean_clicks;
-  std::vector<double> _history_shortest_clicks;
-  std::vector<double> _history_most_occured_clicks;
-
-  unsigned long long _nb_sample;
-};
-
-#endif
--- a/bench/btl/generic_bench/utils/size_lin_log.hh
+++ b/bench/btl/generic_bench/utils/size_lin_log.hh
@@ -1,56 +0,0 @@
-//=====================================================
-// File   :  size_lin_log.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  mar déc 3 18:59:37 CET 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef SIZE_LIN_LOG
-#define SIZE_LIN_LOG
-
-#include "size_log.hh"
-
-/// Fills X with nb_point benchmark sizes: 1..9 in unit steps, then values
-/// produced by size_log() between 10 and size_max. When nb_point is at most
-/// 10 the sizes are simply 1..nb_point. The size_min argument is ignored.
-///
-/// Vector must provide resize() and operator[] with int-compatible elements.
-template <class Vector>
-void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector& X) {
-  const int log_start = 10;
-  const int linear_count = 9;
-
-  X.resize(nb_point);
-
-  // Few points requested: purely linear sizes.
-  if (!(nb_point > log_start)) {
-    for (int idx = 0; idx < nb_point; ++idx) X[idx] = idx + 1;
-    return;
-  }
-
-  // Linear head: 1..9.
-  for (int idx = 0; idx < linear_count; ++idx) X[idx] = idx + 1;
-
-  // Tail taken from size_log(), starting at 10.
-  const int log_count = nb_point - linear_count;
-  Vector log_size;
-  size_log(log_count, log_start, size_max, log_size);
-
-  for (int idx = 0; idx < log_count; ++idx) X[linear_count + idx] = log_size[idx];
-}
-
-#endif
--- a/bench/btl/generic_bench/utils/size_log.hh
+++ b/bench/btl/generic_bench/utils/size_log.hh
@@ -1,50 +0,0 @@
-//=====================================================
-// File   :  size_log.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:17 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef SIZE_LOG
-#define SIZE_LOG
-
-#include "math.h"
-// The Vector class must satisfy the following part of STL vector concept :
-//            resize() method
-//            [] operator for setting element
-// the vector element are int compatible.
-/// Fills X with nb_point sizes spaced evenly in log(size) between size_min and
-/// size_max; each value is exp() of the interpolated logarithm truncated to int.
-///
-/// A single point yields the value computed at size_min; a non-positive
-/// nb_point leaves X empty.
-template <class Vector>
-void size_log(const int nb_point, const int size_min, const int size_max, Vector& X) {
-  if (nb_point <= 0) {
-    X.resize(0);
-    return;
-  }
-
-  X.resize(nb_point);
-
-  float ls_min = log(float(size_min));
-  float ls_max = log(float(size_max));
-
-  // With one point there is no interval to divide; a zero step avoids the
-  // division by zero that would otherwise turn the only size into NaN.
-  float delta_ls = (nb_point > 1) ? (ls_max - ls_min) / (float(nb_point - 1)) : 0.0f;
-
-  for (int i = 0; i < nb_point; i++) {
-    float ls = ls_min + float(i) * delta_ls;
-
-    X[i] = int(exp(ls));
-  }
-}
-
-#endif
--- a/bench/btl/generic_bench/utils/utilities.h
+++ b/bench/btl/generic_bench/utils/utilities.h
@@ -1,130 +0,0 @@
-//=============================================================================
-// File      : utilities.h
-// Created   : mar jun 19 13:18:14 CEST 2001
-// Author    : Antoine YESSAYAN, Paul RASCLE, EDF
-// Project   : SALOME
-// Copyright : EDF 2001
-// $Header$
-//=============================================================================
-
-/* ---  Definition macros file to print information if _DEBUG_ is defined --- */
-
-#ifndef UTILITIES_H
-#define UTILITIES_H
-
-#include <stdlib.h>
-// # include <iostream> ok for gcc3.01
-#include <iostream>
-
-/* ---  INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */
-
-// The stream names are std::-qualified so that the macros work in translation
-// units that do not have `using namespace std` in effect.
-
-// Flushes stdout, then writes "<file> [<line>] : " to stderr.
-#define HEREWEARE          \
-  std::cout << std::flush; \
-  std::cerr << __FILE__ << " [" << __LINE__ << "] : " << std::flush;
-// Writes the location prefix followed by `chain` (a `<<`-chained expression) to stderr.
-#define INFOS(chain)                 \
-  {                                  \
-    HEREWEARE;                       \
-    std::cerr << chain << std::endl; \
-  }
-// Flushes stdout, then writes `chain` to stderr behind a "---PYSCRIPT--- " tag.
-#define PYSCRIPT(chain)                                   \
-  {                                                       \
-    std::cout << std::flush;                              \
-    std::cerr << "---PYSCRIPT--- " << chain << std::endl; \
-  }
-
-/* --- To print date and time of compilation of current source on stdout --- */
-
-// COMPILER expands to a string literal naming the compiler, selected from the
-// predefined macros tested below ("undefined" when none matches).
-// NOTE(review): every definition ends with a ';', so the macro only works as
-// the last token of a statement.
-#if defined(__NVCOMPILER)
-#define COMPILER "nvc++";
-#elif defined(__GNUC__)
-#define COMPILER "g++";
-#elif defined(__sun)
-#define COMPILER "CC";
-#elif defined(__KCC)
-#define COMPILER "KCC";
-#elif defined(__PGI)
-#define COMPILER "pgCC";
-#else
-#define COMPILER "undefined";
-#endif
-
-#ifdef INFOS_COMPILATION
-#error INFOS_COMPILATION already defined
-#endif
-// Prints "<file> [<line>] : COMPILED with <compiler>, <date> at <time>" on
-// stdout, followed by blank lines. The stream names are std::-qualified so the
-// macro does not depend on `using namespace std` at the point of use.
-#define INFOS_COMPILATION                      \
-  {                                            \
-    std::cerr << std::flush;                   \
-    std::cout << __FILE__;                     \
-    std::cout << " [" << __LINE__ << "] : ";   \
-    std::cout << "COMPILED with " << COMPILER; \
-    std::cout << ", " << __DATE__;             \
-    std::cout << " at " << __TIME__ << std::endl; \
-    std::cout << "\n\n";                       \
-    std::cout << std::flush;                   \
-  }
-
-#ifdef _DEBUG_
-
-/* --- the following MACROS are useful at debug time --- */
-/* Stream names are std::-qualified so the macros do not depend on
-   `using namespace std` being in effect where they are expanded. */
-
-// Flushes stdout, then writes "- Trace <file> [<line>] : " to stderr.
-#define HERE               \
-  std::cout << std::flush; \
-  std::cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << std::flush;
-// Traces a variable as "<name>=<value>".
-#define SCRUTE(var) \
-  HERE;             \
-  std::cerr << #var << "=" << var << std::endl;
-// Traces `chain` (a `<<`-chained expression).
-#define MESSAGE(chain)               \
-  {                                  \
-    HERE;                            \
-    std::cerr << chain << std::endl; \
-  }
-// Traces the return code and terminates the process with it.
-#define INTERRUPTION(code)                                        \
-  HERE;                                                           \
-  std::cerr << "INTERRUPTION return code= " << code << std::endl; \
-  exit(code);
-
-#ifndef ASSERT
-// Traces the failed condition and terminates with return code 1.
-#define ASSERT(condition)                                                    \
-  if (!(condition)) {                                                        \
-    HERE;                                                                    \
-    std::cerr << "CONDITION " << #condition << " NOT VERIFIED" << std::endl; \
-    INTERRUPTION(1);                                                         \
-  }
-#endif /* ASSERT */
-
-// Writes a separator line to stderr.
-#define REPERE             \
-  std::cout << std::flush; \
-  std::cerr << "   --------------" << std::endl << std::flush;
-// Traces "Begin of: <chain>" between two separator lines.
-#define BEGIN_OF(chain)                              \
-  {                                                  \
-    REPERE;                                          \
-    HERE;                                            \
-    std::cerr << "Begin of: " << chain << std::endl; \
-    REPERE;                                          \
-  }
-// Traces "Normal end of: <chain>" between two separator lines.
-#define END_OF(chain)                                     \
-  {                                                       \
-    REPERE;                                               \
-    HERE;                                                 \
-    std::cerr << "Normal end of: " << chain << std::endl; \
-    REPERE;                                               \
-  }
-
-#else /* ifdef _DEBUG_*/
-
-/* Without _DEBUG_ every tracing macro expands to nothing. */
-
-#define HERE
-#define SCRUTE(var)
-#define MESSAGE(chain)
-#define INTERRUPTION(code)
-
-#ifndef ASSERT
-#define ASSERT(condition)
-#endif /* ASSERT */
-
-#define REPERE
-#define BEGIN_OF(chain)
-#define END_OF(chain)
-
-#endif /* ifdef _DEBUG_*/
-
-#endif /* ifndef UTILITIES_H */
--- a/bench/btl/generic_bench/utils/xy_file.hh
+++ b/bench/btl/generic_bench/utils/xy_file.hh
@@ -1,71 +0,0 @@
-//=====================================================
-// File   :  xy_file.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>
-// Copyright (C) EDF R&D,  lun sep 30 14:23:20 CEST 2002
-//=====================================================
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; either version 2
-// of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-//
-#ifndef XY_FILE_HH
-#define XY_FILE_HH
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <vector>
-using namespace std;
-
-/// Reads whitespace-separated "size mflops" pairs from `filename` and appends
-/// them to tab_sizes / tab_mflops; reading stops at the first pair that fails
-/// to parse.
-///
-/// Returns false when the file cannot be opened (reported through INFOS unless
-/// `quiet` is set), true otherwise. The output vectors are not cleared first.
-///
-/// Declared inline: this is a non-template function defined in a header, so
-/// without it every translation unit including the header would emit its own
-/// definition and the link would fail.
-inline bool read_xy_file(const std::string& filename, std::vector<int>& tab_sizes, std::vector<double>& tab_mflops,
-                         bool quiet = false) {
-  std::ifstream input_file(filename.c_str(), std::ios::in);
-
-  if (!input_file) {
-    if (!quiet) {
-      INFOS("!!! Error opening " << filename);
-    }
-    return false;
-  }
-
-  int nb_point = 0;
-  int size = 0;
-  double mflops = 0;
-
-  while (input_file >> size >> mflops) {
-    nb_point++;
-    tab_sizes.push_back(size);
-    tab_mflops.push_back(mflops);
-  }
-  // Debug-only trace of the number of pairs read (expands to nothing without _DEBUG_).
-  SCRUTE(nb_point);
-
-  // The stream is closed by its destructor.
-  return true;
-}
-
-// The Vector class must satisfy the following part of STL vector concept :
-//            resize() method
-//            [] operator for setting element
-// the vector element must have the << operator define
-
-using namespace std;
-
-/// Writes one "X[i] Y[i]" line to `filename` for every index of X.
-///
-/// Both containers must provide operator[] with streamable elements; X must
-/// also provide size(). Y is indexed over the full range of X.
-template <class Vector_A, class Vector_B>
-void dump_xy_file(const Vector_A& X, const Vector_B& Y, const std::string& filename) {
-  std::ofstream out(filename.c_str(), std::ios::out);
-
-  const int count = X.size();
-  int idx = 0;
-  while (idx < count) {
-    out << X[idx] << " " << Y[idx] << std::endl;
-    ++idx;
-  }
-
-  out.close();
-}
-
-#endif
--- a/bench/btl/libs/BLAS/CMakeLists.txt
+++ b/bench/btl/libs/BLAS/CMakeLists.txt
@@ -1,47 +0,0 @@
-
-# Registers one BTL benchmark executable per BLAS implementation found on the
-# system. Every backend is built from the same main.cpp; the backend is chosen
-# at compile time through the CBLASNAME definition.
-# NOTE(review): btl_add_bench and the Find modules are defined outside this
-# file; presumably btl_add_bench creates the target and the BUILD_<target>
-# switch tested below - confirm against its definition.
-# NOTE(review): set_target_properties(... COMPILE_FLAGS ...) replaces the
-# property value, so flags set on the target earlier would be dropped - verify.
-
-# ATLAS backend (also defines HAS_LAPACK=1).
-find_package(ATLAS)
-if (ATLAS_FOUND)
-  btl_add_bench(btl_atlas main.cpp)
-  if(BUILD_btl_atlas)
-    target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
-    set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
-  endif()
-endif ()
-
-# Intel MKL backend (also defines HAS_LAPACK=1).
-find_package(MKL)
-if (MKL_FOUND)
-  btl_add_bench(btl_mkl main.cpp)
-  if(BUILD_btl_mkl)
-    target_link_libraries(btl_mkl ${MKL_LIBRARIES})
-    set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
-  endif()
-endif ()
-
-
-# OpenBLAS backend (HAS_LAPACK is not defined for this one).
-find_package(OPENBLAS)
-if (OPENBLAS_FOUND)
-  btl_add_bench(btl_openblas main.cpp)
-  if(BUILD_btl_openblas)
-    target_link_libraries(btl_openblas ${OPENBLAS_LIBRARIES} )
-    set_target_properties(btl_openblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=OPENBLAS")
-  endif()
-endif ()
-
-# ACML backend (also defines HAS_LAPACK=1).
-find_package(ACML)
-if (ACML_FOUND)
-  btl_add_bench(btl_acml main.cpp)
-  if(BUILD_btl_acml)
-    target_link_libraries(btl_acml ${ACML_LIBRARIES} )
-    set_target_properties(btl_acml PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1")
-  endif()
-endif ()
-
-# Eigen's own BLAS/LAPACK libraries: only when building inside the Eigen tree
-# and a working Fortran compiler is available.
-if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS)
-  # we are inside Eigen and blas/lapack interface is compilable
-  # NOTE(review): include_directories is directory-scoped, so it is not limited
-  # to btl_eigenblas.
-  include_directories(${Eigen_SOURCE_DIR})
-  btl_add_bench(btl_eigenblas main.cpp)
-  if(BUILD_btl_eigenblas)
-    target_link_libraries(btl_eigenblas eigen_blas eigen_lapack )
-    set_target_properties(btl_eigenblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=EigenBLAS")
-  endif()
-endif()
--- a/Show More
+++ b/Show More