mirror of
https://gitlab.com/libeigen/eigen.git
synced 2026-04-10 11:34:33 +08:00
Remove obsolete bench/ and btl/ directories
libeigen/eigen!2217 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
This commit is contained in:
@@ -74,9 +74,6 @@ if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
|
||||
option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF)
|
||||
option(EIGEN_BUILD_AOCL_BENCH "Build AOCL benchmark" OFF)
|
||||
# Avoid building docs if included from another project.
|
||||
# Building documentation requires creating and running executables on the host
|
||||
# platform. We shouldn't do this if cross-compiling.
|
||||
@@ -93,7 +90,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
|
||||
endif()
|
||||
option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL})
|
||||
|
||||
if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
|
||||
if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
|
||||
set(EIGEN_IS_BUILDING_ ON)
|
||||
endif()
|
||||
|
||||
@@ -764,66 +761,6 @@ if(EIGEN_BUILD_DOC)
|
||||
add_subdirectory(doc EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
|
||||
# Optional BTL benchmark suite; EXCLUDE_FROM_ALL keeps it out of the default build.
# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"?
if(EIGEN_BUILD_BTL)
  add_subdirectory("bench/btl" EXCLUDE_FROM_ALL)
endif()

# Optional sparse benchmark suite; never configured when targeting Windows.
if(EIGEN_BUILD_SPBENCH AND NOT WIN32)
  add_subdirectory("bench/spbench" EXCLUDE_FROM_ALL)
endif()
|
||||
#--------------------------------------------------------------------------------------#
|
||||
# AOCL BENCHMARK BUILD SECTION #
|
||||
#--------------------------------------------------------------------------------------#
|
||||
if(EIGEN_BUILD_AOCL_BENCH)
  # Architecture handed to -march; users may override the default on the command line.
  set(EIGEN_AOCL_BENCH_ARCH "znver5" CACHE STRING "Target architecture for AOCL benchmark")
  add_executable(benchmark_aocl EXCLUDE_FROM_ALL bench/benchmark_aocl.cpp)

  # check_cxx_compiler_flag() caches its answer under the result-variable name.
  # Deriving that name from the architecture makes a changed EIGEN_AOCL_BENCH_ARCH
  # trigger a fresh probe instead of silently reusing the previous answer.
  include(CheckCXXCompilerFlag)
  string(MAKE_C_IDENTIFIER
    "COMPILER_SUPPORTS_AOCL_ARCH_${EIGEN_AOCL_BENCH_ARCH}"
    eigen_aocl_arch_check_var)
  check_cxx_compiler_flag("-march=${EIGEN_AOCL_BENCH_ARCH}" ${eigen_aocl_arch_check_var})
  if(${eigen_aocl_arch_check_var})
    target_compile_options(benchmark_aocl PRIVATE -O3 -Wno-shadow "-march=${EIGEN_AOCL_BENCH_ARCH}")
  else()
    message(WARNING "${EIGEN_AOCL_BENCH_ARCH} architecture not supported by compiler")
    target_compile_options(benchmark_aocl PRIVATE -O3)
  endif()

  # Add custom flags if provided
  if(EIGEN_AOCL_BENCH_FLAGS)
    separate_arguments(CUSTOM_FLAGS NATIVE_COMMAND "${EIGEN_AOCL_BENCH_FLAGS}")
    target_compile_options(benchmark_aocl PRIVATE ${CUSTOM_FLAGS})

    # Check if OpenMP is requested in custom flags and link it
    string(FIND "${EIGEN_AOCL_BENCH_FLAGS}" "-fopenmp" OPENMP_REQUESTED)
    if(NOT OPENMP_REQUESTED EQUAL -1)
      find_package(OpenMP)
      if(OpenMP_CXX_FOUND)
        target_link_libraries(benchmark_aocl PRIVATE OpenMP::OpenMP_CXX)
      else()
        # Generic fallback: let compiler handle OpenMP linking
        if(MSVC)
          target_compile_options(benchmark_aocl PRIVATE "/openmp")
        else()
          target_compile_options(benchmark_aocl PRIVATE "-fopenmp")
          target_link_options(benchmark_aocl PRIVATE "-fopenmp")
        endif()
        message(STATUS "Using compiler OpenMP flags as fallback")
      endif()
    endif()
  endif()

  # NOTE(review): INCLUDE_INSTALL_DIR is defined outside this section; confirm it
  # names the include root the benchmark is expected to build against.
  target_include_directories(benchmark_aocl PRIVATE ${INCLUDE_INSTALL_DIR})

  # Select the multithreaded or the default AOCL code path in the benchmark source.
  if(EIGEN_AOCL_BENCH_USE_MT)
    target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_MT)
  else()
    target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_ALL)
  endif()

  # An executable has no consumers, so every link dependency is PRIVATE. All calls
  # for this target use the keyword signature; CMake rejects mixing it with the
  # plain signature on one target.
  target_link_libraries(benchmark_aocl PRIVATE Eigen3::Eigen)
  if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
    target_link_libraries(benchmark_aocl PRIVATE ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
  endif()
endif()
|
||||
#----------------------------------------------------------------------------------------#
|
||||
|
||||
if (EIGEN_BUILD_DEMOS)
|
||||
add_subdirectory(demos EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
@@ -872,9 +809,6 @@ if(PROJECT_IS_TOP_LEVEL)
|
||||
if (EIGEN_BUILD_LAPACK)
|
||||
message(STATUS "lapack | Build LAPACK subset library (not the same thing as Eigen)")
|
||||
endif()
|
||||
if(EIGEN_BUILD_AOCL_BENCH)
|
||||
message(STATUS "benchmark_aocl | Build AOCL benchmark executable")
|
||||
endif()
|
||||
message(STATUS "------------+--------------------------------------------------------------")
|
||||
message(STATUS "")
|
||||
endif()
|
||||
|
||||
@@ -6,6 +6,6 @@ Some files contain third-party code under BSD, LGPL, Apache, or other
|
||||
MPL2-compatible licenses, hence the other COPYING.* files here.
|
||||
|
||||
Note that some optional external dependencies (e.g. FFTW, MPFR C++)
|
||||
and some bundled benchmark code (bench/btl/) are distributed under
|
||||
different licenses, including the GPL. Refer to the individual source
|
||||
files and their respective COPYING files for details.
|
||||
are distributed under different licenses, including the GPL. Refer to
|
||||
the individual source files and their respective COPYING files for
|
||||
details.
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
|
||||
#include <Eigen/Sparse>
|
||||
#include <bench/BenchTimer.h>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 1024
|
||||
#endif
|
||||
|
||||
#ifndef DENSITY
|
||||
#define DENSITY 0.01
|
||||
#endif
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR double
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
|
||||
typedef Matrix<Scalar, Dynamic, 1> DenseVector;
|
||||
typedef SparseMatrix<Scalar> EigenSparseMatrix;
|
||||
|
||||
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) {
|
||||
dst.reserve(double(rows) * cols * density);
|
||||
for (int j = 0; j < cols; j++) {
|
||||
for (int i = 0; i < rows; i++) {
|
||||
Scalar v = (internal::random<float>(0, 1) < density) ? internal::random<Scalar>() : 0;
|
||||
if (v != 0) dst.insert(i, j) = v;
|
||||
}
|
||||
}
|
||||
dst.finalize();
|
||||
}
|
||||
|
||||
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) {
|
||||
// std::cout << "alloc " << nnzPerCol*cols << "\n";
|
||||
dst.reserve(nnzPerCol * cols);
|
||||
for (int j = 0; j < cols; j++) {
|
||||
std::set<int> aux;
|
||||
for (int i = 0; i < nnzPerCol; i++) {
|
||||
int k = internal::random<int>(0, rows - 1);
|
||||
while (aux.find(k) != aux.end()) k = internal::random<int>(0, rows - 1);
|
||||
aux.insert(k);
|
||||
|
||||
dst.insert(k, j) = internal::random<Scalar>();
|
||||
}
|
||||
}
|
||||
dst.finalize();
|
||||
}
|
||||
|
||||
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) {
|
||||
dst.setZero();
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
|
||||
}
|
||||
|
||||
#ifndef NOGMM
|
||||
#include "gmm/gmm.h"
|
||||
typedef gmm::csc_matrix<Scalar> GmmSparse;
|
||||
typedef gmm::col_matrix<gmm::wsvector<Scalar> > GmmDynSparse;
|
||||
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) {
|
||||
GmmDynSparse tmp(src.rows(), src.cols());
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value();
|
||||
gmm::copy(tmp, dst);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef NOMTL
|
||||
#include <boost/numeric/mtl/mtl.hpp>
|
||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
|
||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
|
||||
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) {
|
||||
mtl::matrix::inserter<MtlSparse> ins(dst);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CSPARSE
|
||||
extern "C" {
|
||||
#include "cs.h"
|
||||
}
|
||||
void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) {
|
||||
cs* aux = cs_spalloc(0, 0, 1, 1, 1);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
if (!cs_entry(aux, it.index(), j, it.value())) {
|
||||
std::cout << "cs_entry error\n";
|
||||
exit(2);
|
||||
}
|
||||
dst = cs_compress(aux);
|
||||
// cs_spfree(aux);
|
||||
}
|
||||
#endif // CSPARSE
|
||||
|
||||
#ifndef NOUBLAS
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
#include <boost/numeric/ublas/matrix.hpp>
|
||||
#include <boost/numeric/ublas/io.hpp>
|
||||
#include <boost/numeric/ublas/triangular.hpp>
|
||||
#include <boost/numeric/ublas/vector_sparse.hpp>
|
||||
#include <boost/numeric/ublas/matrix_sparse.hpp>
|
||||
#include <boost/numeric/ublas/vector_of_vector.hpp>
|
||||
#include <boost/numeric/ublas/operation.hpp>
|
||||
|
||||
typedef boost::numeric::ublas::compressed_matrix<Scalar, boost::numeric::ublas::column_major> UBlasSparse;
|
||||
|
||||
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) {
|
||||
dst.resize(src.rows(), src.cols(), false);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
|
||||
}
|
||||
|
||||
// Resizes dst to src.size() and copies the coefficients one by one.
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst) {
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size()) {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
|
||||
#endif
|
||||
|
||||
#ifdef OSKI
|
||||
extern "C" {
|
||||
#include <oski/oski.h>
|
||||
}
|
||||
#endif
|
||||
@@ -1,176 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BENCH_TIMERR_H
|
||||
#define EIGEN_BENCH_TIMERR_H
|
||||
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#define EIGEN_BT_UNDEF_NOMINMAX
|
||||
#endif
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Compiler barrier: makes the optimizer assume the memory reachable through `p`
// may be read or written, so benchmarked stores to it are not removed.
//
// The compiler is detected with the built-in __GNUC__/__clang__ macros because
// this function is defined before <Eigen/Core> is included, i.e. before the
// EIGEN_COMP_* macros exist when this header is the first Eigen header seen.
// On other compilers this is a no-op.
static inline void escape(void *p) {
#if defined(__GNUC__) || defined(__clang__)
  asm volatile("" : : "g"(p) : "memory");
#else
  (void)p;
#endif
}
|
||||
|
||||
// Compiler barrier: tells the optimizer that all memory may have been read or
// written, so pending stores are not reordered across or removed around it.
//
// The compiler is detected with the built-in __GNUC__/__clang__ macros because
// this function is defined before <Eigen/Core> is included, i.e. before the
// EIGEN_COMP_* macros exist when this header is the first Eigen header seen.
// On other compilers this is a no-op.
static inline void clobber() {
#if defined(__GNUC__) || defined(__clang__)
  asm volatile("" : : : "memory");
#endif
}
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
enum { CPU_TIMER = 0, REAL_TIMER = 1 };
|
||||
|
||||
/** Elapsed time timer keeping the best try.
|
||||
*
|
||||
* On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
|
||||
* On Windows we use QueryPerformanceCounter
|
||||
*
|
||||
* Important: on linux, you must link with -lrt
|
||||
*/
|
||||
class BenchTimer {
|
||||
public:
|
||||
BenchTimer() {
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
m_frequency = (double)freq.QuadPart;
|
||||
#endif
|
||||
reset();
|
||||
}
|
||||
|
||||
~BenchTimer() {}
|
||||
|
||||
inline void reset() {
|
||||
m_bests.fill(1e9);
|
||||
m_worsts.fill(0);
|
||||
m_totals.setZero();
|
||||
}
|
||||
inline void start() {
|
||||
m_starts[CPU_TIMER] = getCpuTime();
|
||||
m_starts[REAL_TIMER] = getRealTime();
|
||||
}
|
||||
inline void stop() {
|
||||
m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
|
||||
m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
|
||||
#if EIGEN_VERSION_AT_LEAST(2, 90, 0)
|
||||
m_bests = m_bests.cwiseMin(m_times);
|
||||
m_worsts = m_worsts.cwiseMax(m_times);
|
||||
#else
|
||||
m_bests(0) = std::min(m_bests(0), m_times(0));
|
||||
m_bests(1) = std::min(m_bests(1), m_times(1));
|
||||
m_worsts(0) = std::max(m_worsts(0), m_times(0));
|
||||
m_worsts(1) = std::max(m_worsts(1), m_times(1));
|
||||
#endif
|
||||
m_totals += m_times;
|
||||
}
|
||||
|
||||
/** Return the elapsed time in seconds between the last start/stop pair
|
||||
*/
|
||||
inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; }
|
||||
|
||||
/** Return the best elapsed time in seconds
|
||||
*/
|
||||
inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; }
|
||||
|
||||
/** Return the worst elapsed time in seconds
|
||||
*/
|
||||
inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; }
|
||||
|
||||
/** Return the total elapsed time in seconds.
|
||||
*/
|
||||
inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; }
|
||||
|
||||
inline double getCpuTime() const {
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER query_ticks;
|
||||
QueryPerformanceCounter(&query_ticks);
|
||||
return query_ticks.QuadPart / m_frequency;
|
||||
#elif __APPLE__
|
||||
return double(mach_absolute_time()) * 1e-9;
|
||||
#else
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
|
||||
return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline double getRealTime() const {
|
||||
#ifdef _WIN32
|
||||
SYSTEMTIME st;
|
||||
GetSystemTime(&st);
|
||||
return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
|
||||
#elif __APPLE__
|
||||
return double(mach_absolute_time()) * 1e-9;
|
||||
#else
|
||||
timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
double m_frequency;
|
||||
#endif
|
||||
Vector2d m_starts;
|
||||
Vector2d m_times;
|
||||
Vector2d m_bests;
|
||||
Vector2d m_worsts;
|
||||
Vector2d m_totals;
|
||||
|
||||
public:
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
||||
};
|
||||
|
||||
// Runs CODE REP times inside each of TRIES timed tries, after resetting TIMER.
// clobber() is called after every try. TIMER, TRIES and REP are parenthesised
// so that compound expressions passed as arguments expand as intended.
#define BENCH(TIMER, TRIES, REP, CODE)                                     \
  {                                                                        \
    (TIMER).reset();                                                       \
    for (int bench_tries_ = 0; bench_tries_ < (TRIES); ++bench_tries_) {   \
      (TIMER).start();                                                     \
      for (int bench_reps_ = 0; bench_reps_ < (REP); ++bench_reps_) {      \
        CODE;                                                              \
      }                                                                    \
      (TIMER).stop();                                                      \
      clobber();                                                           \
    }                                                                      \
  }
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
// clean #defined tokens
|
||||
#ifdef EIGEN_BT_UNDEF_NOMINMAX
|
||||
#undef EIGEN_BT_UNDEF_NOMINMAX
|
||||
#undef NOMINMAX
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
#undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_BENCH_TIMERR_H
|
||||
@@ -1,86 +0,0 @@
|
||||
|
||||
#ifndef EIGEN_BENCH_UTIL_H
|
||||
#define EIGEN_BENCH_UTIL_H
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include "BenchTimer.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#include <boost/preprocessor/repetition/enum_params.hpp>
|
||||
#include <boost/preprocessor/repetition.hpp>
|
||||
#include <boost/preprocessor/seq.hpp>
|
||||
#include <boost/preprocessor/array.hpp>
|
||||
#include <boost/preprocessor/arithmetic.hpp>
|
||||
#include <boost/preprocessor/comparison.hpp>
|
||||
#include <boost/preprocessor/punctuation.hpp>
|
||||
#include <boost/preprocessor/punctuation/comma.hpp>
|
||||
#include <boost/preprocessor/stringize.hpp>
|
||||
|
||||
// Declared separately so the noinline attribute can be attached to the function.
template <typename MatrixType>
void initMatrix_random(MatrixType& mat) __attribute__((noinline));

// Overwrites mat with random coefficients by calling its setRandom() member.
template <typename MatrixType>
void initMatrix_random(MatrixType& mat) {
  mat.setRandom();
}
|
||||
|
||||
// Declared separately so the noinline attribute can be attached to the function.
template <typename MatrixType>
void initMatrix_identity(MatrixType& mat) __attribute__((noinline));

// Overwrites mat with the identity by calling its setIdentity() member.
template <typename MatrixType>
void initMatrix_identity(MatrixType& mat) {
  mat.setIdentity();
}
|
||||
|
||||
// Reads the SSE control/status register (MXCSR), ORs in 32832 (0x8040) and
// writes it back. 0x8040 is bit 15 (flush-to-zero) plus bit 6
// (denormals-are-zero), so despite its name the macro switches denormal
// handling to "treat as zero" for the calling thread.
//
// `aux` is written by stmxcsr before it is read, hence it is declared as an
// output operand; `volatile` keeps the statement from being dropped now that it
// has an output which is never used afterwards.
// Expands to nothing when compiling with the Intel compiler.
#ifndef __INTEL_COMPILER
#define DISABLE_SSE_EXCEPTIONS()        \
  {                                     \
    int aux;                            \
    asm volatile(                       \
        "stmxcsr %[aux]           \n\t" \
        "orl $32832, %[aux]   \n\t"     \
        "ldmxcsr %[aux]           \n\t" \
        : [aux] "=m"(aux));             \
  }
#else
#define DISABLE_SSE_EXCEPTIONS()
#endif
|
||||
|
||||
#ifdef BENCH_GMM
|
||||
#include <gmm/gmm.h>
|
||||
// Resizes dst to the shape of src and copies every coefficient, column by column.
template <typename EigenMatrixType, typename GmmMatrixType>
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) {
  dst.resize(src.rows(), src.cols());
  for (int col = 0; col < src.cols(); ++col) {
    for (int row = 0; row < src.rows(); ++row) {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
|
||||
#endif
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
#include <gsl/gsl_matrix.h>
|
||||
#include <gsl/gsl_linalg.h>
|
||||
#include <gsl/gsl_eigen.h>
|
||||
template <typename EigenMatrixType>
|
||||
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) {
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BENCH_UBLAS
|
||||
#include <boost/numeric/ublas/matrix.hpp>
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
// Resizes dst to the shape of src and copies every coefficient, column by column.
template <typename EigenMatrixType, typename UblasMatrixType>
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) {
  dst.resize(src.rows(), src.cols());
  for (int col = 0; col < src.cols(); ++col) {
    for (int row = 0; row < src.rows(); ++row) {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
|
||||
// Resizes dst to src.size() and copies the coefficients one by one.
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst) {
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size()) {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_BENCH_UTIL_H
|
||||
108
bench/README.txt
108
bench/README.txt
@@ -1,108 +0,0 @@
|
||||
|
||||
This folder contains a couple of benchmark utilities and Eigen benchmarks.
|
||||
|
||||
****************************
|
||||
* bench_multi_compilers.sh *
|
||||
****************************
|
||||
|
||||
This script runs a benchmark with a set of different compilers/compiler options.
|
||||
It takes two arguments:
|
||||
- a file defining the list of the compilers with their options
|
||||
- the .cpp file of the benchmark
|
||||
|
||||
Examples:
|
||||
|
||||
$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
|
||||
|
||||
g++-4.1 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.271102 0.131416 0.422322 0.198633
|
||||
0.201658 0.102436 0.397566 0.207282
|
||||
|
||||
g++-4.2 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.107805 0.0890579 0.30265 0.161843
|
||||
0.127157 0.0712581 0.278341 0.191029
|
||||
|
||||
g++-4.3 -O3 -DNDEBUG -finline-limit=10000
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.134318 0.105291 0.3704 0.180966
|
||||
0.137703 0.0732472 0.31225 0.202204
|
||||
|
||||
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
|
||||
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||
0.226145 0.0941319 0.371873 0.159433
|
||||
0.109302 0.0837538 0.328102 0.173891
|
||||
|
||||
|
||||
$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
|
||||
|
||||
g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
|
||||
double, fixed-size 4x4: 0.00165105s 0.0778739s
|
||||
double, 32x32: 0.0654769s 0.075289s => x0.869674 (2)
|
||||
double, 128x128: 0.054148s 0.0419669s => x1.29025 (2)
|
||||
double, 512x512: 0.913799s 0.428533s => x2.13239 (2)
|
||||
double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2)
|
||||
|
||||
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
|
||||
double, fixed-size 4x4: 0.000589848s 0.019949s
|
||||
double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2)
|
||||
double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2)
|
||||
double, 512x512: 0.829436s 0.424438s => x1.9542 (2)
|
||||
double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2)
|
||||
|
||||
|
||||
|
||||
************************
|
||||
* benchmark_aocl *
|
||||
************************
|
||||
|
||||
This benchmark exercises Eigen operations using AMD Optimized Libraries
|
||||
(AOCL). It is disabled by default and can be enabled when configuring the
|
||||
build:
|
||||
|
||||
cmake .. -DEIGEN_BUILD_AOCL_BENCH=ON
|
||||
|
||||
The resulting `benchmark_aocl` target is compiled with `-O3` and, if the
|
||||
compiler supports it, `-march=znver5` for optimal performance on AMD
|
||||
processors.
|
||||
|
||||
The benchmark also links to `libblis-mt.so` and `libflame.so` so BLAS and
|
||||
LAPACK operations run with multithreaded AOCL when available.
|
||||
|
||||
By default the CMake build defines `EIGEN_USE_AOCL_MT` via the option
|
||||
`EIGEN_AOCL_BENCH_USE_MT` (enabled). Set this option to `OFF` if you want
|
||||
to build the benchmark using the single-threaded AOCL libraries instead,
|
||||
in which case `EIGEN_USE_AOCL_ALL` is defined.
|
||||
|
||||
|
||||
|
||||
Alternatively you can build the same benchmark using the
|
||||
`Makefile` in this directory. This allows experimenting with
|
||||
different compiler flags without reconfiguring CMake:
|
||||
|
||||
```
cd bench && make                            # builds with -O3 -march=znver5 by default
make clean && make CXX="clang++"            # use a compiler other than g++
make clean && make MARCH="" CXXFLAGS="-O2"  # example of custom flags
make AOCL_ROOT=/opt/aocl                    # use AOCL from a custom location
```

This Makefile links against `libblis-mt.so` and `libflame.so` so the
matrix multiplication benchmark exercises multithreaded BLIS when
`EIGEN_USE_AOCL_MT` is defined (enabled by default in the Makefile).

If you prefer to compile manually, ensure that the Eigen include path
points to the directory where `AOCL_Support.h` resides. For example:

```
clang++ -O3 -std=c++14 -I../build/install/include \
-march=znver5 -DEIGEN_USE_AOCL_MT \
benchmark_aocl.cpp -o benchmark_aocl \
-lblis-mt -lflame -lamdlibm -lpthread -lm
```

Replace `../build/install/include` with your actual Eigen install path.
|
||||
|
||||
When invoking `make`, you can point `AOCL_ROOT` to your AOCL
|
||||
installation directory so the Makefile links against `$(AOCL_ROOT)/lib`.
|
||||
|
||||
|
||||
@@ -1,772 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace std;
|
||||
|
||||
const int default_precision = 4;
|
||||
|
||||
// see --only-cubic-sizes
|
||||
bool only_cubic_sizes = false;
|
||||
|
||||
// see --dump-tables
|
||||
bool dump_tables = false;
|
||||
|
||||
// Returns how many times x can be halved before it reaches zero, minus the
// final step: floor(log2(x)) for x >= 1, and 0 for x == 0.
uint8_t log2_pot(size_t x) {
  size_t shifts = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++shifts;
  }
  return shifts;
}
|
||||
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
|
||||
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
|
||||
}
|
||||
|
||||
// just a helper to store a triple of K,M,N sizes for matrix product
|
||||
// Holds the K, M, N sizes of one matrix product.
struct size_triple_t {
  uint16_t k, m, n;

  // All sizes zero.
  size_triple_t() : k(0), m(0), n(0) {}

  // Explicit sizes; each value is narrowed to 16 bits.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}

  // Unpacks a compact triple: three 4-bit exponents (k in bits 8-11, m in bits
  // 4-7, n in bits 0-3), each turned back into the size 1 << exponent.
  size_triple_t(uint16_t compact)
      : k(1 << ((compact >> 8) & 0xf)), m(1 << ((compact >> 4) & 0xf)), n(1 << (compact & 0xf)) {}

  // True when all three sizes are equal.
  bool is_cubic() const { return k == m && m == n; }
};
|
||||
|
||||
// Streams a size triple as "(k, m, n)".
ostream& operator<<(ostream& s, const size_triple_t& t) {
  s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
  return s;
}
|
||||
|
||||
struct inputfile_entry_t {
|
||||
uint16_t product_size;
|
||||
uint16_t pot_block_size;
|
||||
size_triple_t nonpot_block_size;
|
||||
float gflops;
|
||||
};
|
||||
|
||||
struct inputfile_t {
|
||||
enum class type_t { unknown, all_pot_sizes, default_sizes };
|
||||
|
||||
string filename;
|
||||
vector<inputfile_entry_t> entries;
|
||||
type_t type;
|
||||
|
||||
inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) {
|
||||
ifstream stream(filename);
|
||||
if (!stream.is_open()) {
|
||||
cerr << "couldn't open input file: " << filename << endl;
|
||||
exit(1);
|
||||
}
|
||||
string line;
|
||||
while (getline(stream, line)) {
|
||||
if (line.empty()) continue;
|
||||
if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
|
||||
if (type != type_t::unknown) {
|
||||
cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
|
||||
exit(1);
|
||||
}
|
||||
type = type_t::all_pot_sizes;
|
||||
continue;
|
||||
}
|
||||
if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
|
||||
if (type != type_t::unknown) {
|
||||
cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
|
||||
exit(1);
|
||||
}
|
||||
type = type_t::default_sizes;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (type == type_t::unknown) {
|
||||
continue;
|
||||
}
|
||||
switch (type) {
|
||||
case type_t::all_pot_sizes: {
|
||||
unsigned int product_size, block_size;
|
||||
float gflops;
|
||||
int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops);
|
||||
if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff ||
|
||||
!isfinite(gflops)) {
|
||||
cerr << "ill-formed input file: " << filename << endl;
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
|
||||
continue;
|
||||
}
|
||||
inputfile_entry_t entry;
|
||||
entry.product_size = uint16_t(product_size);
|
||||
entry.pot_block_size = uint16_t(block_size);
|
||||
entry.gflops = gflops;
|
||||
entries.push_back(entry);
|
||||
break;
|
||||
}
|
||||
case type_t::default_sizes: {
|
||||
unsigned int product_size;
|
||||
float gflops;
|
||||
int bk, bm, bn;
|
||||
int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops);
|
||||
if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) {
|
||||
cerr << "ill-formed input file: " << filename << endl;
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
|
||||
continue;
|
||||
}
|
||||
inputfile_entry_t entry;
|
||||
entry.product_size = uint16_t(product_size);
|
||||
entry.pot_block_size = 0;
|
||||
entry.nonpot_block_size = size_triple_t(bk, bm, bn);
|
||||
entry.gflops = gflops;
|
||||
entries.push_back(entry);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
stream.close();
|
||||
if (type == type_t::unknown) {
|
||||
cerr << "Unrecognized input file " << filename << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (entries.empty()) {
|
||||
cerr << "didn't find any measurements in input file: " << filename << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// One measurement after preprocessing: the raw GFlops figure is replaced by an
// efficiency value.
struct preprocessed_inputfile_entry_t {
  // Product size and block size, copied from the corresponding input entry.
  uint16_t product_size;
  uint16_t block_size;

  // GFlops of this entry divided by the best GFlops seen for the same product size.
  float efficiency;
};
|
||||
|
||||
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) {
|
||||
return e1.efficiency < e2.efficiency;
|
||||
}
|
||||
|
||||
struct preprocessed_inputfile_t {
|
||||
string filename;
|
||||
vector<preprocessed_inputfile_entry_t> entries;
|
||||
|
||||
preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) {
|
||||
if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
|
||||
abort();
|
||||
}
|
||||
auto it = inputfile.entries.begin();
|
||||
auto it_first_with_given_product_size = it;
|
||||
while (it != inputfile.entries.end()) {
|
||||
++it;
|
||||
if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) {
|
||||
import_input_file_range_one_product_size(it_first_with_given_product_size, it);
|
||||
it_first_with_given_product_size = it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void import_input_file_range_one_product_size(const vector<inputfile_entry_t>::const_iterator& begin,
|
||||
const vector<inputfile_entry_t>::const_iterator& end) {
|
||||
uint16_t product_size = begin->product_size;
|
||||
float max_gflops = 0.0f;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
if (it->product_size != product_size) {
|
||||
cerr << "Unexpected ordering of entries in " << filename << endl;
|
||||
cerr << "(Expected all entries for product size " << hex << product_size << dec << " to be grouped)" << endl;
|
||||
exit(1);
|
||||
}
|
||||
max_gflops = max(max_gflops, it->gflops);
|
||||
}
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
preprocessed_inputfile_entry_t entry;
|
||||
entry.product_size = it->product_size;
|
||||
entry.block_size = it->pot_block_size;
|
||||
entry.efficiency = it->gflops / max_gflops;
|
||||
entries.push_back(entry);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void check_all_files_in_same_exact_order(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) {
|
||||
if (preprocessed_inputfiles.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[0];
|
||||
const size_t num_entries = first_file.entries.size();
|
||||
|
||||
for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
|
||||
if (preprocessed_inputfiles[i].entries.size() != num_entries) {
|
||||
cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and "
|
||||
<< first_file.filename << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t entry_index = 0; entry_index < num_entries; entry_index++) {
|
||||
const uint16_t entry_product_size = first_file.entries[entry_index].product_size;
|
||||
const uint16_t entry_block_size = first_file.entries[entry_index].block_size;
|
||||
for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
|
||||
const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
|
||||
if (cur_file.entries[entry_index].product_size != entry_product_size ||
|
||||
cur_file.entries[entry_index].block_size != entry_block_size) {
|
||||
cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float efficiency_of_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset) {
|
||||
if (subset.size() <= 1) {
|
||||
return 1.0f;
|
||||
}
|
||||
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
|
||||
const size_t num_entries = first_file.entries.size();
|
||||
float efficiency = 1.0f;
|
||||
size_t entry_index = 0;
|
||||
size_t first_entry_index_with_this_product_size = 0;
|
||||
uint16_t product_size = first_file.entries[0].product_size;
|
||||
while (entry_index < num_entries) {
|
||||
++entry_index;
|
||||
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
|
||||
float efficiency_this_product_size = 0.0f;
|
||||
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
|
||||
float efficiency_this_entry = 1.0f;
|
||||
for (auto i = subset.begin(); i != subset.end(); ++i) {
|
||||
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
|
||||
}
|
||||
efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
|
||||
}
|
||||
efficiency = min(efficiency, efficiency_this_product_size);
|
||||
if (entry_index < num_entries) {
|
||||
first_entry_index_with_this_product_size = entry_index;
|
||||
product_size = first_file.entries[entry_index].product_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return efficiency;
|
||||
}
|
||||
|
||||
void dump_table_for_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset) {
|
||||
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
|
||||
const size_t num_entries = first_file.entries.size();
|
||||
size_t entry_index = 0;
|
||||
size_t first_entry_index_with_this_product_size = 0;
|
||||
uint16_t product_size = first_file.entries[0].product_size;
|
||||
size_t i = 0;
|
||||
size_triple_t min_product_size(first_file.entries.front().product_size);
|
||||
size_triple_t max_product_size(first_file.entries.back().product_size);
|
||||
if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
|
||||
abort();
|
||||
}
|
||||
if (only_cubic_sizes) {
|
||||
cerr << "Can't generate tables with --only-cubic-sizes." << endl;
|
||||
abort();
|
||||
}
|
||||
cout << "struct LookupTable {" << endl;
|
||||
cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl;
|
||||
const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
|
||||
const size_t TableSize = NumSizes * NumSizes * NumSizes;
|
||||
cout << " static const size_t NumSizes = " << NumSizes << ";" << endl;
|
||||
cout << " static const unsigned short* Data() {" << endl;
|
||||
cout << " static const unsigned short data[" << TableSize << "] = {";
|
||||
while (entry_index < num_entries) {
|
||||
++entry_index;
|
||||
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
|
||||
float best_efficiency_this_product_size = 0.0f;
|
||||
uint16_t best_block_size_this_product_size = 0;
|
||||
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
|
||||
float efficiency_this_entry = 1.0f;
|
||||
for (auto i = subset.begin(); i != subset.end(); ++i) {
|
||||
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
|
||||
}
|
||||
if (efficiency_this_entry > best_efficiency_this_product_size) {
|
||||
best_efficiency_this_product_size = efficiency_this_entry;
|
||||
best_block_size_this_product_size = first_file.entries[e].block_size;
|
||||
}
|
||||
}
|
||||
if ((i++) % NumSizes) {
|
||||
cout << " ";
|
||||
} else {
|
||||
cout << endl << " ";
|
||||
}
|
||||
cout << "0x" << hex << best_block_size_this_product_size << dec;
|
||||
if (entry_index < num_entries) {
|
||||
cout << ",";
|
||||
first_entry_index_with_this_product_size = entry_index;
|
||||
product_size = first_file.entries[entry_index].product_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != TableSize) {
|
||||
cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
|
||||
abort();
|
||||
}
|
||||
cout << endl << " };" << endl;
|
||||
cout << " return data;" << endl;
|
||||
cout << " }" << endl;
|
||||
cout << "};" << endl;
|
||||
}
|
||||
|
||||
float efficiency_of_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition) {
|
||||
float efficiency = 1.0f;
|
||||
for (auto s = partition.begin(); s != partition.end(); ++s) {
|
||||
efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
|
||||
}
|
||||
return efficiency;
|
||||
}
|
||||
|
||||
// Fills `out_subset` with the first subset of `subset_size` indices taken
// from {0, ..., set_size - 1}, i.e. {0, 1, ..., subset_size - 1}.
// Requires 1 <= subset_size <= set_size (checked with assert).
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size) {
  assert(subset_size >= 1 && subset_size <= set_size);
  out_subset.clear();
  out_subset.reserve(subset_size);
  size_t next_index = 0;
  while (out_subset.size() < subset_size) {
    out_subset.push_back(next_index);
    ++next_index;
  }
}
|
||||
|
||||
// True when `subset` is the last subset in the enumeration order used by
// next_subset(): its first index is set_size - subset.size().
// `subset` must not be empty.
bool is_last_subset(const vector<size_t>& subset, size_t set_size) {
  const size_t first_index_of_last_subset = set_size - subset.size();
  return subset.front() == first_index_of_last_subset;
}
|
||||
|
||||
void next_subset(vector<size_t>& inout_subset, size_t set_size) {
|
||||
if (is_last_subset(inout_subset, set_size)) {
|
||||
cerr << "iterating past the last subset" << endl;
|
||||
abort();
|
||||
}
|
||||
size_t i = 1;
|
||||
while (inout_subset[inout_subset.size() - i] == set_size - i) {
|
||||
i++;
|
||||
assert(i <= inout_subset.size());
|
||||
}
|
||||
size_t first_index_to_change = inout_subset.size() - i;
|
||||
inout_subset[first_index_to_change]++;
|
||||
size_t p = inout_subset[first_index_to_change];
|
||||
for (size_t j = first_index_to_change + 1; j < inout_subset.size(); j++) {
|
||||
inout_subset[j] = ++p;
|
||||
}
|
||||
}
|
||||
|
||||
// Largest number of subsets of one size considered affordable to enumerate.
const size_t number_of_subsets_limit = 100;
// Lower bound that max_feasible_subset_size() applies to its result,
// whatever the number of subsets (itself capped at n - 1 there).
const size_t always_search_subsets_of_size_at_least = 2;

// Tells whether enumerating subsets of size `p` out of `n` elements stays
// within number_of_subsets_limit.
//
// The binomial coefficient is built up one factor at a time as
// n * (n-1) * ... / (1 * 2 * ...), and the check fails as soon as any
// intermediate value C(n, k), k <= p, exceeds the limit. The answer can
// therefore be `false` even when C(n, p) itself is within the limit
// (e.g. n = 10, p = 9).
// Requires 0 < p <= n (checked with assert).
bool is_number_of_subsets_feasible(size_t n, size_t p) {
  assert(n > 0 && p > 0 && p <= n);
  uint64_t falling_product = 1;  // n * (n - 1) * ... * (n - k + 1)
  uint64_t factorial = 1;        // k!
  for (size_t k = 1; k <= p; ++k) {
    falling_product *= n - (k - 1);
    factorial *= k;
    const bool too_many = falling_product > factorial * number_of_subsets_limit;
    if (too_many) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
size_t max_feasible_subset_size(size_t n) {
|
||||
assert(n > 0);
|
||||
const size_t minresult = min<size_t>(n - 1, always_search_subsets_of_size_at_least);
|
||||
for (size_t p = 1; p <= n - 1; p++) {
|
||||
if (!is_number_of_subsets_feasible(n, p + 1)) {
|
||||
return max(p, minresult);
|
||||
}
|
||||
}
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
void find_subset_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
float required_efficiency_to_beat, vector<size_t>& inout_remainder,
|
||||
vector<size_t>& out_subset) {
|
||||
out_subset.resize(0);
|
||||
|
||||
if (required_efficiency_to_beat >= 1.0f) {
|
||||
cerr << "can't beat efficiency 1." << endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
while (!inout_remainder.empty()) {
|
||||
vector<size_t> candidate_indices(inout_remainder.size());
|
||||
for (size_t i = 0; i < candidate_indices.size(); i++) {
|
||||
candidate_indices[i] = i;
|
||||
}
|
||||
|
||||
size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
|
||||
while (candidate_indices_subset_size >= 1) {
|
||||
vector<size_t> candidate_indices_subset;
|
||||
make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size());
|
||||
|
||||
vector<size_t> best_candidate_indices_subset;
|
||||
float best_efficiency = 0.0f;
|
||||
vector<size_t> trial_subset = out_subset;
|
||||
trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
|
||||
while (true) {
|
||||
for (size_t i = 0; i < candidate_indices_subset_size; i++) {
|
||||
trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
|
||||
}
|
||||
|
||||
float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
|
||||
if (trial_efficiency > best_efficiency) {
|
||||
best_efficiency = trial_efficiency;
|
||||
best_candidate_indices_subset = candidate_indices_subset;
|
||||
}
|
||||
if (is_last_subset(candidate_indices_subset, candidate_indices.size())) {
|
||||
break;
|
||||
}
|
||||
next_subset(candidate_indices_subset, candidate_indices.size());
|
||||
}
|
||||
|
||||
if (best_efficiency > required_efficiency_to_beat) {
|
||||
for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
|
||||
candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
|
||||
}
|
||||
candidate_indices.resize(best_candidate_indices_subset.size());
|
||||
}
|
||||
candidate_indices_subset_size--;
|
||||
}
|
||||
|
||||
size_t candidate_index = candidate_indices[0];
|
||||
auto candidate_iterator = inout_remainder.begin() + candidate_index;
|
||||
vector<size_t> trial_subset = out_subset;
|
||||
|
||||
trial_subset.push_back(*candidate_iterator);
|
||||
float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
|
||||
if (trial_efficiency > required_efficiency_to_beat) {
|
||||
out_subset.push_back(*candidate_iterator);
|
||||
inout_remainder.erase(candidate_iterator);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void find_partition_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
float required_efficiency_to_beat,
|
||||
vector<vector<size_t>>& out_partition) {
|
||||
out_partition.resize(0);
|
||||
|
||||
vector<size_t> remainder;
|
||||
for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
|
||||
remainder.push_back(i);
|
||||
}
|
||||
|
||||
while (!remainder.empty()) {
|
||||
vector<size_t> new_subset;
|
||||
find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder,
|
||||
new_subset);
|
||||
out_partition.push_back(new_subset);
|
||||
}
|
||||
}
|
||||
|
||||
void print_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition) {
|
||||
float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
|
||||
cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
|
||||
for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
|
||||
cout << " Subset " << (subset - partition.begin()) << ", efficiency "
|
||||
<< efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl;
|
||||
for (auto file = subset->begin(); file != subset->end(); ++file) {
|
||||
cout << " " << preprocessed_inputfiles[*file].filename << endl;
|
||||
}
|
||||
if (dump_tables) {
|
||||
cout << " Table:" << endl;
|
||||
dump_table_for_subset(preprocessed_inputfiles, *subset);
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
// Base type of the command-line actions. Derived types override both
// virtual functions; the defaults here abort the process when called.
struct action_t {
  virtual ~action_t() = default;

  // Name used on the command line to select the action.
  virtual const char* invokation_name() const {
    abort();
    return nullptr;
  }

  // Executes the action on the given list of input file names.
  virtual void run(const vector<string>&) const { abort(); }
};
|
||||
|
||||
struct partition_action_t : action_t {
|
||||
virtual const char* invokation_name() const override { return "partition"; }
|
||||
virtual void run(const vector<string>& input_filenames) const override {
|
||||
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
|
||||
|
||||
if (input_filenames.empty()) {
|
||||
cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
|
||||
inputfile_t inputfile(*it);
|
||||
switch (inputfile.type) {
|
||||
case inputfile_t::type_t::all_pot_sizes:
|
||||
preprocessed_inputfiles.emplace_back(inputfile);
|
||||
break;
|
||||
case inputfile_t::type_t::default_sizes:
|
||||
cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and "
|
||||
<< "has no use for " << *it << " which contains measurements for default sizes." << endl;
|
||||
exit(1);
|
||||
break;
|
||||
default:
|
||||
cerr << "Unrecognized input file: " << *it << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
check_all_files_in_same_exact_order(preprocessed_inputfiles);
|
||||
|
||||
float required_efficiency_to_beat = 0.0f;
|
||||
vector<vector<vector<size_t>>> partitions;
|
||||
cerr << "searching for partitions...\r" << flush;
|
||||
while (true) {
|
||||
vector<vector<size_t>> partition;
|
||||
find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition);
|
||||
float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
|
||||
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for "
|
||||
<< 100.0f * actual_efficiency << " % efficiency"
|
||||
<< " \r" << flush;
|
||||
partitions.push_back(partition);
|
||||
if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
|
||||
break;
|
||||
}
|
||||
required_efficiency_to_beat = actual_efficiency;
|
||||
}
|
||||
cerr << " " << endl;
|
||||
while (true) {
|
||||
bool repeat = false;
|
||||
for (size_t i = 0; i < partitions.size() - 1; i++) {
|
||||
if (partitions[i].size() >= partitions[i + 1].size()) {
|
||||
partitions.erase(partitions.begin() + i);
|
||||
repeat = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!repeat) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (auto it = partitions.begin(); it != partitions.end(); ++it) {
|
||||
print_partition(preprocessed_inputfiles, *it);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct evaluate_defaults_action_t : action_t {
|
||||
struct results_entry_t {
|
||||
uint16_t product_size;
|
||||
size_triple_t default_block_size;
|
||||
uint16_t best_pot_block_size;
|
||||
float default_gflops;
|
||||
float best_pot_gflops;
|
||||
float default_efficiency;
|
||||
};
|
||||
friend ostream& operator<<(ostream& s, const results_entry_t& entry) {
|
||||
return s << "Product size " << size_triple_t(entry.product_size) << ": default block size "
|
||||
<< entry.default_block_size << " -> " << entry.default_gflops
|
||||
<< " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
|
||||
<< " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> "
|
||||
<< entry.best_pot_gflops << " GFlop/s" << dec;
|
||||
}
|
||||
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
|
||||
return e1.default_efficiency < e2.default_efficiency;
|
||||
}
|
||||
virtual const char* invokation_name() const override { return "evaluate-defaults"; }
|
||||
void show_usage_and_exit() const {
|
||||
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
|
||||
cerr << "checks how well the performance with default sizes compares to the best "
|
||||
<< "performance measured over all POT sizes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
virtual void run(const vector<string>& input_filenames) const override {
|
||||
if (input_filenames.size() != 2) {
|
||||
show_usage_and_exit();
|
||||
}
|
||||
inputfile_t inputfile_default_sizes(input_filenames[0]);
|
||||
inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
|
||||
if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
|
||||
cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
|
||||
show_usage_and_exit();
|
||||
}
|
||||
if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) {
|
||||
cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl;
|
||||
show_usage_and_exit();
|
||||
}
|
||||
vector<results_entry_t> results;
|
||||
vector<results_entry_t> cubic_results;
|
||||
|
||||
uint16_t product_size = 0;
|
||||
auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
|
||||
for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
|
||||
it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) {
|
||||
if (it_default_sizes->product_size == product_size) {
|
||||
continue;
|
||||
}
|
||||
product_size = it_default_sizes->product_size;
|
||||
while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
|
||||
it_all_pot_sizes->product_size != product_size) {
|
||||
++it_all_pot_sizes;
|
||||
}
|
||||
if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
|
||||
break;
|
||||
}
|
||||
uint16_t best_pot_block_size = 0;
|
||||
float best_pot_gflops = 0;
|
||||
for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
|
||||
++it) {
|
||||
if (it->gflops > best_pot_gflops) {
|
||||
best_pot_gflops = it->gflops;
|
||||
best_pot_block_size = it->pot_block_size;
|
||||
}
|
||||
}
|
||||
results_entry_t entry;
|
||||
entry.product_size = product_size;
|
||||
entry.default_block_size = it_default_sizes->nonpot_block_size;
|
||||
entry.best_pot_block_size = best_pot_block_size;
|
||||
entry.default_gflops = it_default_sizes->gflops;
|
||||
entry.best_pot_gflops = best_pot_gflops;
|
||||
entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops;
|
||||
results.push_back(entry);
|
||||
|
||||
size_triple_t t(product_size);
|
||||
if (t.k == t.m && t.m == t.n) {
|
||||
cubic_results.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
cout << "All results:" << endl;
|
||||
for (auto it = results.begin(); it != results.end(); ++it) {
|
||||
cout << *it << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
sort(results.begin(), results.end(), lower_efficiency);
|
||||
|
||||
const size_t n = min<size_t>(20, results.size());
|
||||
cout << n << " worst results:" << endl;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
cout << results[i] << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
cout << "cubic results:" << endl;
|
||||
for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) {
|
||||
cout << *it << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
|
||||
|
||||
cout.precision(2);
|
||||
vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
|
||||
for (auto it = a.begin(); it != a.end(); ++it) {
|
||||
size_t n = min(results.size() - 1, size_t(*it * results.size()));
|
||||
cout << (100.0f * n / (results.size() - 1))
|
||||
<< " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %"
|
||||
<< endl;
|
||||
}
|
||||
cout.precision(default_precision);
|
||||
}
|
||||
};
|
||||
|
||||
void show_usage_and_exit(int argc, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
|
||||
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
|
||||
cerr << "available actions:" << endl;
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
cerr << " " << (*it)->invokation_name() << endl;
|
||||
}
|
||||
cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
cout.precision(default_precision);
|
||||
cerr.precision(default_precision);
|
||||
|
||||
vector<unique_ptr<action_t>> available_actions;
|
||||
available_actions.emplace_back(new partition_action_t);
|
||||
available_actions.emplace_back(new evaluate_defaults_action_t);
|
||||
|
||||
vector<string> input_filenames;
|
||||
|
||||
action_t* action = nullptr;
|
||||
|
||||
if (argc < 2) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
for (int i = 1; i < argc; i++) {
|
||||
bool arg_handled = false;
|
||||
// Step 1. Try to match action invocation names.
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
if (!strcmp(argv[i], (*it)->invokation_name())) {
|
||||
if (!action) {
|
||||
action = it->get();
|
||||
arg_handled = true;
|
||||
break;
|
||||
} else {
|
||||
cerr << "can't specify more than one action!" << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (arg_handled) {
|
||||
continue;
|
||||
}
|
||||
// Step 2. Try to match option names.
|
||||
if (argv[i][0] == '-') {
|
||||
if (!strcmp(argv[i], "--only-cubic-sizes")) {
|
||||
only_cubic_sizes = true;
|
||||
arg_handled = true;
|
||||
}
|
||||
if (!strcmp(argv[i], "--dump-tables")) {
|
||||
dump_tables = true;
|
||||
arg_handled = true;
|
||||
}
|
||||
if (!arg_handled) {
|
||||
cerr << "Unrecognized option: " << argv[i] << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
}
|
||||
if (arg_handled) {
|
||||
continue;
|
||||
}
|
||||
// Step 3. Default to interpreting args as input filenames.
|
||||
input_filenames.emplace_back(argv[i]);
|
||||
}
|
||||
|
||||
if (dump_tables && only_cubic_sizes) {
|
||||
cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
|
||||
if (!action) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
|
||||
action->run(input_filenames);
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG"
|
||||
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000"
|
||||
|
||||
# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
|
||||
#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
|
||||
|
||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
|
||||
#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
|
||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
|
||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
|
||||
|
||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
|
||||
#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
|
||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
|
||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
|
||||
|
||||
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx"
|
||||
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use"
|
||||
|
||||
#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt"
|
||||
# Active compiler configurations: each assignment stores one compiler command
# line (compiler plus flags) in the CLIST array. The subscript is evaluated
# arithmetically, so `((g++))` is a post-increment of the shell variable `g`
# (not the compiler name) and successive entries land at consecutive indices.
# Each compiler appears twice: with and without -DEIGEN_DONT_VECTORIZE.
# NOTE(review): `g` and CLIST are presumably initialised by the script that
# sources this file - confirm. The clang++ entries use an absolute path into
# one developer's home directory.
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
|
||||
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt"
|
||||
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
|
||||
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt"
|
||||
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
|
||||
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt"
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include "BenchUtil.h"
|
||||
#include "basicbenchmark.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
DISABLE_SSE_EXCEPTIONS();
|
||||
|
||||
// this is the list of matrix type and size we want to bench:
|
||||
// ((suffix) (matrix size) (number of iterations))
|
||||
#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000))
|
||||
// #define MODES ((Xd)(20)(10000))
|
||||
|
||||
#define _GENERATE_HEADER(R, ARG, EL) \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \
|
||||
<< "-" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
|
||||
|
||||
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl;
|
||||
|
||||
const int tries = 10;
|
||||
|
||||
#define _RUN_BENCH(R, ARG, EL) \
|
||||
std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \
|
||||
BOOST_PP_SEQ_ELEM(2, EL), tries) \
|
||||
<< " ";
|
||||
|
||||
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES);
|
||||
std::cout << endl;
|
||||
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES);
|
||||
std::cout << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
|
||||
#ifndef EIGEN_BENCH_BASICBENCH_H
|
||||
#define EIGEN_BENCH_BASICBENCH_H
|
||||
|
||||
// Values accepted by the `Mode` template parameter of the benchmark
// functions below; they select which expression benchBasic_loop evaluates.
enum { LazyEval, EarlyEval, OmpEval };
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) {
|
||||
for (int a = 0; a < iterations; a++) {
|
||||
if (Mode == LazyEval) {
|
||||
asm("#begin_bench_loop LazyEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
} else if (Mode == OmpEval) {
|
||||
asm("#begin_bench_loop OmpEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
} else {
|
||||
asm("#begin_bench_loop EarlyEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = I + 0.00005 * (m + m * m);
|
||||
}
|
||||
asm("#end_bench_loop");
|
||||
}
|
||||
}
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int iterations, int tries) {
|
||||
const int rows = mat.rows();
|
||||
const int cols = mat.cols();
|
||||
|
||||
MatrixType I(rows, cols);
|
||||
MatrixType m(rows, cols);
|
||||
|
||||
initMatrix_identity(I);
|
||||
|
||||
Eigen::BenchTimer timer;
|
||||
for (uint t = 0; t < tries; ++t) {
|
||||
initMatrix_random(m);
|
||||
timer.start();
|
||||
benchBasic_loop<Mode>(I, m, iterations);
|
||||
timer.stop();
|
||||
cerr << m;
|
||||
}
|
||||
return timer.value();
|
||||
};
|
||||
|
||||
#endif // EIGEN_BENCH_BASICBENCH_H
|
||||
@@ -1,199 +0,0 @@
|
||||
// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
|
||||
// possible options:
|
||||
// -DEIGEN_DONT_VECTORIZE
|
||||
// -msse2
|
||||
|
||||
// #define EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
#define _FLOAT
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include "BenchTimer.h"
|
||||
|
||||
// include the BLAS headers
|
||||
extern "C" {
|
||||
#include <cblas.h>
|
||||
}
|
||||
#include <string>
|
||||
|
||||
#ifdef _FLOAT
|
||||
typedef float Scalar;
|
||||
#define CBLAS_GEMM cblas_sgemm
|
||||
#else
|
||||
typedef double Scalar;
|
||||
#define CBLAS_GEMM cblas_dgemm
|
||||
#endif
|
||||
|
||||
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
|
||||
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
|
||||
void check_product(int M, int N, int K);
|
||||
void check_product(void);
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
// disable SSE exceptions
|
||||
#ifdef __GNUC__
|
||||
{
|
||||
int aux;
|
||||
asm("stmxcsr %[aux] \n\t"
|
||||
"orl $32832, %[aux] \n\t"
|
||||
"ldmxcsr %[aux] \n\t"
|
||||
:
|
||||
: [aux] "m"(aux));
|
||||
}
|
||||
#endif
|
||||
|
||||
int nbtries = 1, nbloops = 1, M, N, K;
|
||||
|
||||
if (argc == 2) {
|
||||
if (std::string(argv[1]) == "check")
|
||||
check_product();
|
||||
else
|
||||
M = N = K = atoi(argv[1]);
|
||||
} else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
|
||||
M = N = K = atoi(argv[2]);
|
||||
nbloops = 1000000000 / (M * M * M);
|
||||
if (nbloops < 1) nbloops = 1;
|
||||
nbtries = 6;
|
||||
} else if (argc == 4) {
|
||||
M = N = K = atoi(argv[1]);
|
||||
nbloops = atoi(argv[2]);
|
||||
nbtries = atoi(argv[3]);
|
||||
} else if (argc == 6) {
|
||||
M = atoi(argv[1]);
|
||||
N = atoi(argv[2]);
|
||||
K = atoi(argv[3]);
|
||||
nbloops = atoi(argv[4]);
|
||||
nbtries = atoi(argv[5]);
|
||||
} else {
|
||||
std::cout << "Usage: " << argv[0] << " size \n";
|
||||
std::cout << "Usage: " << argv[0] << " auto size\n";
|
||||
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
|
||||
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
|
||||
std::cout << "Usage: " << argv[0] << " check\n";
|
||||
std::cout << "Options:\n";
|
||||
std::cout << " size unique size of the 2 matrices (integer)\n";
|
||||
std::cout << " auto automatically set the number of repetitions and tries\n";
|
||||
std::cout << " nbloops number of times the GEMM routines is executed\n";
|
||||
std::cout << " nbtries number of times the loop is benched (return the best try)\n";
|
||||
std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
|
||||
std::cout << " check check eigen product using cblas as a reference\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
double nbmad = double(M) * double(N) * double(K) * double(nbloops);
|
||||
|
||||
if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
|
||||
|
||||
Scalar alpha, beta;
|
||||
MyMatrix ma(M, K), mb(K, N), mc(M, N);
|
||||
ma = MyMatrix::Random(M, K);
|
||||
mb = MyMatrix::Random(K, N);
|
||||
mc = MyMatrix::Random(M, N);
|
||||
|
||||
Eigen::BenchTimer timer;
|
||||
|
||||
// we simply compute c += a*b, so:
|
||||
alpha = 1;
|
||||
beta = 1;
|
||||
|
||||
// bench cblas
|
||||
// ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
|
||||
if (!(std::string(argv[1]) == "auto")) {
|
||||
timer.reset();
|
||||
for (uint k = 0; k < nbtries; ++k) {
|
||||
timer.start();
|
||||
for (uint j = 0; j < nbloops; ++j)
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
|
||||
mc.data(), N);
|
||||
#else
|
||||
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
|
||||
mc.data(), M);
|
||||
#endif
|
||||
timer.stop();
|
||||
}
|
||||
if (!(std::string(argv[1]) == "auto"))
|
||||
std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
|
||||
else
|
||||
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
|
||||
}
|
||||
|
||||
// clear
|
||||
ma = MyMatrix::Random(M, K);
|
||||
mb = MyMatrix::Random(K, N);
|
||||
mc = MyMatrix::Random(M, N);
|
||||
|
||||
// eigen
|
||||
// if (!(std::string(argv[1])=="auto"))
|
||||
{
|
||||
timer.reset();
|
||||
for (uint k = 0; k < nbtries; ++k) {
|
||||
timer.start();
|
||||
bench_eigengemm(mc, ma, mb, nbloops);
|
||||
timer.stop();
|
||||
}
|
||||
if (!(std::string(argv[1]) == "auto"))
|
||||
std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
|
||||
else
|
||||
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
|
||||
}
|
||||
|
||||
std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
|
||||
std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
// Accumulates the product ma * mb into mc, `nbloops` times, using Eigen's
// noalias() assignment.
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
  // Signed counter: `nbloops` is an int, and `uint` is not a standard type.
  for (int j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
}
|
||||
|
||||
// Prints "FAIL: <M>" to stdout when condition A is false.
// Wrapped in do { } while (0) so that the macro behaves as a single
// statement, e.g. inside an unbraced if/else.
#define MYVERIFY(A, M)                    \
  do {                                    \
    if (!(A)) {                           \
      std::cout << "FAIL: " << M << "\n"; \
    }                                     \
  } while (0)
|
||||
void check_product(int M, int N, int K) {
|
||||
MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
|
||||
ma = MyMatrix::Random(M, K);
|
||||
mb = MyMatrix::Random(K, N);
|
||||
maT = ma.transpose();
|
||||
mbT = mb.transpose();
|
||||
mc = MyMatrix::Random(M, N);
|
||||
|
||||
MyMatrix::Scalar eps = 1e-4;
|
||||
|
||||
meigen = mref = mc;
|
||||
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
|
||||
meigen += ma * mb;
|
||||
MYVERIFY(meigen.isApprox(mref, eps), ". * .");
|
||||
|
||||
meigen = mref = mc;
|
||||
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
|
||||
meigen += maT.transpose() * mb;
|
||||
MYVERIFY(meigen.isApprox(mref, eps), "T * .");
|
||||
|
||||
meigen = mref = mc;
|
||||
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
|
||||
meigen += (maT.transpose()) * (mbT.transpose());
|
||||
MYVERIFY(meigen.isApprox(mref, eps), "T * T");
|
||||
|
||||
meigen = mref = mc;
|
||||
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
|
||||
meigen += ma * mbT.transpose();
|
||||
MYVERIFY(meigen.isApprox(mref, eps), ". * T");
|
||||
}
|
||||
|
||||
void check_product(void) {
|
||||
int M, N, K;
|
||||
for (uint i = 0; i < 1000; ++i) {
|
||||
M = internal::random<int>(1, 64);
|
||||
N = internal::random<int>(1, 768);
|
||||
K = internal::random<int>(1, 768);
|
||||
M = (0 + M) * 1;
|
||||
std::cout << M << " x " << N << " x " << K << "\n";
|
||||
check_product(M, N, K);
|
||||
}
|
||||
}
|
||||
@@ -1,124 +0,0 @@
|
||||
// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp -o benchCholesky && ./benchCholesky
|
||||
// options:
|
||||
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
|
||||
// -DEIGEN_DONT_VECTORIZE
|
||||
// -msse2
|
||||
// -DREPEAT=100
|
||||
// -DTRIES=10
|
||||
// -DSCALAR=double
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Cholesky>
|
||||
#include <bench/BenchUtil.h>
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 10000
|
||||
#endif
|
||||
|
||||
#ifndef TRIES
|
||||
#define TRIES 10
|
||||
#endif
|
||||
|
||||
// Scalar type of the benchmarked matrices. Defaults to float and can be
// overridden at compile time with -DSCALAR=<type>, as listed in the build
// options at the top of this file.
#ifndef SCALAR
#define SCALAR float
#endif

typedef SCALAR Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__((noinline)) void benchLLT(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
double cost = 0;
|
||||
for (int j = 0; j < rows; ++j) {
|
||||
int r = std::max(rows - j - 1, 0);
|
||||
cost += 2 * (r * j + r + j);
|
||||
}
|
||||
|
||||
int repeats = (REPEAT * 1000) / (rows * rows);
|
||||
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
||||
|
||||
MatrixType a = MatrixType::Random(rows, cols);
|
||||
SquareMatrixType covMat = a * a.adjoint();
|
||||
|
||||
BenchTimer timerNoSqrt, timerSqrt;
|
||||
|
||||
Scalar acc = 0;
|
||||
int r = internal::random<int>(0, covMat.rows() - 1);
|
||||
int c = internal::random<int>(0, covMat.cols() - 1);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerNoSqrt.start();
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
LDLT<SquareMatrixType> cholnosqrt(covMat);
|
||||
acc += cholnosqrt.matrixL().coeff(r, c);
|
||||
}
|
||||
timerNoSqrt.stop();
|
||||
}
|
||||
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSqrt.start();
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
LLT<SquareMatrixType> chol(covMat);
|
||||
acc += chol.matrixL().coeff(r, c);
|
||||
}
|
||||
timerSqrt.stop();
|
||||
}
|
||||
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s "
|
||||
<< "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s "
|
||||
<< "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n";
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSqrt.reset();
|
||||
|
||||
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
|
||||
eiToGsl(covMat, &gslCovMat);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSqrt.start();
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_linalg_cholesky_decomp(gslCopy);
|
||||
acc += gsl_matrix_get(gslCopy, r, c);
|
||||
}
|
||||
timerSqrt.stop();
|
||||
}
|
||||
|
||||
std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s";
|
||||
|
||||
gsl_matrix_free(gslCovMat);
|
||||
}
|
||||
#endif
|
||||
std::cout << "\n";
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0};
|
||||
std::cout << "size LDLT LLT";
|
||||
// #ifdef BENCH_GSL
|
||||
// std::cout << " GSL (standard + double + ATLAS) ";
|
||||
// #endif
|
||||
std::cout << "\n";
|
||||
for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
|
||||
benchLLT(Matrix<Scalar, 2, 2>());
|
||||
benchLLT(Matrix<Scalar, 3, 3>());
|
||||
benchLLT(Matrix<Scalar, 4, 4>());
|
||||
benchLLT(Matrix<Scalar, 5, 5>());
|
||||
benchLLT(Matrix<Scalar, 6, 6>());
|
||||
benchLLT(Matrix<Scalar, 7, 7>());
|
||||
benchLLT(Matrix<Scalar, 8, 8>());
|
||||
benchLLT(Matrix<Scalar, 12, 12>());
|
||||
benchLLT(Matrix<Scalar, 16, 16>());
|
||||
return 0;
|
||||
}
|
||||
@@ -1,192 +0,0 @@
|
||||
|
||||
// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver
|
||||
// options:
|
||||
// -DBENCH_GMM
|
||||
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
|
||||
// -DEIGEN_DONT_VECTORIZE
|
||||
// -msse2
|
||||
// -DREPEAT=100
|
||||
// -DTRIES=10
|
||||
// -DSCALAR=double
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/QR>
|
||||
#include <bench/BenchUtil.h>
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 1000
|
||||
#endif
|
||||
|
||||
#ifndef TRIES
|
||||
#define TRIES 4
|
||||
#endif
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows))));
|
||||
int saRepeats = stdRepeats * 4;
|
||||
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
||||
|
||||
MatrixType a = MatrixType::Random(rows, cols);
|
||||
SquareMatrixType covMat = a * a.adjoint();
|
||||
|
||||
BenchTimer timerSa, timerStd;
|
||||
|
||||
Scalar acc = 0;
|
||||
int r = internal::random<int>(0, covMat.rows() - 1);
|
||||
int c = internal::random<int>(0, covMat.cols() - 1);
|
||||
{
|
||||
SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
ei.compute(covMat);
|
||||
acc += ei.eigenvectors().coeff(r, c);
|
||||
}
|
||||
timerSa.stop();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
EigenSolver<SquareMatrixType> ei(covMat);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerStd.start();
|
||||
for (int k = 0; k < stdRepeats; ++k) {
|
||||
ei.compute(covMat);
|
||||
acc += ei.eigenvectors().coeff(r, c);
|
||||
}
|
||||
timerStd.stop();
|
||||
}
|
||||
}
|
||||
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t"
|
||||
<< timerStd.value() * REPEAT / stdRepeats << "s";
|
||||
|
||||
#ifdef BENCH_GMM
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSa.reset();
|
||||
timerStd.reset();
|
||||
|
||||
gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(), covMat.cols());
|
||||
gmm::dense_matrix<Scalar> eigvect(covMat.rows(), covMat.cols());
|
||||
std::vector<Scalar> eigval(covMat.rows());
|
||||
eiToGmm(covMat, gmmCovMat);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
|
||||
acc += eigvect(r, c);
|
||||
}
|
||||
timerSa.stop();
|
||||
}
|
||||
// the non-selfadjoint solver does not compute the eigen vectors
|
||||
// for (int t=0; t<TRIES; ++t)
|
||||
// {
|
||||
// timerStd.start();
|
||||
// for (int k=0; k<stdRepeats; ++k)
|
||||
// {
|
||||
// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
|
||||
// acc += eigvect(r,c);
|
||||
// }
|
||||
// timerStd.stop();
|
||||
// }
|
||||
|
||||
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s"
|
||||
<< /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSa.reset();
|
||||
timerStd.reset();
|
||||
|
||||
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
|
||||
gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
|
||||
|
||||
gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
|
||||
gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
|
||||
|
||||
eiToGsl(covMat, &gslCovMat);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
|
||||
acc += gsl_matrix_get(eigvect, r, c);
|
||||
}
|
||||
timerSa.stop();
|
||||
}
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerStd.start();
|
||||
for (int k = 0; k < stdRepeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
|
||||
acc += GSL_REAL(gsl_matrix_complex_get(eigvectz, r, c));
|
||||
}
|
||||
timerStd.stop();
|
||||
}
|
||||
|
||||
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s \t" << timerStd.value() * REPEAT / stdRepeats
|
||||
<< "s";
|
||||
|
||||
gsl_matrix_free(gslCovMat);
|
||||
gsl_vector_free(gslCopy);
|
||||
gsl_matrix_free(eigvect);
|
||||
gsl_vector_free(eigval);
|
||||
gsl_matrix_complex_free(eigvectz);
|
||||
gsl_vector_complex_free(eigvalz);
|
||||
gsl_eigen_symmv_free(eisymm);
|
||||
gsl_eigen_nonsymmv_free(einonsymm);
|
||||
}
|
||||
#endif
|
||||
|
||||
std::cout << "\n";
|
||||
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 12, 16, 24, 32, 64, 128, 256, 512, 0};
|
||||
std::cout << "size selfadjoint generic";
|
||||
#ifdef BENCH_GMM
|
||||
std::cout << " GMM++ ";
|
||||
#endif
|
||||
#ifdef BENCH_GSL
|
||||
std::cout << " GSL (double + ATLAS) ";
|
||||
#endif
|
||||
std::cout << "\n";
|
||||
for (uint i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
|
||||
benchEigenSolver(Matrix<Scalar, 2, 2>());
|
||||
benchEigenSolver(Matrix<Scalar, 3, 3>());
|
||||
benchEigenSolver(Matrix<Scalar, 4, 4>());
|
||||
benchEigenSolver(Matrix<Scalar, 6, 6>());
|
||||
benchEigenSolver(Matrix<Scalar, 8, 8>());
|
||||
benchEigenSolver(Matrix<Scalar, 12, 12>());
|
||||
benchEigenSolver(Matrix<Scalar, 16, 16>());
|
||||
return 0;
|
||||
}
|
||||
@@ -1,117 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <bench/BenchUtil.h>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <Eigen/Core>
|
||||
|
||||
#include <unsupported/Eigen/FFT>
|
||||
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
// Returns the printable name of a floating-point scalar type.
// Only the declaration is generic; each supported type is provided as an
// explicit specialization (float, double, long double).
template <typename T>
std::string nameof();

// Name of the single-precision type.
template <>
std::string nameof<float>() {
  return std::string("float");
}

// Name of the double-precision type.
template <>
std::string nameof<double>() {
  return std::string("double");
}

// Name of the extended-precision type.
template <>
std::string nameof<long double>() {
  return std::string("long double");
}
|
||||
|
||||
#ifndef TYPE
|
||||
#define TYPE float
|
||||
#endif
|
||||
|
||||
#ifndef NFFT
|
||||
#define NFFT 1024
|
||||
#endif
|
||||
#ifndef NDATA
|
||||
#define NDATA 1000000
|
||||
#endif
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename T>
|
||||
void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) {
|
||||
typedef typename NumTraits<T>::Real Scalar;
|
||||
typedef typename std::complex<Scalar> Complex;
|
||||
int nits = NDATA / nfft;
|
||||
vector<T> inbuf(nfft);
|
||||
vector<Complex> outbuf(nfft);
|
||||
FFT<Scalar> fft;
|
||||
|
||||
if (unscaled) {
|
||||
fft.SetFlag(fft.Unscaled);
|
||||
cout << "unscaled ";
|
||||
}
|
||||
if (halfspec) {
|
||||
fft.SetFlag(fft.HalfSpectrum);
|
||||
cout << "halfspec ";
|
||||
}
|
||||
|
||||
std::fill(inbuf.begin(), inbuf.end(), 0);
|
||||
fft.fwd(outbuf, inbuf);
|
||||
|
||||
BenchTimer timer;
|
||||
timer.reset();
|
||||
for (int k = 0; k < 8; ++k) {
|
||||
timer.start();
|
||||
if (fwd)
|
||||
for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf);
|
||||
else
|
||||
for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf);
|
||||
timer.stop();
|
||||
}
|
||||
|
||||
cout << nameof<Scalar>() << " ";
|
||||
double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits);
|
||||
if (NumTraits<T>::IsComplex) {
|
||||
cout << "complex";
|
||||
} else {
|
||||
cout << "real ";
|
||||
mflops /= 2;
|
||||
}
|
||||
|
||||
if (fwd)
|
||||
cout << " fwd";
|
||||
else
|
||||
cout << " inv";
|
||||
|
||||
cout << " NFFT=" << nfft << " " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s " << mflops << "MFLOPS\n";
|
||||
}
|
||||
|
||||
// Runs the FFT benchmark at size NFFT for float, double and long double, each
// with complex and real input in both directions; float additionally runs the
// inverse real transform with the unscaled and unscaled + half-spectrum flags.
int main(int argc, char** argv) {
  const bool kForward = true;
  const bool kInverse = false;

  // single precision
  bench<complex<float> >(NFFT, kForward);
  bench<complex<float> >(NFFT, kInverse);
  bench<float>(NFFT, kForward);
  bench<float>(NFFT, kInverse);
  bench<float>(NFFT, kInverse, true);
  bench<float>(NFFT, kInverse, true, true);

  // double precision
  bench<complex<double> >(NFFT, kForward);
  bench<complex<double> >(NFFT, kInverse);
  bench<double>(NFFT, kForward);
  bench<double>(NFFT, kInverse);

  // extended precision
  bench<complex<long double> >(NFFT, kForward);
  bench<complex<long double> >(NFFT, kInverse);
  bench<long double>(NFFT, kForward);
  bench<long double>(NFFT, kInverse);
  return 0;
}
|
||||
@@ -1,120 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Geometry>
|
||||
#include <bench/BenchTimer.h>
|
||||
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 1000000
|
||||
#endif
|
||||
|
||||
// Selects which product expression a func<> specialization evaluates.
enum func_opt {
  TV = 0,       // a1 * a2
  TMATV = 1,    // a1.matrix() * a2
  TMATVMAT = 2  // a1.matrix() * a2.matrix()
};
|
||||
|
||||
template <class res, class arg1, class arg2, int opt>
|
||||
struct func;
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TV> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return a1 * a2;
|
||||
}
|
||||
};
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TMATV> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return a1.matrix() * a2;
|
||||
}
|
||||
};
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TMATVMAT> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return res(a1.matrix() * a2.matrix());
|
||||
}
|
||||
};
|
||||
|
||||
template <class func, class arg1, class arg2>
|
||||
struct test_transform {
|
||||
static void run() {
|
||||
arg1 a1;
|
||||
a1.setIdentity();
|
||||
arg2 a2;
|
||||
a2.setIdentity();
|
||||
|
||||
BenchTimer timer;
|
||||
timer.reset();
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
timer.start();
|
||||
for (int k = 0; k < REPEAT; ++k) a2 = func::run(a1, a2);
|
||||
timer.stop();
|
||||
}
|
||||
cout << setprecision(4) << fixed << timer.value() << "s " << endl;
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
// Prints a label for, then times, "vector = transform (op) vector".
//   op:     func_opt value selecting the product expression.
//   scalar: scalar type of the transform and the vector.
//   mode:   Transform mode (e.g. Isometry, Projective).
//   option: alignment option (AutoAlign / DontAlign) for both types.
//   vsize:  number of rows of the vector.
// Wrapped in do { } while (0) so the label output and the benchmark form a
// single statement and the typedefs stay local to the expansion.
#define run_vec(op, scalar, mode, option, vsize)                                     \
  do {                                                                               \
    std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
    typedef Transform<scalar, 3, mode, option> Trans;                                \
    typedef Matrix<scalar, vsize, 1, option> Vec;                                    \
    typedef func<Vec, Trans, Vec, op> Func;                                          \
    test_transform<Func, Trans, Vec>::run();                                         \
  } while (0)
|
||||
|
||||
// Prints a label for, then times, "transform = transform (op) transform".
//   op:     func_opt value selecting the product expression.
//   scalar: scalar type of the transforms.
//   mode:   Transform mode (e.g. Isometry, Projective).
//   option: alignment option (AutoAlign / DontAlign).
// Wrapped in do { } while (0) so the label output and the benchmark form a
// single statement and the typedefs stay local to the expansion.
#define run_trans(op, scalar, mode, option)                            \
  do {                                                                 \
    std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \
    typedef Transform<scalar, 3, mode, option> Trans;                  \
    typedef func<Trans, Trans, Trans, op> Func;                        \
    test_transform<Func, Trans, Trans>::run();                         \
  } while (0)
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
cout << "vec = trans * vec" << endl;
|
||||
run_vec(TV, float, Isometry, AutoAlign, 3);
|
||||
run_vec(TV, float, Isometry, DontAlign, 3);
|
||||
run_vec(TV, float, Isometry, AutoAlign, 4);
|
||||
run_vec(TV, float, Isometry, DontAlign, 4);
|
||||
run_vec(TV, float, Projective, AutoAlign, 4);
|
||||
run_vec(TV, float, Projective, DontAlign, 4);
|
||||
run_vec(TV, double, Isometry, AutoAlign, 3);
|
||||
run_vec(TV, double, Isometry, DontAlign, 3);
|
||||
run_vec(TV, double, Isometry, AutoAlign, 4);
|
||||
run_vec(TV, double, Isometry, DontAlign, 4);
|
||||
run_vec(TV, double, Projective, AutoAlign, 4);
|
||||
run_vec(TV, double, Projective, DontAlign, 4);
|
||||
|
||||
cout << "vec = trans.matrix() * vec" << endl;
|
||||
run_vec(TMATV, float, Isometry, AutoAlign, 4);
|
||||
run_vec(TMATV, float, Isometry, DontAlign, 4);
|
||||
run_vec(TMATV, double, Isometry, AutoAlign, 4);
|
||||
run_vec(TMATV, double, Isometry, DontAlign, 4);
|
||||
|
||||
cout << "trans = trans1 * trans" << endl;
|
||||
run_trans(TV, float, Isometry, AutoAlign);
|
||||
run_trans(TV, float, Isometry, DontAlign);
|
||||
run_trans(TV, double, Isometry, AutoAlign);
|
||||
run_trans(TV, double, Isometry, DontAlign);
|
||||
run_trans(TV, float, Projective, AutoAlign);
|
||||
run_trans(TV, float, Projective, DontAlign);
|
||||
run_trans(TV, double, Projective, AutoAlign);
|
||||
run_trans(TV, double, Projective, DontAlign);
|
||||
|
||||
cout << "trans = trans1.matrix() * trans.matrix()" << endl;
|
||||
run_trans(TMATVMAT, float, Isometry, AutoAlign);
|
||||
run_trans(TMATVMAT, float, Isometry, DontAlign);
|
||||
run_trans(TMATVMAT, double, Isometry, AutoAlign);
|
||||
run_trans(TMATVMAT, double, Isometry, DontAlign);
|
||||
}
|
||||
@@ -1,131 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
#include <bench/BenchTimer.h>
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SIZE
|
||||
#define SIZE 50
|
||||
#endif
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 10000
|
||||
#endif
|
||||
|
||||
typedef float Scalar;
|
||||
|
||||
__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
|
||||
__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
|
||||
__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
int size = SIZE * 8;
|
||||
int size2 = size * size;
|
||||
Scalar* a = internal::aligned_new<Scalar>(size2);
|
||||
Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
|
||||
Scalar* c = internal::aligned_new<Scalar>(size2);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
a[i] = b[i] = c[i] = 0;
|
||||
}
|
||||
|
||||
BenchTimer timer;
|
||||
|
||||
timer.reset();
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
timer.start();
|
||||
benchVec(a, b, c, size2);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
|
||||
<< " GFlops\n";
|
||||
return 0;
|
||||
for (int innersize = size; innersize > 2; --innersize) {
|
||||
if (size2 % innersize == 0) {
|
||||
int outersize = size2 / innersize;
|
||||
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
|
||||
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
|
||||
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
|
||||
timer.reset();
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
timer.start();
|
||||
benchVec(ma, mb, mc);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << innersize << " x " << outersize << " " << timer.value() << "s "
|
||||
<< (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
|
||||
}
|
||||
}
|
||||
|
||||
VectorXf va = Map<VectorXf>(a, size2);
|
||||
VectorXf vb = Map<VectorXf>(b, size2);
|
||||
VectorXf vc = Map<VectorXf>(c, size2);
|
||||
timer.reset();
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
timer.start();
|
||||
benchVec(va, vb, vc);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
|
||||
<< " GFlops\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Repeats the matrix addition a = a + b REPEAT times. c is not used.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
  for (int remaining = REPEAT; remaining > 0; --remaining) {
    a = a + b;
  }
}
|
||||
|
||||
// Repeats the vector addition a = a + b REPEAT times. c is not used.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
  for (int remaining = REPEAT; remaining > 0; --remaining) {
    a = a + b;
  }
}
|
||||
|
||||
// Repeats the element-wise addition a[i] += b[i] REPEAT times using Eigen's
// packet primitives, processing eight packets per outer step.
//   a:    destination and first operand; written with the aligned pstore.
//   b:    second operand; read with the unaligned ploadu.
//   c:    not used.
//   size: number of scalars to process. There is no tail handling, so it is
//         expected to be a multiple of 8 * PacketSize.
// All eight packets of each step are now added. Previously only packets 2..7
// were processed, leaving a quarter of the data untouched although the caller
// counts size operations per repetition.
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
  typedef internal::packet_traits<Scalar>::type PacketScalar;
  const int PacketSize = internal::packet_traits<Scalar>::size;
  const int PacketsPerStep = 8;
  for (int k = 0; k < REPEAT; ++k)
    for (int i = 0; i < size; i += PacketSize * PacketsPerStep) {
      // Fixed trip count, so the compiler can unroll this loop.
      for (int p = 0; p < PacketsPerStep; ++p) {
        const int at = i + p * PacketSize;
        // ploadu cannot deduce its packet type from the pointer argument, so
        // the packet type is spelled out.
        internal::pstore(&a[at], internal::padd(internal::ploadu<PacketScalar>(&a[at]),
                                                internal::ploadu<PacketScalar>(&b[at])));
      }
    }
}
|
||||
@@ -1,393 +0,0 @@
|
||||
|
||||
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out
|
||||
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
|
||||
|
||||
// Compilation options:
|
||||
//
|
||||
// -DSCALAR=std::complex<double>
|
||||
// -DSCALARA=double or -DSCALARB=double
|
||||
// -DHAVE_BLAS
|
||||
// -DDECOUPLED
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <bench/BenchTimer.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
// #define SCALAR std::complex<float>
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
#ifndef SCALARA
|
||||
#define SCALARA SCALAR
|
||||
#endif
|
||||
|
||||
#ifndef SCALARB
|
||||
#define SCALARB SCALAR
|
||||
#endif
|
||||
|
||||
#ifdef ROWMAJ_A
|
||||
const int opt_A = RowMajor;
|
||||
#else
|
||||
const int opt_A = ColMajor;
|
||||
#endif
|
||||
|
||||
#ifdef ROWMAJ_B
|
||||
const int opt_B = RowMajor;
|
||||
#else
|
||||
const int opt_B = ColMajor;
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
typedef NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Matrix<SCALARA, Dynamic, Dynamic, opt_A> A;
|
||||
typedef Matrix<SCALARB, Dynamic, Dynamic, opt_B> B;
|
||||
typedef Matrix<Scalar, Dynamic, Dynamic> C;
|
||||
typedef Matrix<RealScalar, Dynamic, Dynamic> M;
|
||||
|
||||
#ifdef HAVE_BLAS
|
||||
|
||||
extern "C" {
|
||||
#include <Eigen/src/misc/blas.h>
|
||||
}
|
||||
|
||||
static float fone = 1;
|
||||
static float fzero = 0;
|
||||
static double done = 1;
|
||||
static double szero = 0;
|
||||
static std::complex<float> cfone = 1;
|
||||
static std::complex<float> cfzero = 0;
|
||||
static std::complex<double> cdone = 1;
|
||||
static std::complex<double> cdzero = 0;
|
||||
static char notrans = 'N';
|
||||
static char trans = 'T';
|
||||
static char nonunit = 'N';
|
||||
static char lower = 'L';
|
||||
static char right = 'R';
|
||||
static int intone = 1;
|
||||
|
||||
#ifdef ROWMAJ_A
|
||||
const char transA = trans;
|
||||
#else
|
||||
const char transA = notrans;
|
||||
#endif
|
||||
|
||||
#ifdef ROWMAJ_B
|
||||
const char transB = trans;
|
||||
#else
|
||||
const char transB = notrans;
|
||||
#endif
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXf& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast<float*>(a.data()), &lda, const_cast<float*>(b.data()), &ldb,
|
||||
&fone, c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXd& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast<double*>(a.data()), &lda, const_cast<double*>(b.data()), &ldb,
|
||||
&done, c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcf& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast<float*>((const float*)a.data()), &lda,
|
||||
const_cast<float*>((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcd& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast<double*>((const double*)a.data()), &lda,
|
||||
const_cast<double*>((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Complex * complex product on split real/imaginary storage:
//   (cr + i*ci) += (ar + i*ai) * (br + i*bi)
// evaluated as four real matrix products accumulated into cr and ci.
// Equivalent block form: [cr ci] += [ar ai] * br + [-ai ar] * bi
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) {
  // Real part: ar*br - ai*bi.
  cr.noalias() += ar * br;
  cr.noalias() -= ai * bi;

  // Imaginary part: ar*bi + ai*br.
  ci.noalias() += ar * bi;
  ci.noalias() += ai * br;
}
|
||||
|
||||
// Real * complex product on split real/imaginary storage:
//   (cr + i*ci) += a * (br + i*bi)
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) {
  // Real part.
  cr.noalias() += a * br;
  // Imaginary part.
  ci.noalias() += a * bi;
}
|
||||
|
||||
// Complex * real product on split real/imaginary storage:
//   (cr + i*ci) += (ar + i*ai) * b
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) {
  // Real part.
  cr.noalias() += ar * b;
  // Imaginary part.
  ci.noalias() += ai * b;
}
|
||||
|
||||
template <typename A, typename B, typename C>
|
||||
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
|
||||
c.noalias() += a * b;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
std::ptrdiff_t l1 = internal::queryL1CacheSize();
|
||||
std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
|
||||
std::cout << "L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n";
|
||||
std::cout << "L2/L3 cache size = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n";
|
||||
typedef internal::gebp_traits<Scalar, Scalar> Traits;
|
||||
std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";
|
||||
|
||||
int rep = 1; // number of repetitions per try
|
||||
int tries = 2; // number of tries, we keep the best
|
||||
|
||||
int s = 2048;
|
||||
int m = s;
|
||||
int n = s;
|
||||
int p = s;
|
||||
int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;
|
||||
|
||||
bool need_help = false;
|
||||
for (int i = 1; i < argc;) {
|
||||
if (argv[i][0] == '-') {
|
||||
if (argv[i][1] == 's') {
|
||||
++i;
|
||||
s = atoi(argv[i++]);
|
||||
m = n = p = s;
|
||||
if (argv[i][0] != '-') {
|
||||
n = atoi(argv[i++]);
|
||||
p = atoi(argv[i++]);
|
||||
}
|
||||
} else if (argv[i][1] == 'c') {
|
||||
++i;
|
||||
cache_size1 = atoi(argv[i++]);
|
||||
if (argv[i][0] != '-') {
|
||||
cache_size2 = atoi(argv[i++]);
|
||||
if (argv[i][0] != '-') cache_size3 = atoi(argv[i++]);
|
||||
}
|
||||
} else if (argv[i][1] == 't') {
|
||||
tries = atoi(argv[++i]);
|
||||
++i;
|
||||
} else if (argv[i][1] == 'p') {
|
||||
++i;
|
||||
rep = atoi(argv[i++]);
|
||||
}
|
||||
} else {
|
||||
need_help = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_help) {
|
||||
std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
|
||||
std::cout << " <matrix sizes> : size\n";
|
||||
std::cout << " <matrix sizes> : rows columns depth\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if EIGEN_VERSION_AT_LEAST(3, 2, 90)
|
||||
if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
|
||||
#endif
|
||||
|
||||
A a(m, p);
|
||||
a.setRandom();
|
||||
B b(p, n);
|
||||
b.setRandom();
|
||||
C c(m, n);
|
||||
c.setOnes();
|
||||
C rc = c;
|
||||
|
||||
std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
|
||||
std::ptrdiff_t mc(m), nc(n), kc(p);
|
||||
internal::computeProductBlockingSizes<Scalar, Scalar>(kc, mc, nc);
|
||||
std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n";
|
||||
|
||||
C r = c;
|
||||
|
||||
// check the parallel product is correct
|
||||
#if defined EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
int procs = omp_get_max_threads();
|
||||
if (procs > 1) {
|
||||
#ifdef HAVE_BLAS
|
||||
blas_gemm(a, b, r);
|
||||
#else
|
||||
omp_set_num_threads(1);
|
||||
r.noalias() += a * b;
|
||||
omp_set_num_threads(procs);
|
||||
#endif
|
||||
c.noalias() += a * b;
|
||||
if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
|
||||
}
|
||||
#elif defined HAVE_BLAS
|
||||
blas_gemm(a, b, r);
|
||||
c.noalias() += a * b;
|
||||
if (!r.isApprox(c)) {
|
||||
std::cout << (r - c).norm() / r.norm() << "\n";
|
||||
std::cerr << "Warning, your product is crap!\n\n";
|
||||
}
|
||||
#else
|
||||
if (1. * m * n * p < 2000. * 2000 * 2000) {
|
||||
gemm(a, b, c);
|
||||
r.noalias() += a.cast<Scalar>().lazyProduct(b.cast<Scalar>());
|
||||
if (!r.isApprox(c)) {
|
||||
std::cout << (r - c).norm() / r.norm() << "\n";
|
||||
std::cerr << "Warning, your product is crap!\n\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BLAS
|
||||
BenchTimer tblas;
|
||||
c = rc;
|
||||
BENCH(tblas, tries, rep, blas_gemm(a, b, c));
|
||||
std::cout << "blas cpu " << tblas.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "blas real " << tblas.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
#endif
|
||||
|
||||
// warm start
|
||||
if (b.norm() + a.norm() == 123.554) std::cout << "\n";
|
||||
|
||||
BenchTimer tmt;
|
||||
c = rc;
|
||||
BENCH(tmt, tries, rep, gemm(a, b, c));
|
||||
std::cout << "eigen cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "eigen real " << tmt.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
if (procs > 1) {
|
||||
BenchTimer tmono;
|
||||
omp_set_num_threads(1);
|
||||
Eigen::setNbThreads(1);
|
||||
c = rc;
|
||||
BENCH(tmono, tries, rep, gemm(a, b, c));
|
||||
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "eigen mono real " << tmono.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t("
|
||||
<< tmono.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => "
|
||||
<< (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
if (1. * m * n * p < 30 * 30 * 30) {
|
||||
BenchTimer tmt;
|
||||
c = rc;
|
||||
BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b));
|
||||
std::cout << "lazy cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "lazy real " << tmt.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
|
||||
#ifdef DECOUPLED
|
||||
if ((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
|
||||
M ar(m, p);
|
||||
ar.setRandom();
|
||||
M ai(m, p);
|
||||
ai.setRandom();
|
||||
M br(p, n);
|
||||
br.setRandom();
|
||||
M bi(p, n);
|
||||
bi.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
if ((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
|
||||
M a(m, p);
|
||||
a.setRandom();
|
||||
M br(p, n);
|
||||
br.setRandom();
|
||||
M bi(p, n);
|
||||
bi.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
if ((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) {
|
||||
M ar(m, p);
|
||||
ar.setRandom();
|
||||
M ai(m, p);
|
||||
ai.setRandom();
|
||||
M b(p, n);
|
||||
b.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2020 Sebastien Boisvert <seb@boisvert.info>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "BenchTimer.h"
|
||||
#include "../test/MovableScalar.h"
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
|
||||
// Round-trips `m` through a temporary using copy construction followed by
// copy assignment. On return `m` holds the same value it started with.
template <typename MatrixType>
void copy_matrix(MatrixType& m) {
  MatrixType duplicate(m);  // copy-construct
  m = duplicate;            // copy-assign back
}
|
||||
|
||||
// Round-trips `m` through a temporary using move construction followed by
// move assignment. When called with an rvalue, the contents of `m` are moved
// out and then moved straight back.
template <typename MatrixType>
void move_matrix(MatrixType&& m) {
  MatrixType holder(std::move(m));  // move-construct
  m = std::move(holder);            // move-assign back
}
|
||||
|
||||
template <typename Scalar>
|
||||
void bench(const std::string& label) {
|
||||
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
|
||||
Eigen::BenchTimer t;
|
||||
|
||||
int tries = 10;
|
||||
int rep = 1000000;
|
||||
|
||||
MatrixType data = MatrixType::Random().eval();
|
||||
MatrixType dest;
|
||||
|
||||
BENCH(t, tries, rep, copy_matrix(data));
|
||||
std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
|
||||
|
||||
BENCH(t, tries, rep, move_matrix(std::move(data)));
|
||||
std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
|
||||
}
|
||||
|
||||
// Runs the copy-vs-move benchmark once per scalar type.
int main() {
  bench<float>("float");    // single precision
  bench<double>("double");  // double precision
  return 0;
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: <script> compilerlist.txt benchfile.cpp
#
# compilerlist.txt is sourced; it is expected to set `g` (number of entries)
# and the CLIST array, each entry being a compiler command line
# ("<compiler> <flags...>"). Each available compiler builds benchfile.cpp into
# ./.bench, which is then run.
if (($# < 2)); then
    echo "Usage: $0 compilerlist.txt benchfile.cpp"
else

compilerlist=$1
benchfile=$2

g=0
source "$compilerlist"

# for each compiler, compile benchfile and run the benchmark
for (( i=0 ; i<g ; ++i )) ; do
  # The first word of the entry is the compiler executable.
  compiler=$(echo ${CLIST[$i]} | cut -d " " -f 1)
  # `command -v` fails when the executable is not on PATH (or the name is empty).
  # The previous `[ -e $(which ...) ]` test degenerated to `[ -e ]`, which is
  # always true, so missing compilers were never reported.
  if command -v "$compiler" > /dev/null 2>&1; then
    echo "${CLIST[$i]}"
    # CLIST entry is deliberately unquoted: it must split into compiler + flags.
    ${CLIST[$i]} "$benchfile" -I.. -o .bench && ./.bench 2> /dev/null
    echo ""
  else
    echo "compiler not found: $compiler"
  fi
done

fi
|
||||
@@ -1,342 +0,0 @@
|
||||
#include <typeinfo>
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
#include "BenchTimer.h"
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) {
|
||||
return v.norm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) {
|
||||
return v.stableNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) {
|
||||
return v.hypotNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) {
|
||||
return v.blueNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) {
|
||||
typedef typename T::Scalar Scalar;
|
||||
int n = v.size();
|
||||
Scalar scale = 0;
|
||||
Scalar ssq = 1;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
Scalar ax = std::abs(v.coeff(i));
|
||||
if (scale >= ax) {
|
||||
ssq += numext::abs2(ax / scale);
|
||||
} else {
|
||||
ssq = Scalar(1) + ssq * numext::abs2(scale / ax);
|
||||
scale = ax;
|
||||
}
|
||||
}
|
||||
return scale * std::sqrt(ssq);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) {
|
||||
typedef typename T::Scalar Scalar;
|
||||
Scalar s = v.array().abs().maxCoeff();
|
||||
return s * (v / s).norm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) {
|
||||
return v.stableNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) {
|
||||
int n = v.size() / 2;
|
||||
for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1);
|
||||
n = n / 2;
|
||||
while (n > 0) {
|
||||
for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1);
|
||||
n = n / 2;
|
||||
}
|
||||
return std::sqrt(v(0));
|
||||
}
|
||||
|
||||
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
// SSE helpers used by pblueNorm(). The second operand is taken by non-const
// reference, so these overloads only accept an lvalue there.

// Lane-wise "lhs < rhs" comparison mask.
Packet4f plt(const Packet4f& lhs, Packet4f& rhs) { return _mm_cmplt_ps(lhs, rhs); }
Packet2d plt(const Packet2d& lhs, Packet2d& rhs) { return _mm_cmplt_pd(lhs, rhs); }

// Thin wrappers over the SSE and-not intrinsics, operands passed in the same order.
Packet4f pandnot(const Packet4f& lhs, Packet4f& rhs) { return _mm_andnot_ps(lhs, rhs); }
Packet2d pandnot(const Packet2d& lhs, Packet2d& rhs) { return _mm_andnot_pd(lhs, rhs); }
#endif
}  // namespace internal
}  // namespace Eigen
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) {
|
||||
#ifndef EIGEN_VECTORIZE
|
||||
return v.blueNorm();
|
||||
#else
|
||||
typedef typename T::Scalar Scalar;
|
||||
|
||||
static int nmax = 0;
|
||||
static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
|
||||
int n;
|
||||
|
||||
if (nmax <= 0) {
|
||||
int nbig, ibeta, it, iemin, iemax, iexp;
|
||||
Scalar abig, eps;
|
||||
|
||||
nbig = NumTraits<int>::highest(); // largest integer
|
||||
ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for
|
||||
// floating-point numbers
|
||||
it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in
|
||||
// mantissa
|
||||
iemin = NumTraits<Scalar>::min_exponent(); // minimum exponent
|
||||
iemax = NumTraits<Scalar>::max_exponent(); // maximum exponent
|
||||
rbig = NumTraits<Scalar>::highest(); // largest floating-point number
|
||||
|
||||
// Check the basic machine-dependent constants.
|
||||
if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) {
|
||||
eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
|
||||
}
|
||||
iexp = -((1 - iemin) / 2);
|
||||
b1 = std::pow(ibeta, iexp); // lower boundary of midrange
|
||||
iexp = (iemax + 1 - it) / 2;
|
||||
b2 = std::pow(ibeta, iexp); // upper boundary of midrange
|
||||
|
||||
iexp = (2 - iemin) / 2;
|
||||
s1m = std::pow(ibeta, iexp); // scaling factor for lower range
|
||||
iexp = -((iemax + it) / 2);
|
||||
s2m = std::pow(ibeta, iexp); // scaling factor for upper range
|
||||
|
||||
overfl = rbig * s2m; // overflow boundary for abig
|
||||
eps = std::pow(ibeta, 1 - it);
|
||||
relerr = std::sqrt(eps); // tolerance for neglecting asml
|
||||
abig = 1.0 / eps - 1.0;
|
||||
if (Scalar(nbig) > abig)
|
||||
nmax = abig; // largest safe n
|
||||
else
|
||||
nmax = nbig;
|
||||
}
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
const int ps = internal::packet_traits<Scalar>::size;
|
||||
Packet pasml = internal::pset1<Packet>(Scalar(0));
|
||||
Packet pamed = internal::pset1<Packet>(Scalar(0));
|
||||
Packet pabig = internal::pset1<Packet>(Scalar(0));
|
||||
Packet ps2m = internal::pset1<Packet>(s2m);
|
||||
Packet ps1m = internal::pset1<Packet>(s1m);
|
||||
Packet pb2 = internal::pset1<Packet>(b2);
|
||||
Packet pb1 = internal::pset1<Packet>(b1);
|
||||
for (int j = 0; j < v.size(); j += ps) {
|
||||
Packet ax = internal::pabs(v.template packet<Aligned>(j));
|
||||
Packet ax_s2m = internal::pmul(ax, ps2m);
|
||||
Packet ax_s1m = internal::pmul(ax, ps1m);
|
||||
Packet maskBig = internal::plt(pb2, ax);
|
||||
Packet maskSml = internal::plt(ax, pb1);
|
||||
|
||||
// Packet maskMed = internal::pand(maskSml,maskBig);
|
||||
// Packet scale = internal::pset1(Scalar(0));
|
||||
// scale = internal::por(scale, internal::pand(maskBig,ps2m));
|
||||
// scale = internal::por(scale, internal::pand(maskSml,ps1m));
|
||||
// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
|
||||
// ax = internal::pmul(ax,scale);
|
||||
// ax = internal::pmul(ax,ax);
|
||||
// pabig = internal::padd(pabig, internal::pand(maskBig, ax));
|
||||
// pasml = internal::padd(pasml, internal::pand(maskSml, ax));
|
||||
// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
|
||||
|
||||
pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m)));
|
||||
pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m)));
|
||||
pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig)));
|
||||
}
|
||||
Scalar abig = internal::predux(pabig);
|
||||
Scalar asml = internal::predux(pasml);
|
||||
Scalar amed = internal::predux(pamed);
|
||||
if (abig > Scalar(0)) {
|
||||
abig = std::sqrt(abig);
|
||||
if (abig > overfl) {
|
||||
eigen_assert(false && "overflow");
|
||||
return rbig;
|
||||
}
|
||||
if (amed > Scalar(0)) {
|
||||
abig = abig / s2m;
|
||||
amed = std::sqrt(amed);
|
||||
} else {
|
||||
return abig / s2m;
|
||||
}
|
||||
|
||||
} else if (asml > Scalar(0)) {
|
||||
if (amed > Scalar(0)) {
|
||||
abig = std::sqrt(amed);
|
||||
amed = std::sqrt(asml) / s1m;
|
||||
} else {
|
||||
return std::sqrt(asml) / s1m;
|
||||
}
|
||||
} else {
|
||||
return std::sqrt(amed);
|
||||
}
|
||||
asml = std::min(abig, amed);
|
||||
abig = std::max(abig, amed);
|
||||
if (asml <= abig * relerr)
|
||||
return abig;
|
||||
else
|
||||
return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Times NRM(vf) and NRM(vd) and prints one line: the name of NRM followed by
// the float, double and complex<float> timer values. Expects `tries`, `iters`,
// `vf` and `vd` to be visible at the expansion site.
#define BENCH_PERF(NRM)                                                                         \
  {                                                                                             \
    Eigen::BenchTimer tf, td, tcf;                                                              \
    float af = 0;                                                                               \
    double ad = 0;                                                                              \
    std::complex<float> ac = 0;                                                                 \
    tf.reset();                                                                                 \
    td.reset();                                                                                 \
    tcf.reset();                                                                                \
    /* single precision */                                                                      \
    for (int attempt = 0; attempt < tries; ++attempt) {                                         \
      tf.start();                                                                               \
      for (int it = 0; it < iters; ++it) {                                                      \
        af += NRM(vf);                                                                          \
      }                                                                                         \
      tf.stop();                                                                                \
    }                                                                                           \
    /* double precision */                                                                      \
    for (int attempt = 0; attempt < tries; ++attempt) {                                         \
      td.start();                                                                               \
      for (int it = 0; it < iters; ++it) {                                                      \
        ad += NRM(vd);                                                                          \
      }                                                                                         \
      td.stop();                                                                                \
    }                                                                                           \
    /* the complex<float> pass is disabled: tcf is reset but never started */                   \
    std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
  }
|
||||
|
||||
void check_accuracy(double basef, double based, int s) {
|
||||
double yf = basef * std::abs(internal::random<double>());
|
||||
double yd = based * std::abs(internal::random<double>());
|
||||
VectorXf vf = VectorXf::Ones(s) * yf;
|
||||
VectorXd vd = VectorXd::Ones(s) * yd;
|
||||
|
||||
std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n";
|
||||
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
|
||||
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
|
||||
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
|
||||
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
|
||||
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
|
||||
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
|
||||
std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
|
||||
}
|
||||
|
||||
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) {
|
||||
VectorXf vf(s);
|
||||
VectorXd vd(s);
|
||||
for (int i = 0; i < s; ++i) {
|
||||
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0, ef1));
|
||||
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0, ed1));
|
||||
}
|
||||
|
||||
// std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
|
||||
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>())
|
||||
<< "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>())
|
||||
<< "\t" << hypotNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t"
|
||||
<< blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>())
|
||||
<< "\t" << blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>())
|
||||
<< "\t" << lapackNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t"
|
||||
<< twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
|
||||
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long
|
||||
// double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int tries = 10;
|
||||
int iters = 100000;
|
||||
double y = 1.1345743233455785456788e12 * internal::random<double>();
|
||||
VectorXf v = VectorXf::Ones(1024) * y;
|
||||
|
||||
// return 0;
|
||||
int s = 10000;
|
||||
double basef_ok = 1.1345743233455785456788e15;
|
||||
double based_ok = 1.1345743233455785456788e95;
|
||||
|
||||
double basef_under = 1.1345743233455785456788e-27;
|
||||
double based_under = 1.1345743233455785456788e-303;
|
||||
|
||||
double basef_over = 1.1345743233455785456788e+27;
|
||||
double based_over = 1.1345743233455785456788e+302;
|
||||
|
||||
std::cout.precision(20);
|
||||
|
||||
std::cerr << "\nNo under/overflow:\n";
|
||||
check_accuracy(basef_ok, based_ok, s);
|
||||
|
||||
std::cerr << "\nUnderflow:\n";
|
||||
check_accuracy(basef_under, based_under, s);
|
||||
|
||||
std::cerr << "\nOverflow:\n";
|
||||
check_accuracy(basef_over, based_over, s);
|
||||
|
||||
std::cerr << "\nVarying (over):\n";
|
||||
for (int k = 0; k < 1; ++k) {
|
||||
check_accuracy_var(20, 27, 190, 302, s);
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
std::cerr << "\nVarying (under):\n";
|
||||
for (int k = 0; k < 1; ++k) {
|
||||
check_accuracy_var(-27, 20, -302, -190, s);
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
y = 1;
|
||||
std::cout.precision(4);
|
||||
int s1 = 1024 * 1024 * 32;
|
||||
std::cerr << "Performance (out of cache, " << s1 << "):\n";
|
||||
{
|
||||
int iters = 1;
|
||||
VectorXf vf = VectorXf::Random(s1) * y;
|
||||
VectorXd vd = VectorXd::Random(s1) * y;
|
||||
VectorXcf vcf = VectorXcf::Random(s1) * y;
|
||||
BENCH_PERF(sqsumNorm);
|
||||
BENCH_PERF(stableNorm);
|
||||
BENCH_PERF(blueNorm);
|
||||
BENCH_PERF(pblueNorm);
|
||||
BENCH_PERF(lapackNorm);
|
||||
BENCH_PERF(hypotNorm);
|
||||
BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(bl2passNorm);
|
||||
}
|
||||
|
||||
std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
|
||||
{
|
||||
int iters = 100000;
|
||||
VectorXf vf = VectorXf::Random(512) * y;
|
||||
VectorXd vd = VectorXd::Random(512) * y;
|
||||
VectorXcf vcf = VectorXcf::Random(512) * y;
|
||||
BENCH_PERF(sqsumNorm);
|
||||
BENCH_PERF(stableNorm);
|
||||
BENCH_PERF(blueNorm);
|
||||
BENCH_PERF(pblueNorm);
|
||||
BENCH_PERF(lapackNorm);
|
||||
BENCH_PERF(hypotNorm);
|
||||
BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(bl2passNorm);
|
||||
}
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
#include <bench/BenchUtil.h>
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 100000
|
||||
#endif
|
||||
|
||||
#ifndef TRIES
|
||||
#define TRIES 20
|
||||
#endif
|
||||
|
||||
typedef double Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__((noinline)) void bench_reverse(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
int size = m.size();
|
||||
|
||||
int repeats = (REPEAT * 1000) / size;
|
||||
MatrixType a = MatrixType::Random(rows, cols);
|
||||
MatrixType b = MatrixType::Random(rows, cols);
|
||||
|
||||
BenchTimer timerB, timerH, timerV;
|
||||
|
||||
Scalar acc = 0;
|
||||
int r = internal::random<int>(0, rows - 1);
|
||||
int c = internal::random<int>(0, cols - 1);
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerB.start();
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
asm("#begin foo");
|
||||
b = a.reverse();
|
||||
asm("#end foo");
|
||||
acc += b.coeff(r, c);
|
||||
}
|
||||
timerB.stop();
|
||||
}
|
||||
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s "
|
||||
<< "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t";
|
||||
|
||||
std::cout << "\n";
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0};
|
||||
std::cout << "size no sqrt standard";
|
||||
// #ifdef BENCH_GSL
|
||||
// std::cout << " GSL (standard + double + ATLAS) ";
|
||||
// #endif
|
||||
std::cout << "\n";
|
||||
for (uint i = 0; dynsizes[i] > 0; ++i) {
|
||||
bench_reverse(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
bench_reverse(Matrix<Scalar, Dynamic, 1>(dynsizes[i] * dynsizes[i]));
|
||||
}
|
||||
// bench_reverse(Matrix<Scalar,2,2>());
|
||||
// bench_reverse(Matrix<Scalar,3,3>());
|
||||
// bench_reverse(Matrix<Scalar,4,4>());
|
||||
// bench_reverse(Matrix<Scalar,5,5>());
|
||||
// bench_reverse(Matrix<Scalar,6,6>());
|
||||
// bench_reverse(Matrix<Scalar,7,7>());
|
||||
// bench_reverse(Matrix<Scalar,8,8>());
|
||||
// bench_reverse(Matrix<Scalar,12,12>());
|
||||
// bench_reverse(Matrix<Scalar,16,16>());
|
||||
return 0;
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
int main() {
|
||||
typedef Matrix<SCALAR, Eigen::Dynamic, 1> Vec;
|
||||
Vec v(SIZE);
|
||||
v.setZero();
|
||||
v[0] = 1;
|
||||
v[1] = 2;
|
||||
for (int i = 0; i < 1000000; i++) {
|
||||
v.coeffRef(0) += v.sum() * SCALAR(1e-20);
|
||||
}
|
||||
cout << v.sum() << endl;
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
|
||||
# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"
|
||||
CXX=${CXX-g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value

# Build and time benchmark.cpp for every matrix size from 1 to 15, once with a
# raised unrolling limit and once with unrolled loops disabled.
for i in {1..15}; do
  echo "Matrix size: $i x $i :"
  $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark \
    && time ./benchmark >/dev/null
  $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark \
    && time ./benchmark >/dev/null
  echo " "
done
|
||||
@@ -1,617 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <cstdio>
|
||||
|
||||
bool eigen_use_specific_block_size;
|
||||
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
|
||||
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
|
||||
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
|
||||
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
|
||||
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
|
||||
#include <Eigen/Core>
|
||||
|
||||
#include <bench/BenchTimer.h>
|
||||
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
static BenchTimer timer;
|
||||
|
||||
// how many times we repeat each measurement.
|
||||
// measurements are randomly shuffled - we're not doing
|
||||
// all N identical measurements in a row.
|
||||
const int measurement_repetitions = 3;
|
||||
|
||||
// Timings below this value are too short to be accurate,
|
||||
// we'll repeat measurements with more iterations until
|
||||
// we get a timing above that threshold.
|
||||
const float min_accurate_time = 1e-2f;
|
||||
|
||||
// See --min-working-set-size command line parameter.
|
||||
size_t min_working_set_size = 0;
|
||||
|
||||
float max_clock_speed = 0.0f;
|
||||
|
||||
// range of sizes that we will benchmark (in all 3 K,M,N dimensions)
|
||||
const size_t maxsize = 2048;
|
||||
const size_t minsize = 16;
|
||||
|
||||
typedef MatrixXf MatrixType;
|
||||
typedef MatrixType::Scalar Scalar;
|
||||
typedef internal::packet_traits<Scalar>::type Packet;
|
||||
|
||||
static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
|
||||
static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
|
||||
static_assert(maxsize > minsize, "maxsize must be larger than minsize");
|
||||
static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
|
||||
|
||||
// just a helper to store a triple of K,M,N sizes for matrix product
|
||||
// Holds the three sizes (K, M, N) of a matrix product.
struct size_triple_t {
  size_t k, m, n;

  // All sizes zero.
  size_triple_t() : k(0), m(0), n(0) {}

  // Explicit sizes.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  // Member-wise copy.
  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}

  // Decodes the compact 12-bit form: three 4-bit fields, each holding the
  // log2 of one size (k in bits 8-11, m in bits 4-7, n in bits 0-3).
  size_triple_t(uint16_t compact)
      : k(1 << ((compact >> 8) & 0xf)), m(1 << ((compact >> 4) & 0xf)), n(1 << (compact & 0xf)) {}
};
|
||||
|
||||
// Returns floor(log2(x)), i.e. how many times x can be shifted right by one
// bit before it reaches zero, minus one. Yields 0 for both x == 0 and x == 1.
uint8_t log2_pot(size_t x) {
  size_t shifts = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++shifts;
  }
  return shifts;
}
|
||||
|
||||
// Convert between size triples and a compact form fitting in 12 bits
|
||||
// where each size, which must be a POT, is encoded as its log2, on 4 bits
|
||||
// so the largest representable size is 2^15 == 32k ... big enough.
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
|
||||
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
|
||||
}
|
||||
|
||||
// Convenience overload: packs the three sizes stored in `t`.
uint16_t compact_size_triple(const size_triple_t& t) {
  return compact_size_triple(t.k, t.m, t.n);
}
|
||||
|
||||
// A single benchmark. Initially only contains benchmark params.
|
||||
// Then call run(), which stores the result in the gflops field.
|
||||
struct benchmark_t {
|
||||
uint16_t compact_product_size;
|
||||
uint16_t compact_block_size;
|
||||
bool use_default_block_size;
|
||||
float gflops;
|
||||
benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {}
|
||||
benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn)),
|
||||
compact_block_size(compact_size_triple(bk, bm, bn)),
|
||||
use_default_block_size(false),
|
||||
gflops(0) {}
|
||||
benchmark_t(size_t pk, size_t pm, size_t pn)
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn)),
|
||||
compact_block_size(0),
|
||||
use_default_block_size(true),
|
||||
gflops(0) {}
|
||||
|
||||
void run();
|
||||
};
|
||||
|
||||
// Prints a benchmark as: product size in hex, then either the default
// blocking sizes computed for that product ("default(k, m, n)") or the
// explicit block size in hex, and finally the measured gflops.
ostream& operator<<(ostream& s, const benchmark_t& b) {
  s << hex << b.compact_product_size << dec;
  if (!b.use_default_block_size) {
    s << " " << hex << b.compact_block_size << dec;
  } else {
    const size_triple_t product(b.compact_product_size);
    Index k = product.k, m = product.m, n = product.n;
    internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
    s << " default(" << k << ", " << m << ", " << n << ")";
  }
  s << " " << b.gflops;
  return s;
}
|
||||
|
||||
// We sort first by increasing benchmark parameters,
|
||||
// then by decreasing performance.
|
||||
// Ordering: ascending product size, then ascending block size, and for equal
// parameters the higher gflops comes first.
bool operator<(const benchmark_t& b1, const benchmark_t& b2) {
  if (b1.compact_product_size != b2.compact_product_size) {
    return b1.compact_product_size < b2.compact_product_size;
  }
  if (b1.compact_block_size != b2.compact_block_size) {
    return b1.compact_block_size < b2.compact_block_size;
  }
  return b1.gflops > b2.gflops;
}
|
||||
|
||||
void benchmark_t::run() {
|
||||
size_triple_t productsizes(compact_product_size);
|
||||
|
||||
if (use_default_block_size) {
|
||||
eigen_use_specific_block_size = false;
|
||||
} else {
|
||||
// feed eigen with our custom blocking params
|
||||
eigen_use_specific_block_size = true;
|
||||
size_triple_t blocksizes(compact_block_size);
|
||||
eigen_block_size_k = blocksizes.k;
|
||||
eigen_block_size_m = blocksizes.m;
|
||||
eigen_block_size_n = blocksizes.n;
|
||||
}
|
||||
|
||||
// set up the matrix pool
|
||||
|
||||
const size_t combined_three_matrices_sizes =
|
||||
sizeof(Scalar) *
|
||||
(productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n);
|
||||
|
||||
// 64 M is large enough that nobody has a cache bigger than that,
|
||||
// while still being small enough that everybody has this much RAM,
|
||||
// so conveniently we don't need to special-case platforms here.
|
||||
const size_t unlikely_large_cache_size = 64 << 20;
|
||||
|
||||
const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
|
||||
|
||||
const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes;
|
||||
|
||||
MatrixType* lhs = new MatrixType[matrix_pool_size];
|
||||
MatrixType* rhs = new MatrixType[matrix_pool_size];
|
||||
MatrixType* dst = new MatrixType[matrix_pool_size];
|
||||
|
||||
for (size_t i = 0; i < matrix_pool_size; i++) {
|
||||
lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
|
||||
rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
|
||||
dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
|
||||
}
|
||||
|
||||
// main benchmark loop
|
||||
|
||||
int iters_at_a_time = 1;
|
||||
float time_per_iter = 0.0f;
|
||||
size_t matrix_index = 0;
|
||||
while (true) {
|
||||
double starttime = timer.getCpuTime();
|
||||
for (int i = 0; i < iters_at_a_time; i++) {
|
||||
dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
|
||||
matrix_index++;
|
||||
if (matrix_index == matrix_pool_size) {
|
||||
matrix_index = 0;
|
||||
}
|
||||
}
|
||||
double endtime = timer.getCpuTime();
|
||||
|
||||
const float timing = float(endtime - starttime);
|
||||
|
||||
if (timing >= min_accurate_time) {
|
||||
time_per_iter = timing / iters_at_a_time;
|
||||
break;
|
||||
}
|
||||
|
||||
iters_at_a_time *= 2;
|
||||
}
|
||||
|
||||
delete[] lhs;
|
||||
delete[] rhs;
|
||||
delete[] dst;
|
||||
|
||||
gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
|
||||
}
|
||||
|
||||
// Dumps a description of the host CPU to stdout: the contents of
// /proc/cpuinfo on Linux, the output of `sysctl hw` on Apple platforms,
// and nothing on any other platform.
void print_cpuinfo() {
#ifdef __linux__
  cout << "contents of /proc/cpuinfo:" << endl;
  ifstream cpuinfo("/proc/cpuinfo");
  if (cpuinfo.is_open()) {
    for (string line; getline(cpuinfo, line);) {
      cout << line << endl;
    }
    cpuinfo.close();
  }
  cout << endl;
#elif defined __APPLE__
  cout << "output of sysctl hw:" << endl;
  system("sysctl hw");
  cout << endl;
#endif
}
|
||||
|
||||
// Human-readable name of a scalar type, used when printing the benchmark
// parameters. Types without a specialization are reported as "unknown".
template <typename T>
string type_name() {
  return string("unknown");
}

// Specialization for float.
template <>
string type_name<float>() {
  return string("float");
}

// Specialization for double.
template <>
string type_name<double>() {
  return string("double");
}
|
||||
|
||||
// Base class of the actions selectable on the command line.
// Both virtual functions abort() unless overridden by a concrete action.
struct action_t {
  virtual ~action_t() = default;

  // Name under which the action is selected as the first command-line argument.
  virtual const char* invokation_name() const {
    abort();
    return nullptr;
  }

  // Executes the action.
  virtual void run() const { abort(); }
};
|
||||
|
||||
void show_usage_and_exit(int /*argc*/, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
|
||||
cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
|
||||
cerr << "available actions:" << endl << endl;
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
cerr << " " << (*it)->invokation_name() << endl;
|
||||
}
|
||||
cerr << endl;
|
||||
cerr << "options:" << endl << endl;
|
||||
cerr << " --min-working-set-size=N:" << endl;
|
||||
cerr << " Set the minimum working set size to N bytes." << endl;
|
||||
cerr << " This is rounded up as needed to a multiple of matrix size." << endl;
|
||||
cerr << " A larger working set lowers the chance of a warm cache." << endl;
|
||||
cerr << " The default value 0 means use a large enough working" << endl;
|
||||
cerr << " set to likely outsize caches." << endl;
|
||||
cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
|
||||
cerr << " avoid warm caches." << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float measure_clock_speed() {
|
||||
cerr << "Measuring clock speed... \r" << flush;
|
||||
|
||||
vector<float> all_gflops;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
benchmark_t b(1024, 1024, 1024);
|
||||
b.run();
|
||||
all_gflops.push_back(b.gflops);
|
||||
}
|
||||
|
||||
sort(all_gflops.begin(), all_gflops.end());
|
||||
float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
|
||||
|
||||
// multiply by an arbitrary constant to discourage trying doing anything with the
|
||||
// returned values besides just comparing them with each other.
|
||||
float result = stable_estimate * 123.456f;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// A duration in whole seconds that prints in a human-friendly form.
struct human_duration_t {
  int seconds;
  human_duration_t(int s) : seconds(s) {}
};

// Prints the duration as "<H> h <M> min <S> s".
// The hour and minute parts are omitted when they would be zero, and the
// seconds part is only printed for durations shorter than 10 minutes.
ostream& operator<<(ostream& s, const human_duration_t& d) {
  int remainder = d.seconds;
  // >= (not >) so that exactly one hour prints as "1 h " rather than "60 min ".
  if (remainder >= 3600) {
    const int hours = remainder / 3600;
    s << hours << " h ";
    remainder -= hours * 3600;
  }
  // >= (not >) so that exactly one minute prints as "1 min 0 s" rather than "60 s".
  if (remainder >= 60) {
    const int minutes = remainder / 60;
    s << minutes << " min ";
    remainder -= minutes * 60;
  }
  if (d.seconds < 600) {
    s << remainder << " s";
  }
  return s;
}
|
||||
|
||||
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
|
||||
|
||||
// Saves the session state to `filename` so that a later run can resume it.
// The file holds, in order: max_clock_speed, the number of benchmarks,
// first_benchmark_to_run, and the raw benchmark_t array.
// Exits the process with status 1 if the file cannot be opened or written.
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) {
  // Binary mode: the payload is raw memory, not text.
  FILE* file = fopen(filename, "wb");
  if (!file) {
    cerr << "Could not open file " << filename << " for writing." << endl;
    cerr << "Do you have write permissions on the current working directory?" << endl;
    exit(1);
  }
  const size_t benchmarks_vector_size = benchmarks.size();
  bool ok = fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file) == 1;
  ok = ok && fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file) == 1;
  ok = ok && fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file) == 1;
  ok = ok && fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks_vector_size, file) == benchmarks_vector_size;
  // Always close the file; a failing fclose means buffered data was lost.
  ok = (fclose(file) == 0) && ok;
  if (!ok) {
    cerr << "Could not write session data to " << filename << "." << endl;
    exit(1);
  }
}
|
||||
|
||||
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) {
|
||||
FILE* file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
return false;
|
||||
}
|
||||
if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
|
||||
return false;
|
||||
}
|
||||
size_t benchmarks_vector_size = 0;
|
||||
if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
|
||||
return false;
|
||||
}
|
||||
if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
|
||||
return false;
|
||||
}
|
||||
benchmarks.resize(benchmarks_vector_size);
|
||||
if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
|
||||
return false;
|
||||
}
|
||||
unlink(filename);
|
||||
return true;
|
||||
}
|
||||
|
||||
void try_run_some_benchmarks(vector<benchmark_t>& benchmarks, double time_start, size_t& first_benchmark_to_run) {
|
||||
if (first_benchmark_to_run == benchmarks.size()) {
|
||||
return;
|
||||
}
|
||||
|
||||
double time_last_progress_update = 0;
|
||||
double time_last_clock_speed_measurement = 0;
|
||||
double time_now = 0;
|
||||
|
||||
size_t benchmark_index = first_benchmark_to_run;
|
||||
|
||||
while (true) {
|
||||
float ratio_done = float(benchmark_index) / benchmarks.size();
|
||||
time_now = timer.getRealTime();
|
||||
|
||||
// We check clock speed every minute and at the end.
|
||||
if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) {
|
||||
time_last_clock_speed_measurement = time_now;
|
||||
|
||||
// Ensure that clock speed is as expected
|
||||
float current_clock_speed = measure_clock_speed();
|
||||
|
||||
// The tolerance needs to be smaller than the relative difference between
|
||||
// clock speeds that a device could operate under.
|
||||
// It seems unlikely that a device would be throttling clock speeds by
|
||||
// amounts smaller than 2%.
|
||||
// With a value of 1%, I was getting within noise on a Sandy Bridge.
|
||||
const float clock_speed_tolerance = 0.02f;
|
||||
|
||||
if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
|
||||
// Clock speed is now higher than we previously measured.
|
||||
// Either our initial measurement was inaccurate, which won't happen
|
||||
// too many times as we are keeping the best clock speed value and
|
||||
// and allowing some tolerance; or an unexpected condition occurred,
|
||||
// which invalidates all benchmark results collected so far.
|
||||
// Either way, we better restart all over again now.
|
||||
if (benchmark_index) {
|
||||
cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased. " << endl;
|
||||
}
|
||||
max_clock_speed = current_clock_speed;
|
||||
first_benchmark_to_run = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
bool rerun_last_tests = false;
|
||||
|
||||
if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
|
||||
cerr << "Measurements completed so far: " << 100.0f * ratio_done << " % " << endl;
|
||||
cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be."
|
||||
<< endl;
|
||||
|
||||
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
|
||||
|
||||
while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
|
||||
if (seconds_to_sleep_if_lower_clock_speed > 32) {
|
||||
cerr << "Sleeping longer probably won't make a difference." << endl;
|
||||
cerr << "Serializing benchmarks to " << session_filename << endl;
|
||||
serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
|
||||
cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
|
||||
exit(2);
|
||||
}
|
||||
rerun_last_tests = true;
|
||||
cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s... \r"
|
||||
<< endl;
|
||||
sleep(seconds_to_sleep_if_lower_clock_speed);
|
||||
current_clock_speed = measure_clock_speed();
|
||||
seconds_to_sleep_if_lower_clock_speed *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (rerun_last_tests) {
|
||||
cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
|
||||
<< " % because clock speed had been low. " << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// nothing wrong with the clock speed so far, so there won't be a need to rerun
|
||||
// benchmarks run so far in case we later encounter a lower clock speed.
|
||||
first_benchmark_to_run = benchmark_index;
|
||||
}
|
||||
|
||||
if (benchmark_index == benchmarks.size()) {
|
||||
// We're done!
|
||||
first_benchmark_to_run = benchmarks.size();
|
||||
// Erase progress info
|
||||
cerr << " " << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// Display progress info on stderr
|
||||
if (time_now > time_last_progress_update + 1.0f) {
|
||||
time_last_progress_update = time_now;
|
||||
cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA "
|
||||
<< human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
|
||||
<< " \r" << flush;
|
||||
}
|
||||
|
||||
// This is where we actually run a benchmark!
|
||||
benchmarks[benchmark_index].run();
|
||||
benchmark_index++;
|
||||
}
|
||||
}
|
||||
|
||||
void run_benchmarks(vector<benchmark_t>& benchmarks) {
|
||||
size_t first_benchmark_to_run;
|
||||
vector<benchmark_t> deserialized_benchmarks;
|
||||
bool use_deserialized_benchmarks = false;
|
||||
if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
|
||||
cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
|
||||
<< " % already done" << endl;
|
||||
if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 &&
|
||||
first_benchmark_to_run < benchmarks.size()) {
|
||||
use_deserialized_benchmarks = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_deserialized_benchmarks) {
|
||||
benchmarks = deserialized_benchmarks;
|
||||
} else {
|
||||
// not using deserialized benchmarks, starting from scratch
|
||||
first_benchmark_to_run = 0;
|
||||
|
||||
// Randomly shuffling benchmarks allows us to get accurate enough progress info,
|
||||
// as now the cheap/expensive benchmarks are randomly mixed so they average out.
|
||||
// It also means that if data is corrupted for some time span, the odds are that
|
||||
// not all repetitions of a given benchmark will be corrupted.
|
||||
random_shuffle(benchmarks.begin(), benchmarks.end());
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
max_clock_speed = max(max_clock_speed, measure_clock_speed());
|
||||
}
|
||||
|
||||
double time_start = 0.0;
|
||||
while (first_benchmark_to_run < benchmarks.size()) {
|
||||
if (first_benchmark_to_run == 0) {
|
||||
time_start = timer.getRealTime();
|
||||
}
|
||||
try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run);
|
||||
}
|
||||
|
||||
// Sort timings by increasing benchmark parameters, and decreasing gflops.
|
||||
// The latter is very important. It means that we can ignore all but the first
|
||||
// benchmark with given parameters.
|
||||
sort(benchmarks.begin(), benchmarks.end());
|
||||
|
||||
// Collect best (i.e. now first) results for each parameter values.
|
||||
vector<benchmark_t> best_benchmarks;
|
||||
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
|
||||
if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size ||
|
||||
best_benchmarks.back().compact_block_size != it->compact_block_size) {
|
||||
best_benchmarks.push_back(*it);
|
||||
}
|
||||
}
|
||||
|
||||
// keep and return only the best benchmarks
|
||||
benchmarks = best_benchmarks;
|
||||
}
|
||||
|
||||
struct measure_all_pot_sizes_action_t : action_t {
|
||||
virtual const char* invokation_name() const { return "all-pot-sizes"; }
|
||||
virtual void run() const {
|
||||
vector<benchmark_t> benchmarks;
|
||||
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
|
||||
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
|
||||
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
|
||||
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
|
||||
for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
|
||||
for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
|
||||
for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
|
||||
benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
run_benchmarks(benchmarks);
|
||||
|
||||
cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
|
||||
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
|
||||
cout << *it << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct measure_default_sizes_action_t : action_t {
|
||||
virtual const char* invokation_name() const { return "default-sizes"; }
|
||||
virtual void run() const {
|
||||
vector<benchmark_t> benchmarks;
|
||||
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
|
||||
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
|
||||
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
|
||||
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
|
||||
benchmarks.emplace_back(ksize, msize, nsize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
run_benchmarks(benchmarks);
|
||||
|
||||
cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
|
||||
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
|
||||
cout << *it << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
double time_start = timer.getRealTime();
|
||||
cout.precision(4);
|
||||
cerr.precision(4);
|
||||
|
||||
vector<unique_ptr<action_t>> available_actions;
|
||||
available_actions.emplace_back(new measure_all_pot_sizes_action_t);
|
||||
available_actions.emplace_back(new measure_default_sizes_action_t);
|
||||
|
||||
auto action = available_actions.end();
|
||||
|
||||
if (argc <= 1) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
if (!strcmp(argv[1], (*it)->invokation_name())) {
|
||||
action = it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (action == available_actions.end()) {
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
|
||||
for (int i = 2; i < argc; i++) {
|
||||
if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
|
||||
const char* equals_sign = strchr(argv[i], '=');
|
||||
min_working_set_size = strtoul(equals_sign + 1, nullptr, 10);
|
||||
} else {
|
||||
cerr << "unrecognized option: " << argv[i] << endl << endl;
|
||||
show_usage_and_exit(argc, argv, available_actions);
|
||||
}
|
||||
}
|
||||
|
||||
print_cpuinfo();
|
||||
|
||||
cout << "benchmark parameters:" << endl;
|
||||
cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl;
|
||||
cout << "scalar type: " << type_name<Scalar>() << endl;
|
||||
cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
|
||||
cout << "minsize = " << minsize << endl;
|
||||
cout << "maxsize = " << maxsize << endl;
|
||||
cout << "measurement_repetitions = " << measurement_repetitions << endl;
|
||||
cout << "min_accurate_time = " << min_accurate_time << endl;
|
||||
cout << "min_working_set_size = " << min_working_set_size;
|
||||
if (min_working_set_size == 0) {
|
||||
cout << " (try to outsize caches)";
|
||||
}
|
||||
cout << endl << endl;
|
||||
|
||||
(*action)->run();
|
||||
|
||||
double time_end = timer.getRealTime();
|
||||
cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
// g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
#ifndef MATSIZE
|
||||
#define MATSIZE 3
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 40000000
|
||||
#endif
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR double
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
Matrix<SCALAR, MATSIZE, MATSIZE> I = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones();
|
||||
Matrix<SCALAR, MATSIZE, MATSIZE> m;
|
||||
for (int i = 0; i < MATSIZE; i++)
|
||||
for (int j = 0; j < MATSIZE; j++) {
|
||||
m(i, j) = (i + MATSIZE * j);
|
||||
}
|
||||
asm("#begin");
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones() + 0.00005 * (m + (m * m));
|
||||
}
|
||||
asm("#end");
|
||||
cout << m << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 10000
|
||||
#endif
|
||||
|
||||
#ifndef SCALAR
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
|
||||
Mat m(100, 100);
|
||||
m.setRandom();
|
||||
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
int r, c, nr, nc;
|
||||
r = Eigen::internal::random<int>(0, 10);
|
||||
c = Eigen::internal::random<int>(0, 10);
|
||||
nr = Eigen::internal::random<int>(50, 80);
|
||||
nc = Eigen::internal::random<int>(50, 80);
|
||||
m.block(r, c, nr, nc) += Mat::Ones(nr, nc);
|
||||
m.block(r, c, nr, nc) *= SCALAR(10);
|
||||
m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10);
|
||||
m.block(r, c, nr, nc) /= SCALAR(10);
|
||||
}
|
||||
cout << m[0] << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef MATTYPE
|
||||
#define MATTYPE MatrixXLd
|
||||
#endif
|
||||
|
||||
#ifndef MATSIZE
|
||||
#define MATSIZE 400
|
||||
#endif
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 100
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE);
|
||||
MATTYPE m(MATSIZE, MATSIZE);
|
||||
for (int i = 0; i < MATSIZE; i++)
|
||||
for (int j = 0; j < MATSIZE; j++) {
|
||||
m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE);
|
||||
}
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = I + 0.0001 * (m + m * m);
|
||||
}
|
||||
cout << m(0, 0) << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
|
||||
|
||||
#include <iostream>
|
||||
#include <Eigen/Core>
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef VECTYPE
|
||||
#define VECTYPE VectorXLd
|
||||
#endif
|
||||
|
||||
#ifndef VECSIZE
|
||||
#define VECSIZE 1000000
|
||||
#endif
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 1000
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
VECTYPE I = VECTYPE::Ones(VECSIZE);
|
||||
VECTYPE m(VECSIZE, 1);
|
||||
for (int i = 0; i < VECSIZE; i++) {
|
||||
m[i] = 0.1 * i / VECSIZE;
|
||||
}
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4);
|
||||
}
|
||||
cout << m[0] << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,362 +0,0 @@
|
||||
/*
|
||||
* benchmark_aocl.cpp - AOCL Performance Benchmark Suite for Eigen
|
||||
*
|
||||
* Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*
|
||||
* Description:
|
||||
* ------------
|
||||
* This benchmark suite evaluates the performance of Eigen mathematical
|
||||
* operations when integrated with AMD Optimizing CPU Libraries (AOCL). It
|
||||
* tests:
|
||||
*
|
||||
* 1. Vector Math Operations: Transcendental functions (exp, sin, cos, sqrt,
|
||||
* log, etc.) using AOCL Vector Math Library (VML) for optimized
|
||||
* double-precision operations
|
||||
*
|
||||
* 2. Matrix Operations: BLAS Level-3 operations (DGEMM) using AOCL BLAS library
|
||||
* with support for both single-threaded and multithreaded execution
|
||||
*
|
||||
* 3. Linear Algebra: LAPACK operations (eigenvalue decomposition) using
|
||||
* libflame
|
||||
*
|
||||
* 4. Real-world Scenarios: Financial risk computation simulating covariance
|
||||
* matrix calculations and eigenvalue analysis for portfolio optimization
|
||||
*
|
||||
* The benchmark automatically detects AOCL configuration and adjusts test
|
||||
* execution accordingly, providing performance comparisons between standard
|
||||
* Eigen operations and AOCL-accelerated implementations.
|
||||
*
|
||||
* Compilation:
|
||||
* ------------
|
||||
* # Using AOCC compiler (recommended for best AOCL compatibility):
|
||||
 * clang++ -O3 -g -DEIGEN_USE_AOCL_ALL -I<PATH_TO_EIGEN_INCLUDE> \
|
||||
* -I${AOCL_ROOT}/include \
|
||||
* -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \
|
||||
* -lamdlibm -lm -lblis -lflame -lpthread -lrt -pthread \
|
||||
* -o build/eigen_aocl_benchmark
|
||||
*
|
||||
* # Alternative: Using GCC with proper library paths:
|
||||
 * g++ -O3 -g -DEIGEN_USE_AOCL_ALL -I<PATH_TO_EIGEN_INCLUDE> \
|
||||
* -I${AOCL_ROOT}/include \
|
||||
* -Wno-parentheses src/benchmark_aocl.cpp -L${AOCL_ROOT}/lib \
|
||||
* -lamdlibm -lm -lblis -lflame -lpthread -lrt \
|
||||
* -o build/eigen_aocl_benchmark
|
||||
*
|
||||
* # For multithreaded BLIS support:
|
||||
* clang++ -O3 -g -fopenmp -DEIGEN_USE_AOCL_MT -I<PATH_TO_EIGEN_INCLUDE> \
|
||||
* -I${AOCL_ROOT}/include -Wno-parentheses src/benchmark_aocl.cpp \
|
||||
* -L${AOCL_ROOT}/lib -lamdlibm -lm -lblis-mt -lflame -lpthread -lrt \
|
||||
* -o build/eigen_aocl_benchmark_mt
|
||||
*
|
||||
* Usage:
|
||||
* ------
|
||||
* export AOCL_ROOT=/path/to/aocl/installation
|
||||
* export LD_LIBRARY_PATH=$AOCL_ROOT/lib:$LD_LIBRARY_PATH
|
||||
* ./build/eigen_aocl_benchmark
|
||||
*
|
||||
* Developer:
|
||||
* ----------
|
||||
* Name: Sharad Saurabh Bhaskar
|
||||
* Email: shbhaska@amd.com
|
||||
* Organization: Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
// Simple - just include Eigen headers
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Dense>
|
||||
#include <Eigen/Eigenvalues>
|
||||
|
||||
// Only include CBLAS if AOCL BLIS is available
|
||||
#ifdef EIGEN_USE_AOCL_ALL
|
||||
#include <cblas.h>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace std::chrono;
|
||||
using namespace Eigen;
|
||||
|
||||
void benchmarkVectorMath(int size) {
|
||||
VectorXd v = VectorXd::LinSpaced(size, 0.1, 10.0);
|
||||
VectorXd result(size);
|
||||
double elapsed_ms = 0;
|
||||
|
||||
cout << "\n--- Vector Math Benchmark (size = " << size << ") ---" << endl;
|
||||
|
||||
auto start = high_resolution_clock::now();
|
||||
result = v.array().exp();
|
||||
auto end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "exp() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().sin();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "sin() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().cos();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "cos() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().sqrt();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "sqrt() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().cbrt();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "cbrt() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().abs();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "abs() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().log();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "log() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().log10();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "log10() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().exp2();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "exp2() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().asin();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "asin() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().sinh();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "sinh() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().acos();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "acos() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().cosh();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "cosh() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().tan();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "tan() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().atan();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "atan() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().tanh();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "tanh() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
VectorXd v2 = VectorXd::Random(size);
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array() + v2.array();
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "add() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().pow(2.0);
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "pow() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().max(v2.array());
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "max() time: " << elapsed_ms << " ms" << endl;
|
||||
|
||||
start = high_resolution_clock::now();
|
||||
result = v.array().min(v2.array());
|
||||
end = high_resolution_clock::now();
|
||||
elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "min() time: " << elapsed_ms << " ms" << endl;
|
||||
}
|
||||
|
||||
// Function to benchmark BLAS operation: Matrix multiplication.
|
||||
void benchmarkMatrixMultiplication(int matSize) {
|
||||
cout << "\n--- BLIS-st DGEMM Benchmark (" << matSize << " x " << matSize
|
||||
<< ") ---" << endl;
|
||||
|
||||
MatrixXd A = MatrixXd::Random(matSize, matSize);
|
||||
MatrixXd B = MatrixXd::Random(matSize, matSize);
|
||||
MatrixXd C(matSize, matSize);
|
||||
|
||||
auto start = high_resolution_clock::now();
|
||||
C = A * B;
|
||||
auto end = high_resolution_clock::now();
|
||||
double elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "Matrix multiplication time: " << elapsed_ms << " ms" << endl;
|
||||
}
|
||||
|
||||
// Benchmark BLIS directly using its CBLAS interface if available.
|
||||
void benchmarkBlisMultithreaded(int matSize, int numThreads) {
|
||||
#if defined(EIGEN_AOCL_USE_BLIS_MT)
|
||||
cout << "\n--- BLIS-mt DGEMM Benchmark (" << matSize << " x " << matSize
|
||||
<< ", threads=" << numThreads << ") ---" << endl;
|
||||
vector<double> A(matSize * matSize);
|
||||
vector<double> B(matSize * matSize);
|
||||
vector<double> C(matSize * matSize);
|
||||
for (auto &v : A)
|
||||
v = static_cast<double>(rand()) / RAND_MAX;
|
||||
for (auto &v : B)
|
||||
v = static_cast<double>(rand()) / RAND_MAX;
|
||||
double alpha = 1.0, beta = 0.0;
|
||||
string th = to_string(numThreads);
|
||||
setenv("BLIS_NUM_THREADS", th.c_str(), 1);
|
||||
auto start = high_resolution_clock::now();
|
||||
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, matSize, matSize,
|
||||
matSize, alpha, A.data(), matSize, B.data(), matSize, beta,
|
||||
C.data(), matSize);
|
||||
auto end = high_resolution_clock::now();
|
||||
double elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "BLIS dgemm time: " << elapsed_ms << " ms" << endl;
|
||||
#else
|
||||
(void)matSize;
|
||||
(void)numThreads;
|
||||
cout << "\nBLIS multithreaded support not enabled." << endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Function to benchmark LAPACK operation: Eigenvalue decomposition.
|
||||
void benchmarkEigenDecomposition(int matSize) {
|
||||
cout << "\n--- Eigenvalue Decomposition Benchmark (Matrix Size: " << matSize
|
||||
<< " x " << matSize << ") ---" << endl;
|
||||
MatrixXd M = MatrixXd::Random(matSize, matSize);
|
||||
// Make matrix symmetric (necessary for eigenvalue decomposition of
|
||||
// self-adjoint matrices)
|
||||
M = (M + M.transpose()) * 0.5;
|
||||
|
||||
SelfAdjointEigenSolver<MatrixXd> eigensolver;
|
||||
auto start = high_resolution_clock::now();
|
||||
eigensolver.compute(M);
|
||||
auto end = high_resolution_clock::now();
|
||||
double elapsed_ms = duration_cast<milliseconds>(end - start).count();
|
||||
if (eigensolver.info() == Success) {
|
||||
cout << "Eigenvalue decomposition time: " << elapsed_ms << " ms" << endl;
|
||||
} else {
|
||||
cout << "Eigenvalue decomposition failed." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Function simulating a real-world FSI risk computation scenario.
|
||||
// Example: Compute covariance matrix from simulated asset returns, then perform
|
||||
// eigenvalue decomposition.
|
||||
void benchmarkFSIRiskComputation(int numPeriods, int numAssets) {
|
||||
cout << "\n--- FSI Risk Computation Benchmark ---" << endl;
|
||||
cout << "Simulating " << numPeriods << " periods for " << numAssets
|
||||
<< " assets." << endl;
|
||||
|
||||
// Simulate asset returns: each column represents an asset's returns.
|
||||
MatrixXd returns = MatrixXd::Random(numPeriods, numAssets);
|
||||
|
||||
// Compute covariance matrix: cov = (returns^T * returns) / (numPeriods - 1)
|
||||
auto start = high_resolution_clock::now();
|
||||
MatrixXd cov = (returns.transpose() * returns) / (numPeriods - 1);
|
||||
auto end = high_resolution_clock::now();
|
||||
double cov_time = duration_cast<milliseconds>(end - start).count();
|
||||
cout << "Covariance matrix computation time: " << cov_time << " ms" << endl;
|
||||
|
||||
// Eigenvalue decomposition on covariance matrix.
|
||||
SelfAdjointEigenSolver<MatrixXd> eigensolver;
|
||||
start = high_resolution_clock::now();
|
||||
eigensolver.compute(cov);
|
||||
end = high_resolution_clock::now();
|
||||
double eig_time = duration_cast<milliseconds>(end - start).count();
|
||||
if (eigensolver.info() == Success) {
|
||||
cout << "Eigenvalue decomposition (covariance) time: " << eig_time << " ms"
|
||||
<< endl;
|
||||
cout << "Top 3 Eigenvalues: "
|
||||
<< eigensolver.eigenvalues().tail(3).transpose() << endl;
|
||||
} else {
|
||||
cout << "Eigenvalue decomposition failed." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
cout << "=== AOCL Benchmark for Eigen on AMD Platforms ===" << endl;
|
||||
cout << "Developer: Sharad Saurabh Bhaskar (shbhaska@amd.com)" << endl;
|
||||
cout << "Organization: Advanced Micro Devices, Inc." << endl;
|
||||
cout << "License: Mozilla Public License 2.0" << endl << endl;
|
||||
|
||||
// Print AOCL configuration
|
||||
#ifdef EIGEN_USE_AOCL_MT
|
||||
cout << "AOCL Mode: MULTITHREADED (MT)" << endl;
|
||||
cout << "Features: Multithreaded BLIS, AOCL VML, LAPACK" << endl;
|
||||
#elif defined(EIGEN_USE_AOCL_ALL)
|
||||
cout << "AOCL Mode: SINGLE-THREADED (ALL)" << endl;
|
||||
cout << "Features: Single-threaded BLIS, AOCL VML, LAPACK" << endl;
|
||||
#else
|
||||
cout << "AOCL Mode: DISABLED" << endl;
|
||||
cout << "Using standard Eigen implementation" << endl;
|
||||
#endif
|
||||
cout << "Hardware threads available: " << thread::hardware_concurrency() << endl << endl;
|
||||
|
||||
// Benchmark vector math functions with varying vector sizes.
|
||||
vector<int> vectorSizes = {5000000, 10000000, 50000000};
|
||||
for (int size : vectorSizes) {
|
||||
benchmarkVectorMath(size);
|
||||
}
|
||||
|
||||
// Benchmark matrix multiplication for varying sizes.
|
||||
vector<int> matrixSizes = {1024};
|
||||
for (int msize : matrixSizes) {
|
||||
benchmarkMatrixMultiplication(msize);
|
||||
#if defined(EIGEN_AOCL_USE_BLIS_MT)
|
||||
benchmarkBlisMultithreaded(msize, thread::hardware_concurrency());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Benchmark LAPACK: Eigenvalue Decomposition.
|
||||
for (int msize : matrixSizes) {
|
||||
benchmarkEigenDecomposition(msize);
|
||||
}
|
||||
|
||||
// Benchmark a complex FSI risk computation scenario.
|
||||
// For example, simulate 10,000 time periods (days) for 500 assets.
|
||||
benchmarkFSIRiskComputation(10000, 500);
|
||||
|
||||
cout << "\n=== Benchmark Complete ===" << endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
#!/bin/bash
# Compiles and times benchmark.cpp (fixed size 3x3) and benchmarkX.cpp
# (dynamic size 20x20) in every combination of storage order and assertion
# setting. Benchmark output is discarded; only the labels and timings are shown.

# Keep a caller-provided CXX; fall back to g++ only when CXX is unset.
: "${CXX=g++}"

# run_case LABEL PROGRAM [EXTRA_FLAG...]
#   Prints LABEL, compiles PROGRAM.cpp into PROGRAM with -O3 plus the extra
#   flags, and times the resulting binary if compilation succeeded.
run_case() {
  local label=$1 program=$2
  shift 2
  echo "$label"
  # $CXX is left unquoted on purpose so it expands exactly as in a plain
  # command line (word splitting applies).
  $CXX -O3 -I .. "$@" "$program.cpp" -o "$program" && time "./$program" >/dev/null
}

run_case "Fixed size 3x3, column-major, -DNDEBUG" benchmark -DNDEBUG
run_case "Fixed size 3x3, column-major, with asserts" benchmark
run_case "Fixed size 3x3, row-major, -DNDEBUG" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
run_case "Fixed size 3x3, row-major, with asserts" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR
run_case "Dynamic size 20x20, column-major, -DNDEBUG" benchmarkX -DNDEBUG
run_case "Dynamic size 20x20, column-major, with asserts" benchmarkX
run_case "Dynamic size 20x20, row-major, -DNDEBUG" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
run_case "Dynamic size 20x20, row-major, with asserts" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
@@ -1,107 +0,0 @@
|
||||
# Top-level configuration of the Bench Template Library (BTL): compiler flags,
# shared include directories and the optional librt dependency.

# cmake_minimum_required() must come before project() so that policy defaults
# are established before the project and its languages are set up.
# NOTE(review): 2.6.2 is below the minimum policy version accepted by recent
# CMake releases; raising it may be required - confirm against the CMake
# versions this project supports.
cmake_minimum_required(VERSION 2.6.2)
project(BTL)

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake)
include(MacroOptionalAddSubdirectory)

option(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)

set(CMAKE_INCLUDE_CURRENT_DIR ON)

# Detect the Intel compiler from the compiler path. The expansion is quoted so
# that an empty CMAKE_CXX_COMPILER does not drop the input argument.
string(REGEX MATCH icpc IS_ICPC "${CMAKE_CXX_COMPILER}")
if(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
  set(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}")
  set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}")
  if(BTL_NOVEC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
  endif()
endif()

if(MSVC)
  # Add the optimisation flags to the existing flags instead of replacing
  # them, so flags supplied by the user or the toolchain are not discarded.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ot /GL /fp:fast -DNDEBUG")
  # set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
  if(BTL_NOVEC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
  endif()
endif()

if(IS_ICPC)
  set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}")
  set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}")
endif()

# Include directories shared by the benchmark sources.
include_directories(
  ${PROJECT_SOURCE_DIR}/actions
  ${PROJECT_SOURCE_DIR}/generic_bench
  ${PROJECT_SOURCE_DIR}/generic_bench/utils
  ${PROJECT_SOURCE_DIR}/libs/STL)

# find_package(MKL)
# if (MKL_FOUND)
#   add_definitions(-DHAVE_MKL)
#   set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
# endif ()

find_library(EIGEN_BTL_RT_LIBRARY rt)
# if we cannot find it easily, then we don't need it!
if(NOT EIGEN_BTL_RT_LIBRARY)
  set(EIGEN_BTL_RT_LIBRARY "")
endif()
|
||||
|
||||
# BTL_ADD_BENCH(<targetname> <source>... [ON|OFF|TRUE|FALSE])
#
# Declares the option BUILD_<targetname> and, when it is enabled, creates the
# executable <targetname> from the given sources, registers it as a test and
# links it against ${DEFAULT_LIBRARIES} and ${EIGEN_BTL_RT_LIBRARY}.
#
# The option's default is the last argument when the argument list contains a
# boolean keyword (ON, OFF, TRUE or FALSE); otherwise it is ON. Every boolean
# keyword is removed from the source list.
macro(BTL_ADD_BENCH targetname)

  set(_sources ${ARGN})
  list(LENGTH _sources _argn_length)

  # Remember the final argument: it is the candidate default for the option.
  if(_argn_length GREATER 0)
    list(GET _sources -1 _last_var)
  endif()

  # Drop the boolean keywords; what remains are the real source files.
  list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
  list(LENGTH _sources _src_length)

  # Nothing was removed, so no default was given: build by default.
  if(_argn_length EQUAL _src_length)
    set(_last_var ON)
  endif()

  option(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})

  if(BUILD_${targetname})
    add_executable(${targetname} ${_sources})
    add_test(${targetname} "${targetname}")
    target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY})
  endif()

endmacro()
|
||||
|
||||
# btl_add_target_property(<target> <prop> <value>)
#
# Appends <value>, separated by a space, to property <prop> of <target>.
# Does nothing unless BUILD_<target> is enabled. A property value that is
# unset or evaluates to false is treated as empty.
macro(btl_add_target_property target prop value)

  if(BUILD_${target})
    get_target_property(previous ${target} ${prop})
    if(previous)
      # Keep the existing value; the new one is appended below.
    else()
      set(previous "")
    endif()
    set_target_properties(${target}
      PROPERTIES
        ${prop} "${previous} ${value}")
  endif()

endmacro()
|
||||
|
||||
# Enable CTest and pull in every library backend, followed by the data tools.
enable_testing()

foreach(_btl_lib eigen3 eigen2 tensors BLAS ublas gmm mtl4 blitz tvmet STL blaze)
  add_subdirectory(libs/${_btl_lib})
endforeach()

add_subdirectory(data)
|
||||
|
||||
|
||||
@@ -1,340 +0,0 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Library General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General
|
||||
Public License instead of this License.
|
||||
154
bench/btl/README
154
bench/btl/README
@@ -1,154 +0,0 @@
|
||||
Bench Template Library
|
||||
|
||||
****************************************
|
||||
Introduction :
|
||||
|
||||
The aim of this project is to compare the performance
|
||||
of available numerical libraries. The code is designed
|
||||
as generic and modular as possible. Thus, adding new
|
||||
numerical libraries or new numerical tests should
|
||||
require minimal effort.
|
||||
|
||||
|
||||
*****************************************
|
||||
|
||||
Installation :
|
||||
|
||||
BTL uses cmake / ctest:
|
||||
|
||||
1 - create a build directory:
|
||||
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
|
||||
2 - configure:
|
||||
|
||||
$ ccmake ..
|
||||
|
||||
3 - run the bench using ctest:
|
||||
|
||||
$ ctest -V
|
||||
|
||||
You can run the benchmarks only on libraries matching a given regular expression:
|
||||
ctest -V -R <regexp>
|
||||
For instance:
|
||||
ctest -V -R eigen2
|
||||
|
||||
You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
|
||||
BTL_CONFIG="-a action1{:action2}*" ctest -V
|
||||
An example:
|
||||
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
|
||||
|
||||
Finally, if bench results already exist (the bench*.dat files), the new results are merged with them by keeping the best value for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option:
|
||||
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
|
||||
|
||||
4 : Analyze the results. Different data files (.dat) are produced in each of the libs/* directories.
|
||||
If gnuplot is available, choose a directory name in the data directory to store the results and type:
|
||||
$ cd data
|
||||
$ mkdir my_directory
|
||||
$ cp ../libs/*/*.dat my_directory
|
||||
Build the data utilities in this (data) directory
|
||||
make
|
||||
Then you can look at the raw data,
|
||||
go_mean my_directory
|
||||
or smooth the data first :
|
||||
smooth_all.sh my_directory
|
||||
go_mean my_directory_smooth
|
||||
|
||||
|
||||
*************************************************
|
||||
|
||||
Files and directories :
|
||||
|
||||
generic_bench : all the bench sources common to all libraries
|
||||
|
||||
actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested.
|
||||
|
||||
libs/* : bench sources specific to each tested library.
|
||||
|
||||
machine_dep : directory used to store machine specific Makefile.in
|
||||
|
||||
data : directory used to store gnuplot scripts and data analysis utilities
|
||||
|
||||
**************************************************
|
||||
|
||||
Principles : the code modularity is achieved by defining two concepts :
|
||||
|
||||
****** Action concept : This is a class defining which kind
|
||||
of test must be performed (e.g. a matrix_vector_product).
|
||||
An Action should define the following methods :
|
||||
|
||||
*** Ctor using the size of the problem (matrix or vector size) as an argument
|
||||
Action action(size);
|
||||
*** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments)
|
||||
action.initialize();
|
||||
*** calculate : this method actually launches the calculation to be benchmarked
|
||||
action.calculate();
|
||||
*** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation)
|
||||
*** name() : this method returns the name of the action (std::string)
|
||||
|
||||
****** Interface concept : This is a class or namespace defining how to use a given library and
|
||||
its specific containers (matrix and vector). Up to now an interface should define the following types
|
||||
|
||||
*** real_type : kind of float to be used (float or double)
|
||||
*** stl_vector : must correspond to std::vector<real_type>
|
||||
*** stl_matrix : must correspond to std::vector<stl_vector>
|
||||
*** gene_vector : the vector type for this interface --> e.g. (real_type *) for the C_interface
|
||||
*** gene_matrix : the matrix type for this interface --> e.g. (gene_vector *) for the C_interface
|
||||
|
||||
+ the following common methods
|
||||
|
||||
*** free_matrix(gene_matrix & A, int N) deallocation of an N sized gene_matrix A
|
||||
*** free_vector(gene_vector & B) deallocation of an N sized gene_vector B
|
||||
*** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A.
|
||||
The allocation of A is done in this function.
|
||||
*** vector_from_stl(gene_vector & B, stl_vector & B_stl) copy the content of an stl_vector B_stl into a gene_vector B.
|
||||
The allocation of B is done in this function.
|
||||
*** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an gene_matrix A into an stl_matrix A_stl.
|
||||
The size of A_stl must correspond to the size of A.
|
||||
*** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of an gene_vector A into an stl_vector A_stl.
|
||||
The size of A_stl must correspond to the size of A.
|
||||
*** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source
|
||||
and cible must be sized NxN.
|
||||
*** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source
|
||||
and cible must be sized N.
|
||||
|
||||
and the following methods, each corresponding to an action one wants to benchmark :
|
||||
|
||||
*** matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
*** matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
|
||||
*** ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
*** aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
*** axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
|
||||
|
||||
The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with
|
||||
an interface. A typical main.cpp source stored in a given library directory libs/A_LIB
|
||||
looks like :
|
||||
|
||||
bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
|
||||
|
||||
this function will produce XY data file containing measured mflops as a function of the size for 50
|
||||
sizes between 10 and 1000.
|
||||
|
||||
This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time
|
||||
measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides
|
||||
a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer
|
||||
so
|
||||
|
||||
bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
|
||||
|
||||
is equivalent to
|
||||
|
||||
bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
|
||||
|
||||
If your system supports it we suggest using a mixed implementation (X86_Perf_Analyzer+Portable_Perf_Analyzer).
|
||||
replace
|
||||
bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
with
|
||||
bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
in generic_bench/bench.hh
|
||||
|
||||
.
|
||||
|
||||
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_aat_product.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_AAT_PRODUCT
|
||||
#define ACTION_AAT_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_aat_product {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_aat_product(int size) : _size(size) {
|
||||
MESSAGE("Action_aat_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_aat_product(const Action_aat_product&) {
|
||||
INFOS("illegal call to Action_aat_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_aat_product(void) {
|
||||
MESSAGE("Action_aat_product Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "aat_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return double(_size) * double(_size) * double(_size); }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::aat_product(A, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
|
||||
Interface::matrix_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::aat_product(A_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,118 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_ata_product.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_ATA_PRODUCT
|
||||
#define ACTION_ATA_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_ata_product {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_ata_product(int size) : _size(size) {
|
||||
MESSAGE("Action_ata_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_ata_product(const Action_ata_product&) {
|
||||
INFOS("illegal call to Action_ata_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_ata_product(void) {
|
||||
MESSAGE("Action_ata_product Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "ata_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::ata_product(A, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
|
||||
Interface::matrix_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::ata_product(A_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,120 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_atv_product.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_ATV_PRODUCT
|
||||
#define ACTION_ATV_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_atv_product {
|
||||
public:
|
||||
Action_atv_product(int size) : _size(size) {
|
||||
MESSAGE("Action_atv_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<null_function>(X_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_atv_product(const Action_atv_product&) {
|
||||
INFOS("illegal call to Action_atv_product Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
~Action_atv_product(void) {
|
||||
MESSAGE("Action_atv_product Dtor");
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
}
|
||||
|
||||
static inline std::string name() { return "atv_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("begin atv");
|
||||
Interface::atv_product(A, B, X, _size);
|
||||
BTL_ASM_COMMENT("end atv");
|
||||
}
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::atv_product(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
typename Interface::gene_vector X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,116 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_axpby.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_AXPBY
|
||||
#define ACTION_AXPBY
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_axpby {
|
||||
public:
|
||||
// Ctor
|
||||
Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) {
|
||||
MESSAGE("Action_axpby Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_vector<pseudo_random>(X_stl, _size);
|
||||
init_vector<pseudo_random>(Y_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(Y_ref, Y_stl);
|
||||
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
Interface::vector_from_stl(Y, Y_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_axpby(const Action_axpby&) {
|
||||
INFOS("illegal call to Action_axpby Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
~Action_axpby(void) {
|
||||
MESSAGE("Action_axpby Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_vector(X_ref);
|
||||
Interface::free_vector(Y_ref);
|
||||
|
||||
Interface::free_vector(X);
|
||||
Interface::free_vector(Y);
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name(void) { return "axpby_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 3.0 * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
Interface::copy_vector(Y_ref, Y, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) {
|
||||
BTL_ASM_COMMENT("mybegin axpby");
|
||||
Interface::axpby(_alpha, X, _beta, Y, _size);
|
||||
BTL_ASM_COMMENT("myend axpby");
|
||||
}
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
Interface::vector_to_stl(Y, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::axpby(_alpha, X_stl, _beta, Y_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector Y_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_vector X_ref;
|
||||
typename Interface::gene_vector Y_ref;
|
||||
|
||||
typename Interface::gene_vector X;
|
||||
typename Interface::gene_vector Y;
|
||||
|
||||
typename Interface::real_type _alpha;
|
||||
typename Interface::real_type _beta;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,124 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_axpy.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_AXPY
|
||||
#define ACTION_AXPY
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_axpy {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_axpy(int size) : _coef(1.0), _size(size) {
|
||||
MESSAGE("Action_axpy Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
|
||||
init_vector<pseudo_random>(X_stl, _size);
|
||||
init_vector<pseudo_random>(Y_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(Y_ref, Y_stl);
|
||||
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
Interface::vector_from_stl(Y, Y_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_axpy(const Action_axpy&) {
|
||||
INFOS("illegal call to Action_axpy Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_axpy(void) {
|
||||
MESSAGE("Action_axpy Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_vector(X_ref);
|
||||
Interface::free_vector(Y_ref);
|
||||
|
||||
Interface::free_vector(X);
|
||||
Interface::free_vector(Y);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "axpy_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
Interface::copy_vector(Y_ref, Y, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) {
|
||||
BTL_ASM_COMMENT("mybegin axpy");
|
||||
Interface::axpy(_coef, X, Y, _size);
|
||||
BTL_ASM_COMMENT("myend axpy");
|
||||
}
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
|
||||
Interface::vector_to_stl(Y, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::axpy(_coef, X_stl, Y_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector Y_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_vector X_ref;
|
||||
typename Interface::gene_vector Y_ref;
|
||||
|
||||
typename Interface::gene_vector X;
|
||||
typename Interface::gene_vector Y;
|
||||
|
||||
typename Interface::real_type _coef;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,110 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_cholesky.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_CHOLESKY
|
||||
#define ACTION_CHOLESKY
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_cholesky {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_cholesky(int size) : _size(size) {
|
||||
MESSAGE("Action_cholesky Ctor");
|
||||
|
||||
// STL mat/vec initialization
|
||||
init_matrix_symm<pseudo_random>(X_stl, _size);
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
|
||||
// make sure X is invertible
|
||||
for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
double r = std::max(_size - j - 1, 0);
|
||||
_cost += 2 * (r * j + r + j);
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_cholesky(const Action_cholesky&) {
|
||||
INFOS("illegal call to Action_cholesky Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_cholesky(void) {
|
||||
MESSAGE("Action_cholesky Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
Interface::free_matrix(C, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "cholesky_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate(void) { Interface::cholesky(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,114 +0,0 @@
|
||||
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_GER
|
||||
#define ACTION_GER
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_ger {
|
||||
public:
|
||||
// Ctor
|
||||
BTL_DONT_INLINE Action_ger(int size) : _size(size) {
|
||||
MESSAGE("Action_ger Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
typename Interface::stl_matrix tmp;
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<pseudo_random>(X_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_ger(const Action_ger&) {
|
||||
INFOS("illegal call to Action_ger Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
BTL_DONT_INLINE ~Action_ger(void) {
|
||||
MESSAGE("Action_ger Dtor");
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name(void) { return "ger_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("#begin ger");
|
||||
Interface::ger(A, B, X, _size);
|
||||
BTL_ASM_COMMENT("end ger");
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
// calculation check
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::ger(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-3) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
typename Interface::gene_vector X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,200 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_hessenberg.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_HESSENBERG
|
||||
#define ACTION_HESSENBERG
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_hessenberg {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_hessenberg(int size) : _size(size) {
|
||||
MESSAGE("Action_hessenberg Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl, _size);
|
||||
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size - 2; ++j) {
|
||||
double r = std::max(0, _size - j - 1);
|
||||
double b = std::max(0, _size - j - 2);
|
||||
_cost += 6 + 3 * b + r * r * 4 + r * _size * 4;
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_hessenberg(const Action_hessenberg&) {
|
||||
INFOS("illegal call to Action_hessenberg Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_hessenberg(void) {
|
||||
MESSAGE("Action_hessenberg Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
Interface::free_matrix(C, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "hessenberg_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate(void) { Interface::hessenberg(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
Interface::matrix_to_stl(C, resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
template <class Interface>
|
||||
class Action_tridiagonalization {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_tridiagonalization(int size) : _size(size) {
|
||||
MESSAGE("Action_tridiagonalization Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl, _size);
|
||||
|
||||
for (int i = 0; i < _size; ++i) {
|
||||
for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i];
|
||||
}
|
||||
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size - 2; ++j) {
|
||||
double r = std::max(0, _size - j - 1);
|
||||
double b = std::max(0, _size - j - 2);
|
||||
_cost += 6. + 3. * b + r * r * 8.;
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_tridiagonalization(const Action_tridiagonalization&) {
|
||||
INFOS("illegal call to Action_tridiagonalization Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_tridiagonalization(void) {
|
||||
MESSAGE("Action_tridiagonalization Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
Interface::free_matrix(C, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate(void) { Interface::tridiagonalization(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
Interface::matrix_to_stl(C, resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,108 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_lu_decomp.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_LU_DECOMP
|
||||
#define ACTION_LU_DECOMP
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_lu_decomp {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_lu_decomp(int size) : _size(size) {
|
||||
MESSAGE("Action_lu_decomp Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl, _size);
|
||||
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 2.0 * size * size * size / 3.0 + size * size;
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_lu_decomp(const Action_lu_decomp&) {
|
||||
INFOS("illegal call to Action_lu_decomp Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_lu_decomp(void) {
|
||||
MESSAGE("Action_lu_decomp Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
Interface::free_matrix(C, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate(void) { Interface::lu_decomp(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
Interface::matrix_to_stl(C, resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,120 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_lu_solve.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_LU_SOLVE
|
||||
#define ACTION_LU_SOLVE
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_lu_solve {
|
||||
public:
|
||||
static inline std::string name(void) { return "lu_solve_" + Interface::name(); }
|
||||
|
||||
static double nb_op_base(int size) {
|
||||
return 2.0 * size * size * size / 3.0; // questionable but not really important
|
||||
}
|
||||
|
||||
static double calculate(int nb_calc, int size) {
|
||||
// STL matrix and vector initialization
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, size);
|
||||
init_vector<pseudo_random>(B_stl, size);
|
||||
init_vector<null_function>(X_stl, size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
typename Interface::gene_matrix LU;
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(LU, A_stl);
|
||||
|
||||
// local variable :
|
||||
|
||||
typename Interface::Pivot_Vector pivot; // pivot vector
|
||||
Interface::new_Pivot_Vector(pivot, size);
|
||||
|
||||
// timer utilities
|
||||
|
||||
Portable_Timer chronos;
|
||||
|
||||
// time measurement
|
||||
|
||||
chronos.start();
|
||||
|
||||
for (int ii = 0; ii < nb_calc; ii++) {
|
||||
// LU factorization
|
||||
Interface::copy_matrix(A, LU, size);
|
||||
Interface::LU_factor(LU, pivot, size);
|
||||
|
||||
// LU solve
|
||||
|
||||
Interface::LU_solve(LU, pivot, B, X, size);
|
||||
}
|
||||
|
||||
// Time stop
|
||||
|
||||
chronos.stop();
|
||||
|
||||
double time = chronos.user_time();
|
||||
|
||||
// check result :
|
||||
|
||||
typename Interface::stl_vector B_new_stl(size);
|
||||
Interface::vector_to_stl(X, X_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, X_stl, B_new_stl, size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(B_stl, B_new_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
STL_interface<typename Interface::real_type>::display_vector(B_stl);
|
||||
STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// deallocation and return time
|
||||
|
||||
Interface::free_matrix(A, size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
Interface::free_Pivot_Vector(pivot);
|
||||
|
||||
return time;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,124 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_matrix_matrix_product.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_MATRIX_MATRIX_PRODUCT
|
||||
#define ACTION_MATRIX_MATRIX_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_matrix_product {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_matrix_matrix_product(int size) : _size(size) {
|
||||
MESSAGE("Action_matrix_matrix_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_matrix<pseudo_random>(B_stl, _size);
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(B_ref, B_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_matrix_matrix_product(const Action_matrix_matrix_product&) {
|
||||
INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_matrix_matrix_product(void) {
|
||||
MESSAGE("Action_matrix_matrix_product Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_matrix(B, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
if (_size < 200) {
|
||||
Interface::matrix_to_stl(X, resu_stl);
|
||||
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, _size);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix B_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,131 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_matrix_matrix_product_bis.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS
|
||||
#define ACTION_MATRIX_MATRIX_PRODUCT_BIS
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include "STL_timer.hh"
|
||||
#include <string>
|
||||
#include "init_function.hh"
|
||||
#include "init_vector.hh"
|
||||
#include "init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_matrix_product_bis {
|
||||
public:
|
||||
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
|
||||
|
||||
static double nb_op_base(int size) { return 2.0 * size * size * size; }
|
||||
|
||||
static double calculate(int nb_calc, int size) {
|
||||
// STL matrix and vector initialization
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, size);
|
||||
init_matrix<pseudo_random>(B_stl, size);
|
||||
init_matrix<null_function>(X_stl, size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix B_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(B_ref, B_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
// STL_timer utilities
|
||||
|
||||
STL_timer chronos;
|
||||
|
||||
// Baseline evaluation
|
||||
|
||||
chronos.start_baseline(nb_calc);
|
||||
|
||||
do {
|
||||
Interface::copy_matrix(A_ref, A, size);
|
||||
Interface::copy_matrix(B_ref, B, size);
|
||||
Interface::copy_matrix(X_ref, X, size);
|
||||
|
||||
// Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
|
||||
} while (chronos.check());
|
||||
|
||||
chronos.report(true);
|
||||
|
||||
// Time measurement
|
||||
|
||||
chronos.start(nb_calc);
|
||||
|
||||
do {
|
||||
Interface::copy_matrix(A_ref, A, size);
|
||||
Interface::copy_matrix(B_ref, B, size);
|
||||
Interface::copy_matrix(X_ref, X, size);
|
||||
|
||||
Interface::matrix_matrix_product(A, B, X, size); // here it is not commented !!!!
|
||||
} while (chronos.check());
|
||||
|
||||
chronos.report(true);
|
||||
|
||||
double time = chronos.calculated_time / 2000.0;
|
||||
|
||||
// calculation check
|
||||
|
||||
typename Interface::stl_matrix resu_stl(size);
|
||||
|
||||
Interface::matrix_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// deallocation and return time
|
||||
|
||||
Interface::free_matrix(A, size);
|
||||
Interface::free_matrix(B, size);
|
||||
Interface::free_matrix(X, size);
|
||||
|
||||
Interface::free_matrix(A_ref, size);
|
||||
Interface::free_matrix(B_ref, size);
|
||||
Interface::free_matrix(X_ref, size);
|
||||
|
||||
return time;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,129 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_matrix_vector_product.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_MATRIX_VECTOR_PRODUCT
|
||||
#define ACTION_MATRIX_VECTOR_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_vector_product {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) {
|
||||
MESSAGE("Action_matrix_vector_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<null_function>(X_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_matrix_vector_product(const Action_matrix_vector_product&) {
|
||||
INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
BTL_DONT_INLINE ~Action_matrix_vector_product(void) {
|
||||
MESSAGE("Action_matrix_vector_product Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "matrix_vector_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("#begin matrix_vector_product");
|
||||
Interface::matrix_vector_product(A, B, X, _size);
|
||||
BTL_ASM_COMMENT("end matrix_vector_product");
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
// calculation check
|
||||
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
typename Interface::gene_vector X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,108 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_partial_lu.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_PARTIAL_LU
|
||||
#define ACTION_PARTIAL_LU
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_partial_lu {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_partial_lu(int size) : _size(size) {
|
||||
MESSAGE("Action_partial_lu Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl, _size);
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
|
||||
// make sure X is invertible
|
||||
for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 2.0 * size * size * size / 3.0 + size * size;
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_partial_lu(const Action_partial_lu&) {
|
||||
INFOS("illegal call to Action_partial_lu Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_partial_lu(void) {
|
||||
MESSAGE("Action_partial_lu Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
Interface::free_matrix(C, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
// Interface::matrix_to_stl(C,resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,104 +0,0 @@
|
||||
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_ROT
|
||||
#define ACTION_ROT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_rot {
|
||||
public:
|
||||
// Ctor
|
||||
BTL_DONT_INLINE Action_rot(int size) : _size(size) {
|
||||
MESSAGE("Action_rot Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
typename Interface::stl_matrix tmp;
|
||||
init_vector<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::vector_from_stl(A_ref, A_stl);
|
||||
Interface::vector_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_rot(const Action_rot&) {
|
||||
INFOS("illegal call to Action_rot Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
BTL_DONT_INLINE ~Action_rot(void) {
|
||||
MESSAGE("Action_rot Dtor");
|
||||
Interface::free_vector(A);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(A_ref);
|
||||
Interface::free_vector(B_ref);
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name(void) { return "rot_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 6.0 * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_vector(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("#begin rot");
|
||||
Interface::rot(A, B, 0.5, 0.6, _size);
|
||||
BTL_ASM_COMMENT("end rot");
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
// calculation check
|
||||
// Interface::vector_to_stl(X,resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
|
||||
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
|
||||
// if (error>1.e-3){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_vector A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
|
||||
typename Interface::gene_vector A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
|
||||
typename Interface::gene_vector A;
|
||||
typename Interface::gene_vector B;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,121 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_symv.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_SYMV
|
||||
#define ACTION_SYMV
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_symv {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_symv(int size) : _size(size) {
|
||||
MESSAGE("Action_symv Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
init_matrix_symm<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<null_function>(X_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_symv(const Action_symv&) {
|
||||
INFOS("illegal call to Action_symv Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
BTL_DONT_INLINE ~Action_symv(void) {
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "symv_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("#begin symv");
|
||||
Interface::symv(A, B, X, _size);
|
||||
BTL_ASM_COMMENT("end symv");
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::symv(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
typename Interface::gene_vector X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,118 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_syr2.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_SYR2
|
||||
#define ACTION_SYR2
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_syr2 {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_syr2(int size) : _size(size) {
|
||||
// STL matrix and vector initialization
|
||||
typename Interface::stl_matrix tmp;
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<pseudo_random>(X_stl, _size);
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::vector_from_stl(B_ref, B_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_syr2(const Action_syr2&) {
|
||||
INFOS("illegal call to Action_syr2 Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
BTL_DONT_INLINE ~Action_syr2(void) {
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "syr2_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
BTL_ASM_COMMENT("#begin syr2");
|
||||
Interface::syr2(A, B, X, _size);
|
||||
BTL_ASM_COMMENT("end syr2");
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
// calculation check
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::syr2(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-3) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_vector B_ref;
|
||||
typename Interface::gene_vector X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
int _size;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,119 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_trisolve.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_TRISOLVE
|
||||
#define ACTION_TRISOLVE
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trisolve {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_trisolve(int size) : _size(size) {
|
||||
MESSAGE("Action_trisolve Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(L_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<null_function>(X_stl, _size);
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) L_stl[j][i] = 0;
|
||||
L_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
init_vector<null_function>(resu_stl, _size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(L, L_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trisolve(const Action_trisolve&) {
|
||||
INFOS("illegal call to Action_trisolve Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_trisolve(void) {
|
||||
MESSAGE("Action_trisolve Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(L, _size);
|
||||
Interface::free_vector(B);
|
||||
Interface::free_vector(X);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
// Interface::copy_vector(X_ref,X,_size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); }
|
||||
|
||||
void check_result() {
|
||||
if (_size > 128) return;
|
||||
// calculation check
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::trisolve_lower(L_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-4) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(2);
|
||||
} // else INFOS("CALCULATION OK...residual=" << error);
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix L_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
||||
typename Interface::gene_matrix L;
|
||||
typename Interface::gene_vector X;
|
||||
typename Interface::gene_vector B;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,139 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_trisolve_matrix.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT
|
||||
#define ACTION_TRISOLVE_MATRIX_PRODUCT
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trisolve_matrix {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_trisolve_matrix(int size) : _size(size) {
|
||||
MESSAGE("Action_trisolve_matrix Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_matrix<pseudo_random>(B_stl, _size);
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
|
||||
A_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(B_ref, B_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
_cost *= _size;
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trisolve_matrix(const Action_trisolve_matrix&) {
|
||||
INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_trisolve_matrix(void) {
|
||||
MESSAGE("Action_trisolve_matrix Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_matrix(B, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "trisolve_matrix_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::trisolve_lower_matrix(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
|
||||
// Interface::matrix_to_stl(X,resu_stl);
|
||||
//
|
||||
// STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// // exit(1);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix B_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,139 +0,0 @@
|
||||
//=====================================================
|
||||
// File : action_trmm.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_TRMM
|
||||
#define ACTION_TRMM
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trmm {
|
||||
public:
|
||||
// Ctor
|
||||
|
||||
Action_trmm(int size) : _size(size) {
|
||||
MESSAGE("Action_trmm Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
init_matrix<pseudo_random>(B_stl, _size);
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
|
||||
A_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
// generic matrix and vector initialization
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(B_ref, B_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
_cost *= _size;
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trmm(const Action_trmm&) {
|
||||
INFOS("illegal call to Action_trmm Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_trmm(void) {
|
||||
MESSAGE("Action_trmm Dtor");
|
||||
|
||||
// deallocation
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
Interface::free_matrix(B, _size);
|
||||
Interface::free_matrix(X, _size);
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name(void) { return "trmm_" + Interface::name(); }
|
||||
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
}
|
||||
|
||||
inline void calculate(void) { Interface::trmm(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
|
||||
// Interface::matrix_to_stl(X,resu_stl);
|
||||
//
|
||||
// STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// // exit(1);
|
||||
// }
|
||||
}
|
||||
|
||||
private:
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix A_ref;
|
||||
typename Interface::gene_matrix B_ref;
|
||||
typename Interface::gene_matrix X_ref;
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,20 +0,0 @@
|
||||
|
||||
#include "action_axpy.hh"
|
||||
#include "action_axpby.hh"
|
||||
|
||||
#include "action_matrix_vector_product.hh"
|
||||
#include "action_atv_product.hh"
|
||||
|
||||
#include "action_matrix_matrix_product.hh"
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
|
||||
#include "action_trisolve.hh"
|
||||
#include "action_trmm.hh"
|
||||
#include "action_symv.hh"
|
||||
// #include "action_symm.hh"
|
||||
#include "action_syr2.hh"
|
||||
#include "action_ger.hh"
|
||||
#include "action_rot.hh"
|
||||
|
||||
// #include "action_lu_solve.hh"
|
||||
@@ -1,51 +0,0 @@
|
||||
|
||||
# Locate the ACML library.
#
# Result variables:
#   ACML_LIBRARIES - path of the ACML library that was found (cache entry)
#
# The multi-threaded variants are tried first; the single-threaded ones are
# only searched when nothing has been found. Hints are taken from
# $ENV{ACMLDIR}, $ENV{ACML_DIR} and ${LIB_INSTALL_DIR}.

# Stay quiet when the result is already known.
if(ACML_LIBRARIES)
  set(ACML_FIND_QUIETLY TRUE)
endif()

find_library(ACML_LIBRARIES
  NAMES
    acml_mp
    acml_mv
  PATHS
    $ENV{ACMLDIR}/lib
    $ENV{ACML_DIR}/lib
    ${LIB_INSTALL_DIR}
)

# Fallback on the exact shared-object file name; this call is a no-op when the
# find_library() call above already succeeded.
find_file(ACML_LIBRARIES
  NAMES
    libacml_mp.so
  PATHS
    /usr/lib
    /usr/lib64
    $ENV{ACMLDIR}/lib
    ${LIB_INSTALL_DIR}
)

if(NOT ACML_LIBRARIES)
  message(STATUS "Multi-threaded library not found, looking for single-threaded")
  find_library(ACML_LIBRARIES
    NAMES
      acml
      acml_mv
    PATHS
      $ENV{ACMLDIR}/lib
      $ENV{ACML_DIR}/lib
      ${LIB_INSTALL_DIR}
  )
  # Several candidate names are listed, so the NAMES keyword is spelled out
  # (it was missing here) to match the documented find_file() signature.
  find_file(ACML_LIBRARIES
    NAMES
      libacml.so
      libacml_mv.so
    PATHS
      /usr/lib
      /usr/lib64
      $ENV{ACMLDIR}/lib
      ${LIB_INSTALL_DIR}
  )
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ACML DEFAULT_MSG ACML_LIBRARIES)

mark_as_advanced(ACML_LIBRARIES)
|
||||
@@ -1,31 +0,0 @@
|
||||
|
||||
# Locate the ATLAS libraries.
#
# Result variables:
#   ATLAS_LIBRARIES - ATLAS LAPACK library followed by the ATLAS core library;
#                     only set when all four components below were found
#
# Components searched (each first by exact file name, then as a library):
#   ATLAS_LIB, ATLAS_CBLAS, ATLAS_LAPACK, ATLAS_F77BLAS
# Hints are taken from $ENV{ATLASDIR} and ${LIB_INSTALL_DIR}.

# Stay quiet when the result is already known.
if(ATLAS_LIBRARIES)
  set(ATLAS_FIND_QUIETLY TRUE)
endif()

find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})

# ATLAS_CBLAS is required by the check below but was never searched for, so the
# check could only pass when the variable was supplied from outside. Search for
# it the same way as the other components; a value that is already set is kept.
find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})

find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})

find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})

if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)

  set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_LIB})

  # search the default lapack lib link to it
  # (the result, ATLAS_REFERENCE_LAPACK, is currently not appended to
  # ATLAS_LIBRARIES)
  find_file(ATLAS_REFERENCE_LAPACK liblapack.so.3 PATHS /usr/lib /usr/lib64)
  find_library(ATLAS_REFERENCE_LAPACK NAMES lapack)

endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS DEFAULT_MSG ATLAS_LIBRARIES)

mark_as_advanced(ATLAS_LIBRARIES)
|
||||
@@ -1,31 +0,0 @@
|
||||
# - Try to find the Blaze headers
# Once done this will define
#
#  BLAZE_FOUND - the Blaze headers were located
#  BLAZE_INCLUDE_DIR - directory that contains blaze/Blaze.h
#
# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
# Adapted from FindEigen.cmake:
# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

if(NOT BLAZE_INCLUDE_DIR)

  # Nothing known yet: search the configured include prefix.
  find_path(BLAZE_INCLUDE_DIR
    NAMES blaze/Blaze.h
    PATHS ${INCLUDE_INSTALL_DIR})

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(BLAZE DEFAULT_MSG BLAZE_INCLUDE_DIR)

  mark_as_advanced(BLAZE_INCLUDE_DIR)

else()

  # A location is already set (for example from the cache); reuse it silently.
  set(BLAZE_FOUND TRUE)

endif()
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
# - Try to find blitz lib
# Once done this will define
#
#  BLITZ_FOUND - both the blitz headers and library were located
#  BLITZ_INCLUDES - the blitz include directory
#  BLITZ_LIBRARIES - the library needed to use blitz
#
# Search hints: the BLITZDIR environment variable, INCLUDE_INSTALL_DIR and
# LIB_INSTALL_DIR.

# Copyright (c) 2006, Montel Laurent, <montel@kde.org>
# Copyright (c) 2007, Allen Winter, <winter@kde.org>
# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

# Both results already known: do not print the "Found" message again.
if(BLITZ_INCLUDES AND BLITZ_LIBRARIES)
  set(Blitz_FIND_QUIETLY TRUE)
endif()

find_path(BLITZ_INCLUDES
  NAMES blitz/array.h
  PATH_SUFFIXES blitz*
  PATHS $ENV{BLITZDIR}/include ${INCLUDE_INSTALL_DIR})

find_library(BLITZ_LIBRARIES blitz
  PATHS $ENV{BLITZDIR}/lib ${LIB_INSTALL_DIR})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Blitz DEFAULT_MSG BLITZ_INCLUDES BLITZ_LIBRARIES)

mark_as_advanced(BLITZ_INCLUDES BLITZ_LIBRARIES)
|
||||
@@ -1,35 +0,0 @@
|
||||
# Locate a CBLAS installation.
#
# Result variables:
#   CBLAS_FOUND      - both the header directory and the library were located
#   CBLAS_INCLUDES   - directory containing cblas.h
#   CBLAS_LIBRARIES  - the cblas library
#
# Search hints: the CBLASDIR environment variable, INCLUDE_INSTALL_DIR and
# LIB_INSTALL_DIR.

# Both results already known: do not print the "Found" message again.
if(CBLAS_INCLUDES AND CBLAS_LIBRARIES)
  set(CBLAS_FIND_QUIETLY TRUE)
endif()

find_path(CBLAS_INCLUDES
  NAMES cblas.h
  PATHS $ENV{CBLASDIR}/include ${INCLUDE_INSTALL_DIR})

# First attempt: look the library up by name.
find_library(CBLAS_LIBRARIES cblas
  PATHS $ENV{CBLASDIR}/lib ${LIB_INSTALL_DIR})

# Second attempt: look for the versioned shared object directly. This call
# leaves CBLAS_LIBRARIES untouched when the first attempt already succeeded.
find_file(CBLAS_LIBRARIES libcblas.so.3
  PATHS /usr/lib /usr/lib64 $ENV{CBLASDIR}/lib ${LIB_INSTALL_DIR})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CBLAS DEFAULT_MSG CBLAS_INCLUDES CBLAS_LIBRARIES)

mark_as_advanced(CBLAS_INCLUDES CBLAS_LIBRARIES)
|
||||
@@ -1,17 +0,0 @@
|
||||
# Locate the GMM++ headers.
#
# Result variables:
#   GMM_FOUND        - the headers were located
#   GMM_INCLUDE_DIR  - directory containing gmm/gmm.h
#
# Search hints: INCLUDE_INSTALL_DIR and GMM_INCLUDE_PATH.

if(NOT GMM_INCLUDE_DIR)

  find_path(GMM_INCLUDE_DIR
    NAMES gmm/gmm.h
    PATHS ${INCLUDE_INSTALL_DIR} ${GMM_INCLUDE_PATH})

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(GMM DEFAULT_MSG GMM_INCLUDE_DIR)

  mark_as_advanced(GMM_INCLUDE_DIR)

else()
  # A location is already set (for example from the cache); reuse it silently.
  set(GMM_FOUND TRUE)
endif()
|
||||
@@ -1,65 +0,0 @@
|
||||
|
||||
# Locate the Intel MKL libraries.
#
# Result variables:
#   MKL_FOUND      - set by find_package_handle_standard_args from MKL_LIBRARIES
#   MKL_LIBRARIES  - mkl_core, plus the interface, sequential, guide and pthread
#                    libraries when the guide library was located as well
#
# Search hints: the MKLLIB environment variable and LIB_INSTALL_DIR.

if(MKL_LIBRARIES)
  set(MKL_FIND_QUIETLY TRUE)
endif()

# The search used to be wrapped in "if(CMAKE_MINOR_VERSION GREATER 4)". That
# test ignores the major version, so the whole module was skipped on every
# CMake x.0 - x.4 release. It has been removed.

# The 64-bit and 32-bit layouts only differ in directory and interface-library
# names. The processor name is quoted so an empty value cannot break if().
if("${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
  set(_mkl_lib_subdir em64t)
  set(_mkl_compiler_subdir intel64)
  set(_mkl_interface_lib mkl_intel_lp64)
else()
  set(_mkl_lib_subdir 32)
  set(_mkl_compiler_subdir intel32)
  set(_mkl_interface_lib mkl_intel)
endif()

find_library(MKL_LIBRARIES
  mkl_core
  PATHS
    $ENV{MKLLIB}
    /opt/intel/mkl/*/lib/${_mkl_lib_subdir}
    /opt/intel/Compiler/*/*/mkl/lib/${_mkl_lib_subdir}
    ${LIB_INSTALL_DIR}
)

find_library(MKL_GUIDE
  guide
  PATHS
    $ENV{MKLLIB}
    /opt/intel/mkl/*/lib/${_mkl_lib_subdir}
    /opt/intel/Compiler/*/*/mkl/lib/${_mkl_lib_subdir}
    /opt/intel/Compiler/*/*/lib/${_mkl_compiler_subdir}
    ${LIB_INSTALL_DIR}
)

# Only extend the link line when both mkl_core and guide were located.
if(MKL_LIBRARIES AND MKL_GUIDE)
  set(MKL_LIBRARIES ${MKL_LIBRARIES} ${_mkl_interface_lib} mkl_sequential ${MKL_GUIDE} pthread)
endif()

# Do not leak the helper variables into the including scope.
unset(_mkl_lib_subdir)
unset(_mkl_compiler_subdir)
unset(_mkl_interface_lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARIES)

mark_as_advanced(MKL_LIBRARIES)
|
||||
@@ -1,31 +0,0 @@
|
||||
# - Try to find the MTL4 headers
# Once done this will define
#
#  MTL4_FOUND - the MTL4 headers were located
#  MTL4_INCLUDE_DIR - directory that contains boost/numeric/mtl/mtl.hpp
#
# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
# Adapted from FindEigen.cmake:
# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

if(NOT MTL4_INCLUDE_DIR)

  # Nothing known yet: search the configured include prefix.
  find_path(MTL4_INCLUDE_DIR
    NAMES boost/numeric/mtl/mtl.hpp
    PATHS ${INCLUDE_INSTALL_DIR})

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(MTL4 DEFAULT_MSG MTL4_INCLUDE_DIR)

  mark_as_advanced(MTL4_INCLUDE_DIR)

else()

  # A location is already set (for example from the cache); reuse it silently.
  set(MTL4_FOUND TRUE)

endif()
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
|
||||
# Locate the OpenBLAS library.
#
# Result variables:
#   OPENBLAS_FOUND      - set by find_package_handle_standard_args
#   OPENBLAS_LIBRARIES  - the OpenBLAS library, followed by extra link flags
#                         when the C++ compiler is GNU
#
# Search hints: the OPENBLASDIR environment variable and LIB_INSTALL_DIR.

if(OPENBLAS_LIBRARIES)
  set(OPENBLAS_FIND_QUIETLY TRUE)
endif()

# Try the shared object by file name first, then fall back to a lookup by
# library name. The second call is a no-op once the first one succeeded.
find_file(OPENBLAS_LIBRARIES
  NAMES libopenblas.so libopenblas.so.0
  PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR})
find_library(OPENBLAS_LIBRARIES openblas
  PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR})

# With a GNU C++ compiler, add the pthread and gfortran link flags as a
# single list element.
if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
  list(APPEND OPENBLAS_LIBRARIES "-lpthread -lgfortran")
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OPENBLAS DEFAULT_MSG OPENBLAS_LIBRARIES)

mark_as_advanced(OPENBLAS_LIBRARIES)
|
||||
@@ -1,60 +0,0 @@
|
||||
# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
#
# Helper for FindXXX.cmake modules. It honours the REQUIRED and QUIET
# arguments of find_package() and sets <UPPERCASED_NAME>_FOUND.
# The package counts as found only when every listed variable is TRUE.
#
# Example:
#
#    FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
#
# LibXml2 is found when both LIBXML2_LIBRARIES and LIBXML2_INCLUDE_DIR are
# valid; LIBXML2_FOUND is then TRUE.
# When the package is missing and REQUIRED was given, configuration stops with
# FATAL_ERROR regardless of QUIET.
#
# On success the value of VAR1 is reported, e.g.
# "Found LibXml2: /usr/lib/libxml2.so".
# With DEFAULT_MSG the failure text is "Could not find REQUIRED package <NAME>"
# or "Could not find OPTIONAL package <NAME>"; any other second argument is
# used verbatim as the failure text.

macro(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1)

  # Select the failure text: a custom one wins, otherwise pick the default
  # wording that matches the REQUIRED state.
  if(NOT "${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
    set(_FAIL_MESSAGE "${_FAIL_MSG}")
  elseif(${_NAME}_FIND_REQUIRED)
    set(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
  else()
    set(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
  endif()

  string(TOUPPER ${_NAME} _NAME_UPPER)

  # Assume success, then clear the flag for every variable that is not TRUE.
  set(${_NAME_UPPER}_FOUND TRUE)
  foreach(_CURRENT_VAR ${_VAR1} ${ARGN})
    if(NOT ${_CURRENT_VAR})
      set(${_NAME_UPPER}_FOUND FALSE)
    endif()
  endforeach()

  # Report the outcome.
  if(${_NAME_UPPER}_FOUND)
    if(NOT ${_NAME}_FIND_QUIETLY)
      message(STATUS "Found ${_NAME}: ${${_VAR1}}")
    endif()
  elseif(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${_FAIL_MESSAGE}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${_FAIL_MESSAGE}")
  endif()

endmacro()
|
||||
@@ -1,32 +0,0 @@
|
||||
# - Try to find tvmet headers
# Once done this will define
#
#  TVMET_FOUND - the tvmet headers were located
#  TVMET_INCLUDE_DIR - directory that contains tvmet/tvmet.h
#
# Search hints: the TVMETDIR and INCLUDE_INSTALL_DIR variables.
#
# Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
# Adapted from FindEigen.cmake:
# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

if(NOT TVMET_INCLUDE_DIR)

  find_path(TVMET_INCLUDE_DIR
    NAMES tvmet/tvmet.h
    PATHS ${TVMETDIR}/ ${INCLUDE_INSTALL_DIR})

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Tvmet DEFAULT_MSG TVMET_INCLUDE_DIR)

  mark_as_advanced(TVMET_INCLUDE_DIR)

else()

  # A location is already set (for example from the cache); reuse it silently.
  set(TVMET_FOUND TRUE)

endif()
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
# - MACRO_OPTIONAL_ADD_SUBDIRECTORY() combines add_subdirectory() with an option()
# MACRO_OPTIONAL_ADD_SUBDIRECTORY( <dir> [<default>] )
#
# Compared with a plain add_subdirectory() call:
#  1 - A directory that does not exist is skipped without complaint. This
#      helps when only some of the subdirectories are shipped in a source
#      package.
#  2 - For a directory that does exist, a BUILD_<dir> option is offered so the
#      directory can be left out of the build. The option defaults to TRUE,
#      or to <default> when the macro is called with exactly two arguments.

# Copyright (c) 2007, Alexander Neundorf, <neundorf@kde.org>
#
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

macro(MACRO_OPTIONAL_ADD_SUBDIRECTORY _dir)
  get_filename_component(_fullPath ${_dir} ABSOLUTE)
  if(EXISTS ${_fullPath})
    # Declare the switch; a second macro argument overrides the TRUE default.
    if(NOT ${ARGC} EQUAL 2)
      option(BUILD_${_dir} "Build directory ${_dir}" TRUE)
    else()
      option(BUILD_${_dir} "Build directory ${_dir}" ${ARGV1})
    endif()

    if(BUILD_${_dir})
      add_subdirectory(${_dir})
    endif()
  endif()
endmacro()
|
||||
@@ -1,32 +0,0 @@
|
||||
|
||||
# Helper target that stages the post-processing scripts and settings files in
# the build directory and records the compiler version and Eigen source root.
add_custom_target(copy_scripts)

set(script_files
  go_mean
  mk_mean_script.sh
  mk_new_gnuplot.sh
  perlib_plot_settings.txt
  action_settings.txt
  gnuplot_common_settings.hh
)

# Copy every script/settings file next to the binaries.
foreach(script_file ${script_files})
  add_custom_command(
    TARGET copy_scripts
    POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/
  )
endforeach()

# First line of the compiler's version banner, read later by go_mean.
add_custom_command(
  TARGET copy_scripts
  POST_BUILD
  COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt
)

# Location of the Eigen source tree, read later by go_mean.
add_custom_command(
  TARGET copy_scripts
  POST_BUILD
  COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt
)

add_executable(smooth smooth.cxx)
add_executable(regularize regularize.cxx)
add_executable(main mean.cxx)

# Building "main" also stages the scripts.
add_dependencies(main copy_scripts)
|
||||
@@ -1,19 +0,0 @@
|
||||
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000
|
||||
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000
|
||||
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000
|
||||
axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
|
||||
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
|
||||
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000
|
||||
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000
|
||||
trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000
|
||||
trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000
|
||||
trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000
|
||||
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000
|
||||
complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000
|
||||
partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000
|
||||
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000
|
||||
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000
|
||||
symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000
|
||||
syr2 ; "{/*1.5 symmetric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:5000
|
||||
ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000
|
||||
rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000
|
||||
@@ -1,87 +0,0 @@
|
||||
set noclip points
|
||||
set clip one
|
||||
set noclip two
|
||||
set bar 1.000000
|
||||
set border 31 lt -1 lw 1.000
|
||||
set xdata
|
||||
set ydata
|
||||
set zdata
|
||||
set x2data
|
||||
set y2data
|
||||
set boxwidth
|
||||
set dummy x,y
|
||||
set format x "%g"
|
||||
set format y "%g"
|
||||
set format x2 "%g"
|
||||
set format y2 "%g"
|
||||
set format z "%g"
|
||||
set angles radians
|
||||
set nogrid
|
||||
set key title ""
|
||||
set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0
|
||||
set nolabel
|
||||
set noarrow
|
||||
# set nolinestyle # deprecated
|
||||
set nologscale
|
||||
set logscale x 10
|
||||
set offsets 0, 0, 0, 0
|
||||
set pointsize 1
|
||||
set encoding default
|
||||
set nopolar
|
||||
set noparametric
|
||||
set view 60, 30, 1, 1
|
||||
set samples 100, 100
|
||||
set isosamples 10, 10
|
||||
set surface
|
||||
set nocontour
|
||||
set clabel '%8.3g'
|
||||
set mapping cartesian
|
||||
set nohidden3d
|
||||
set cntrparam order 4
|
||||
set cntrparam linear
|
||||
set cntrparam levels auto 5
|
||||
set cntrparam points 5
|
||||
set size ratio 0 1,1
|
||||
set origin 0,0
|
||||
# set data style lines
|
||||
# set function style lines
|
||||
set xzeroaxis lt -2 lw 1.000
|
||||
set x2zeroaxis lt -2 lw 1.000
|
||||
set yzeroaxis lt -2 lw 1.000
|
||||
set y2zeroaxis lt -2 lw 1.000
|
||||
set tics in
|
||||
set ticslevel 0.5
|
||||
set tics scale 1, 0.5
|
||||
set mxtics default
|
||||
set mytics default
|
||||
set mx2tics default
|
||||
set my2tics default
|
||||
set xtics border mirror norotate autofreq
|
||||
set ytics border mirror norotate autofreq
|
||||
set ztics border nomirror norotate autofreq
|
||||
set nox2tics
|
||||
set noy2tics
|
||||
set timestamp "" bottom norotate offset 0,0
|
||||
set rrange [ * : * ] noreverse nowriteback # (currently [-0:10] )
|
||||
set trange [ * : * ] noreverse nowriteback # (currently [-5:5] )
|
||||
set urange [ * : * ] noreverse nowriteback # (currently [-5:5] )
|
||||
set vrange [ * : * ] noreverse nowriteback # (currently [-5:5] )
|
||||
set xlabel "matrix size" offset 0,0
|
||||
set x2label "" offset 0,0
|
||||
set timefmt "%d/%m/%y\n%H:%M"
|
||||
set xrange [ 10 : 1000 ] noreverse nowriteback
|
||||
set x2range [ * : * ] noreverse nowriteback # (currently [-10:10] )
|
||||
set ylabel "MFLOPS" offset 0,0
|
||||
set y2label "" offset 0,0
|
||||
set yrange [ * : * ] noreverse nowriteback # (currently [-10:10] )
|
||||
set y2range [ * : * ] noreverse nowriteback # (currently [-10:10] )
|
||||
set zlabel "" offset 0,0
|
||||
set zrange [ * : * ] noreverse nowriteback # (currently [-10:10] )
|
||||
set zero 1e-08
|
||||
set lmargin -1
|
||||
set bmargin -1
|
||||
set rmargin -1
|
||||
set tmargin -1
|
||||
set locale "C"
|
||||
set xrange [4:1024]
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/bin/bash
# Build the benchmark summary pages for one result directory.
#
# Usage: go_mean working_directory [tiny|large [prefix]]
#
#   working_directory  directory that receives index.html and mean.html
#   tiny|large         plot mode forwarded to mk_mean_script.sh (default: large)
#   prefix             optional prefix forwarded to mk_mean_script.sh
#
# Reads eigen_root_dir.txt and compiler_version.txt from the current directory.

# Argument counts must be compared with -lt/-ge. Inside [ ], "<" and ">" are
# redirections, so "[ $# > 2 ]" was always true and created a file named "2".
if [ $# -lt 1 ]; then
  echo "Usage: $0 working_directory [tiny|large [prefix]]"
else

  mkdir -p "$1"
  ##cp ../libs/*/*.dat $1

  mode=large
  if [ $# -ge 2 ]; then
    mode=$2
  fi
  if [ $# -ge 3 ]; then
    prefix=$3
  fi

  EIGENDIR=$(cat eigen_root_dir.txt)

  webpagefilename=$1/index.html
  meanstatsfilename=$1/mean.html

  # Start both pages from scratch.
  echo '' > "$meanstatsfilename"
  echo '' > "$webpagefilename"
  echo '<p><strong>Configuration</strong>' >> "$webpagefilename"
  # The command substitutions are deliberately unquoted so echo receives the
  # individual words, as before.
  echo '<ul>' \
    '<li>' $(cat /proc/cpuinfo | grep "model name" | head -n 1) \
    ' (' $(uname -m) ')</li>' \
    '<li> compiler: ' $(cat compiler_version.txt) '</li>' \
    '<li> eigen3: ' $(git ls-remote --refs -q "$EIGENDIR" HEAD | cut -f 1) '</li>' \
    '</ul>' \
    '</p>' >> "$webpagefilename"

  # Arguments: action, directory, in-cache size range (min max),
  # out-of-cache size range (min max), mode, prefix.
  source mk_mean_script.sh axpy "$1" 11 2500 100000 250000 "$mode" "$prefix"
  source mk_mean_script.sh axpby "$1" 11 2500 100000 250000 "$mode" "$prefix"
  source mk_mean_script.sh matrix_vector "$1" 11 50 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh atv "$1" 11 50 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh matrix_matrix "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh aat "$1" 11 100 300 1000 "$mode" "$prefix"
  # source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
  source mk_mean_script.sh trmm "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh trisolve_vector "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh trisolve_matrix "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh cholesky "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh partial_lu_decomp "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh tridiagonalization "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh hessenberg "$1" 11 100 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh symv "$1" 11 50 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh syr2 "$1" 11 50 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh ger "$1" 11 50 300 1000 "$mode" "$prefix"
  source mk_mean_script.sh rot "$1" 11 2500 100000 250000 "$mode" "$prefix"
  source mk_mean_script.sh complete_lu_decomp "$1" 11 100 300 1000 "$mode" "$prefix"

fi

## compile the web page ##

#echo `cat footer.html` >> $webpagefilename
|
||||
@@ -1,165 +0,0 @@
|
||||
//=====================================================
|
||||
// File : mean.cxx
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "bench_parameter.hh"
|
||||
#include "utils/xy_file.hh"
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
|
||||
const int size_max);
|
||||
|
||||
class Lib_Mean {
|
||||
public:
|
||||
Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() {
|
||||
MESSAGE("Lib_mean Default Ctor");
|
||||
MESSAGE("!!! should not be used");
|
||||
exit(0);
|
||||
}
|
||||
Lib_Mean(const string &name, const double &mic, const double &moc)
|
||||
: _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) {
|
||||
MESSAGE("Lib_mean Ctor");
|
||||
}
|
||||
Lib_Mean(const Lib_Mean &lm)
|
||||
: _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) {
|
||||
MESSAGE("Lib_mean Copy Ctor");
|
||||
}
|
||||
~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); }
|
||||
|
||||
double _mean_in_cache;
|
||||
double _mean_out_of_cache;
|
||||
string _lib_name;
|
||||
|
||||
bool operator<(const Lib_Mean &right) const {
|
||||
// return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
|
||||
return (this->_mean_in_cache > right._mean_in_cache);
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 6) {
|
||||
INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
|
||||
exit(0);
|
||||
}
|
||||
INFOS(argc);
|
||||
|
||||
int min_in_cache = atoi(argv[2]);
|
||||
int max_in_cache = atoi(argv[3]);
|
||||
int min_out_of_cache = atoi(argv[4]);
|
||||
int max_out_of_cache = atoi(argv[5]);
|
||||
|
||||
multiset<Lib_Mean> s_lib_mean;
|
||||
|
||||
for (int i = 6; i < argc; i++) {
|
||||
string filename = argv[i];
|
||||
|
||||
INFOS(filename);
|
||||
|
||||
double mic = 0;
|
||||
double moc = 0;
|
||||
|
||||
{
|
||||
vector<int> tab_sizes;
|
||||
vector<double> tab_mflops;
|
||||
|
||||
read_xy_file(filename, tab_sizes, tab_mflops);
|
||||
|
||||
mic = mean_calc(tab_sizes, tab_mflops, min_in_cache, max_in_cache);
|
||||
moc = mean_calc(tab_sizes, tab_mflops, min_out_of_cache, max_out_of_cache);
|
||||
|
||||
Lib_Mean cur_lib_mean(filename, mic, moc);
|
||||
|
||||
s_lib_mean.insert(cur_lib_mean);
|
||||
}
|
||||
}
|
||||
|
||||
cout << "<TABLE BORDER CELLPADDING=2>" << endl;
|
||||
cout << " <TR>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> <a href="
|
||||
"#mean_marker"
|
||||
"> in cache <BR> mean perf <BR> Mflops </a></TH>"
|
||||
<< endl;
|
||||
cout << " <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> <a href="
|
||||
"#mean_marker"
|
||||
"> out of cache <BR> mean perf <BR> Mflops </a></TH>"
|
||||
<< endl;
|
||||
cout << " <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> details </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> comments </TH>" << endl;
|
||||
cout << " </TR>" << endl;
|
||||
|
||||
multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
|
||||
Lib_Mean best(*is);
|
||||
|
||||
for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
|
||||
cout << " <TR>" << endl;
|
||||
cout << " <TD> " << is->_lib_name << " </TD>" << endl;
|
||||
cout << " <TD> " << is->_mean_in_cache << " </TD>" << endl;
|
||||
cout << " <TD> " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " </TD>" << endl;
|
||||
cout << " <TD> " << is->_mean_out_of_cache << " </TD>" << endl;
|
||||
cout << " <TD> " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " </TD>" << endl;
|
||||
cout << " <TD> "
|
||||
<< "<a href=\"#" << is->_lib_name << "_" << argv[1]
|
||||
<< "\">snippet</a>/"
|
||||
"<a href=\"#"
|
||||
<< is->_lib_name << "_flags\">flags</a> </TD>" << endl;
|
||||
cout << " <TD> "
|
||||
<< "<a href=\"#" << is->_lib_name << "_comments\">click here</a> </TD>" << endl;
|
||||
cout << " </TR>" << endl;
|
||||
}
|
||||
|
||||
cout << "</TABLE>" << endl;
|
||||
|
||||
ofstream output_file("../order_lib", ios::out);
|
||||
|
||||
for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
|
||||
output_file << is->_lib_name << endl;
|
||||
}
|
||||
|
||||
output_file.close();
|
||||
}
|
||||
|
||||
double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
|
||||
const int size_max) {
|
||||
int size = tab_sizes.size();
|
||||
int nb_sample = 0;
|
||||
double mean = 0.0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) {
|
||||
nb_sample++;
|
||||
mean += tab_mflops[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (nb_sample == 0) {
|
||||
INFOS("no data for mean calculation");
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return mean / nb_sample;
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
#! /bin/bash
# Generate a gnuplot script for one benchmark action and run it.
#
# Usage: mk_gnuplot_script.sh WHAT DIR
#
#   WHAT  action name; $WHAT.hh provides the gnuplot preamble
#   DIR   directory searched for *.dat files whose path contains WHAT
#
# The same curve list is plotted three times: on the default terminal, as
# $WHAT.ppm and as $WHAT.jpg.
WHAT=$1
DIR=$2
GNUPLOT_SCRIPT=$WHAT.gnuplot

echo $WHAT script generation
cat "$WHAT.hh" > "$GNUPLOT_SCRIPT"

DATA_FILE=`find $DIR -name "*.dat" | grep $WHAT`

# The last data file is emitted without a trailing continuation.
LAST=
for FILE in $DATA_FILE
do
  LAST=$FILE
done

echo LAST=$LAST

# Print the curve title for a data file: the file name without its directory,
# without everything up to and including "bench_${WHAT}_", and without the
# ".dat" suffix.
curve_title() {
  local base=${1##*/}
  local reduced=${base##*bench_${WHAT}_}
  echo "${reduced%.dat}"
}

# Append one complete "plot" command covering every data file.
append_plot_command() {
  local file
  echo plot \\ >> "$GNUPLOT_SCRIPT"
  for file in $DATA_FILE
  do
    if [ "$file" != "$LAST" ]
    then
      echo "'$file' title '$(curve_title "$file")' ,\\" >> "$GNUPLOT_SCRIPT"
    fi
  done
  echo "'$LAST' title '$(curve_title "$LAST")'" >> "$GNUPLOT_SCRIPT"
}

# Default terminal.
append_plot_command

#echo set term postscript color >> $WHAT.gnuplot
#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot
echo set term pbm small color >> "$GNUPLOT_SCRIPT"
echo set output "'$WHAT.ppm'" >> "$GNUPLOT_SCRIPT"
append_plot_command

echo set term jpeg large >> "$GNUPLOT_SCRIPT"
echo set output "'$WHAT.jpg'" >> "$GNUPLOT_SCRIPT"
append_plot_command

gnuplot -persist < "$GNUPLOT_SCRIPT"

rm "$GNUPLOT_SCRIPT"
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
#! /bin/bash
# Compute the mean statistics and the plot for one benchmark action.
#
# Usage: mk_mean_script.sh WHAT DIR MINIC MAXIC MINOC MAXOC MODE [PREFIX]
#
#   WHAT           action name
#   DIR            result directory, searched for *.dat files matching _WHAT
#   MINIC MAXIC    size range passed to ../main for the in-cache mean
#   MINOC MAXOC    size range passed to ../main for the out-of-cache mean
#   MODE           forwarded to mk_new_gnuplot.sh as its third argument
#   PREFIX         prepended to the image/pdf names in index.html
#
# Appends to DIR/mean.html and DIR/index.html. Uses ./tmp as scratch space.
WHAT=$1
DIR=$2
MINIC=$3
MAXIC=$4
MINOC=$5
MAXOC=$6
prefix=$8

meanstatsfilename=$DIR/mean.html

WORK_DIR=tmp
# -p: do not fail when a previous run left the directory behind.
mkdir -p "$WORK_DIR"

DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`

if [ -n "$DATA_FILE" ]; then

  echo ""
  echo "$WHAT..."
  for FILE in $DATA_FILE
  do
    # Derive the library label: strip the directory, everything up to and
    # including "bench_${WHAT}_", and the ".dat" suffix.
    BASE=${FILE##*/}
    AVANT=bench_${WHAT}_
    REDUC=${BASE##*$AVANT}
    TITLE=${REDUC%.dat}

    cp "$FILE" "${WORK_DIR}/${TITLE}"
  done

  # Run inside a subshell so the directory change cannot leak, and so the
  # cleanup below never runs in the wrong directory if cd fails.
  (
    cd "$WORK_DIR" || exit 1
    ../main "$WHAT" "$MINIC" "$MAXIC" "$MINOC" "$MAXOC" * >> "../$meanstatsfilename"
    ../mk_new_gnuplot.sh "$WHAT" "$DIR" "$7"
    rm -f *.gnuplot
  )

  echo '<br/>' >> "$meanstatsfilename"

  webpagefilename=$DIR/index.html
  # echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
  echo '<hr/><a href="'$prefix$WHAT'.pdf"><img src="'$prefix$WHAT'.png" alt="'${WHAT}'" /></a><br/>' >> "$webpagefilename"

fi

rm -R "$WORK_DIR"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
#!/bin/bash
# Render the plot of one benchmark action as PostScript, PDF and PNG.
#
# Usage: mk_new_gnuplot.sh WHAT DIR [MODE]
#
#   WHAT  action name, looked up in ../action_settings.txt
#   DIR   output directory, relative to the parent directory
#   MODE  "tiny" selects a linear x axis restricted to [2:16]
#
# Reads the list of curves from ../order_lib and the per-library line styles
# from ../perlib_plot_settings.txt.
WHAT=$1
DIR=$2

cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot

# Fetch the settings line whose first field is exactly WHAT. An unanchored
# grep would also accept lines that merely contain WHAT somewhere.
ACTION_SETTINGS=$(grep "^[[:space:]]*${WHAT}[[:space:]]*;" ../action_settings.txt | head -n 1)

# The substitutions are deliberately unquoted so echo receives the individual
# words, as before.
echo "set title " $(echo "$ACTION_SETTINGS" | cut -d ";" -f 2) >> $WHAT.gnuplot
echo "set xlabel " $(echo "$ACTION_SETTINGS" | cut -d ";" -f 3) " offset 0,0" >> $WHAT.gnuplot
echo "set xrange [" $(echo "$ACTION_SETTINGS" | cut -d ";" -f 4) "]" >> $WHAT.gnuplot

# The former outer test "[ $# > 3 ]" was a redirection (always true, and it
# created a file named "3"); the string comparison alone is sufficient.
if [ "$3" == "tiny" ]; then
  echo "set xrange [2:16]" >> $WHAT.gnuplot
  echo "set nologscale" >> $WHAT.gnuplot
fi

DATA_FILE=`cat ../order_lib`
echo set term postscript color rounded enhanced >> $WHAT.gnuplot
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot

# echo set term svg color rounded enhanced >> $WHAT.gnuplot
# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot

echo plot \\ >> $WHAT.gnuplot

# The last curve must not be followed by a separator.
LAST=
for FILE in $DATA_FILE
do
  LAST=$FILE
done

for FILE in $DATA_FILE
do
  BASE=${FILE##*/}
  AVANT=bench_${WHAT}_
  REDUC=${BASE##*$AVANT}
  TITLE=${REDUC%.dat}

  # Match the library name as a whole first field. With a plain substring
  # match, library "C" picked up the earlier OPENBLAS line through its
  # colour "#C05600".
  PLOT_STYLE=$(grep "^[[:space:]]*${TITLE}[[:space:]]*;" ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2)

  echo "'$FILE'" $PLOT_STYLE "\\" >> $WHAT.gnuplot
  if [ "$FILE" != "$LAST" ]
  then
    echo ", \\" >> $WHAT.gnuplot
  fi
done
echo " " >> $WHAT.gnuplot

gnuplot -persist < $WHAT.gnuplot

rm $WHAT.gnuplot

ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten ../${DIR}/$WHAT.png
|
||||
|
||||
# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg
|
||||
@@ -1,16 +0,0 @@
|
||||
eigen3 ; with lines lw 4 lt 1 lc rgbcolor "black"
|
||||
eigen2 ; with lines lw 3 lt 1 lc rgbcolor "#999999"
|
||||
EigenBLAS ; with lines lw 3 lt 3 lc rgbcolor "#999999"
|
||||
eigen3_novec ; with lines lw 2 lt 1 lc rgbcolor "#999999"
|
||||
eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010"
|
||||
INTEL_MKL ; with lines lw 3 lt 1 lc rgbcolor "#ff0000"
|
||||
ATLAS ; with lines lw 3 lt 1 lc rgbcolor "#008000"
|
||||
gmm ; with lines lw 3 lt 1 lc rgbcolor "#0000ff"
|
||||
ublas ; with lines lw 3 lt 1 lc rgbcolor "#00b7ff"
|
||||
mtl4 ; with lines lw 3 lt 1 lc rgbcolor "#d18847"
|
||||
blitz ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
|
||||
F77 ; with lines lw 3 lt 3 lc rgbcolor "#e6e64c"
|
||||
OPENBLAS ; with lines lw 3 lt 1 lc rgbcolor "#C05600"
|
||||
C ; with lines lw 3 lt 3 lc rgbcolor "#e6bd96"
|
||||
ACML ; with lines lw 2 lt 3 lc rgbcolor "#e6e64c"
|
||||
blaze ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
|
||||
@@ -1,113 +0,0 @@
|
||||
//=====================================================
|
||||
// File : regularize.cxx
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "bench_parameter.hh"
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
|
||||
void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
|
||||
int start_cut_size, int stop_cut_size);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// input data
|
||||
|
||||
if (argc < 4) {
|
||||
INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename");
|
||||
exit(0);
|
||||
}
|
||||
INFOS(argc);
|
||||
|
||||
int start_cut_size = atoi(argv[2]);
|
||||
int stop_cut_size = atoi(argv[3]);
|
||||
|
||||
string filename = argv[1];
|
||||
string regularize_filename = argv[4];
|
||||
|
||||
INFOS(filename);
|
||||
INFOS("start_cut_size=" << start_cut_size);
|
||||
|
||||
vector<int> tab_sizes;
|
||||
vector<double> tab_mflops;
|
||||
|
||||
read_xy_file(filename, tab_sizes, tab_mflops);
|
||||
|
||||
// regularizeing
|
||||
|
||||
regularize_curve(regularize_filename, tab_mflops, tab_sizes, start_cut_size, stop_cut_size);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Writes the curve (tab_sizes[i], tab_mflops[i]) to `filename`, dropping the
// points whose size lies in [start_cut_size, stop_cut_size).
//
// Output layout, one "size mflops" pair per line:
//   1. the leading points with size < start_cut_size,
//   2. one empty line marking the cut,
//   3. every point from the first one with size >= stop_cut_size to the end.
//
// The scan is a simple forward walk, so it presumably expects tab_sizes to be
// in increasing order (TODO confirm against the producers of the .dat files).
void regularize_curve(const std::string &filename, const std::vector<double> &tab_mflops,
                      const std::vector<int> &tab_sizes, int start_cut_size, int stop_cut_size) {
  // Only indices valid in both vectors may be dereferenced.
  int size = tab_mflops.size();
  if (tab_sizes.size() < tab_mflops.size()) size = tab_sizes.size();

  std::ofstream output_file(filename.c_str(), std::ios::out);

  int i = 0;

  // `i < size` guards against running off the end when no point reaches
  // start_cut_size (including the empty-curve case).
  while (i < size && tab_sizes[i] < start_cut_size) {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << std::endl;
    i++;
  }

  output_file << std::endl;

  // Skip the cut region; same bounds guard when no point reaches stop_cut_size.
  while (i < size && tab_sizes[i] < stop_cut_size) {
    i++;
  }

  while (i < size) {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << std::endl;
    i++;
  }

  output_file.close();
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
|
||||
ifstream input_file(filename.c_str(), ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
INFOS("!!! Error opening " << filename);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int nb_point = 0;
|
||||
int size = 0;
|
||||
double mflops = 0;
|
||||
|
||||
while (input_file >> size >> mflops) {
|
||||
nb_point++;
|
||||
tab_sizes.push_back(size);
|
||||
tab_mflops.push_back(mflops);
|
||||
}
|
||||
SCRUTE(nb_point);
|
||||
|
||||
input_file.close();
|
||||
}
|
||||
@@ -1,165 +0,0 @@
|
||||
//=====================================================
|
||||
// File : smooth.cxx
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "bench_parameter.hh"
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
|
||||
void write_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
|
||||
void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);
|
||||
void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// input data
|
||||
|
||||
if (argc < 3) {
|
||||
INFOS("!!! Error ... usage : main filename window_half_width smooth_filename");
|
||||
exit(0);
|
||||
}
|
||||
INFOS(argc);
|
||||
|
||||
int window_half_width = atoi(argv[2]);
|
||||
|
||||
string filename = argv[1];
|
||||
string smooth_filename = argv[3];
|
||||
|
||||
INFOS(filename);
|
||||
INFOS("window_half_width=" << window_half_width);
|
||||
|
||||
vector<int> tab_sizes;
|
||||
vector<double> tab_mflops;
|
||||
|
||||
read_xy_file(filename, tab_sizes, tab_mflops);
|
||||
|
||||
// smoothing
|
||||
|
||||
vector<double> smooth_tab_mflops;
|
||||
|
||||
// smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
|
||||
centered_smooth_curve(tab_mflops, smooth_tab_mflops, window_half_width);
|
||||
|
||||
// output result
|
||||
|
||||
write_xy_file(smooth_filename, tab_sizes, smooth_tab_mflops);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Returns the arithmetic mean of the elements of `data`.
//
// Despite the name, every element carries the same weight. VECTOR only needs
// size() and operator[]. The elements are summed front to back, then divided
// by the element count.
template <class VECTOR>
double weighted_mean(const VECTOR &data) {
  double total = 0.0;

  int idx = 0;
  while (idx < data.size()) {
    total += data[idx];
    ++idx;
  }

  const double count = double(data.size());
  return total / count;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
|
||||
int window_width = 2 * window_half_width + 1;
|
||||
|
||||
int size = tab_mflops.size();
|
||||
|
||||
vector<double> sample(window_width);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int j = 0; j < window_width; j++) {
|
||||
int shifted_index = i + j - window_half_width;
|
||||
if (shifted_index < 0) shifted_index = 0;
|
||||
if (shifted_index > size - 1) shifted_index = size - 1;
|
||||
sample[j] = tab_mflops[shifted_index];
|
||||
}
|
||||
|
||||
smooth_tab_mflops.push_back(weighted_mean(sample));
|
||||
}
|
||||
}
|
||||
|
||||
void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
|
||||
int max_window_width = 2 * window_half_width + 1;
|
||||
|
||||
int size = tab_mflops.size();
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
deque<double> sample;
|
||||
|
||||
sample.push_back(tab_mflops[i]);
|
||||
|
||||
for (int j = 1; j <= window_half_width; j++) {
|
||||
int before = i - j;
|
||||
int after = i + j;
|
||||
|
||||
if ((before >= 0) && (after < size)) // inside of the vector
|
||||
{
|
||||
sample.push_front(tab_mflops[before]);
|
||||
sample.push_back(tab_mflops[after]);
|
||||
}
|
||||
}
|
||||
|
||||
smooth_tab_mflops.push_back(weighted_mean(sample));
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Writes the curve to `filename`, one "size mflops" pair per line.
//
// One line is written per entry of tab_sizes; tab_mflops is indexed with the
// same position, so it must hold at least as many elements.
void write_xy_file(const std::string &filename, std::vector<int> &tab_sizes, std::vector<double> &tab_mflops) {
  std::ofstream output_file(filename.c_str(), std::ios::out);

  std::size_t row = 0;
  while (row < tab_sizes.size()) {
    output_file << tab_sizes[row] << " " << tab_mflops[row] << std::endl;
    ++row;
  }

  output_file.close();
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
|
||||
ifstream input_file(filename.c_str(), ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
INFOS("!!! Error opening " << filename);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int nb_point = 0;
|
||||
int size = 0;
|
||||
double mflops = 0;
|
||||
|
||||
while (input_file >> size >> mflops) {
|
||||
nb_point++;
|
||||
tab_sizes.push_back(size);
|
||||
tab_mflops.push_back(mflops);
|
||||
}
|
||||
SCRUTE(nb_point);
|
||||
|
||||
input_file.close();
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
#! /bin/bash
#
# Post-processes every BTL result curve (*.dat) found under a results
# directory and stores the processed copies, flat, in "<results_dir>_smooth".
#
# usage: smooth_all.sh <results_dir>
#
# The ./smooth and ./regularize executables are looked up in the current
# directory. A file whose path matches several patterns is processed once per
# matching pattern, in the order of the calls at the bottom of this script.

if [ $# -lt 1 ]; then
  echo "usage: $0 <results_dir>" >&2
  exit 1
fi

ORIG_DIR=$1
SMOOTH_DIR=${ORIG_DIR}_smooth
# -p: do not fail when the directory is left over from a previous run.
mkdir -p "${SMOOTH_DIR}"

# list_dat_files PATTERN
# Prints the path of every *.dat file under ORIG_DIR whose path matches PATTERN.
list_dat_files() {
  find "${ORIG_DIR}" -name "*.dat" | grep -- "$1"
}

# smooth_then_regularize PATTERN START_CUT STOP_CUT
# Smooths each matching file (half window of 4), then removes the points
# between the two cut sizes.
smooth_then_regularize() {
  list_dat_files "$1" | while IFS= read -r FILE
  do
    echo "$FILE"
    BASE=${FILE##*/}
    # Read the path reported by find: it stays valid for files in sub-directories.
    ./smooth "$FILE" 4 "${SMOOTH_DIR}/${BASE}_tmp"
    ./regularize "${SMOOTH_DIR}/${BASE}_tmp" "$2" "$3" "${SMOOTH_DIR}/${BASE}"
    rm -f "${SMOOTH_DIR}/${BASE}_tmp"
  done
}

# smooth_only PATTERN
# Smooths each matching file (half window of 4).
smooth_only() {
  list_dat_files "$1" | while IFS= read -r FILE
  do
    echo "$FILE"
    BASE=${FILE##*/}
    ./smooth "$FILE" 4 "${SMOOTH_DIR}/${BASE}"
  done
}

# copy_only PATTERN
# Copies each matching file unchanged.
copy_only() {
  list_dat_files "$1" | while IFS= read -r FILE
  do
    echo "$FILE"
    BASE=${FILE##*/}
    cp "$FILE" "${SMOOTH_DIR}/${BASE}"
  done
}

smooth_then_regularize axpy 2500 15000
smooth_then_regularize matrix_vector 50 180

smooth_only matrix_matrix
smooth_only _aat
smooth_only _ata

### no smoothing for tinyvector and matrices libs

copy_only tiny_blitz
copy_only tvmet
|
||||
@@ -1,149 +0,0 @@
|
||||
//=====================================================
|
||||
// File : bench.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef BENCH_HH
|
||||
#define BENCH_HH
|
||||
|
||||
#include "btl.hh"
|
||||
#include "bench_parameter.hh"
|
||||
#include <iostream>
|
||||
#include "utilities.h"
|
||||
#include "size_lin_log.hh"
|
||||
#include "xy_file.hh"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "timers/portable_perf_analyzer.hh"
|
||||
// #include "timers/mixed_perf_analyzer.hh"
|
||||
// #include "timers/x86_perf_analyzer.hh"
|
||||
// #include "timers/STL_perf_analyzer.hh"
|
||||
#ifdef HAVE_MKL
|
||||
extern "C" void cblas_saxpy(const int, const float, const float *, const int, float *, const int);
|
||||
#endif
|
||||
using namespace std;
|
||||
|
||||
template <template <class> class Perf_Analyzer, class Action>
|
||||
BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
|
||||
if (BtlConfig::skipAction(Action::name())) return;
|
||||
|
||||
string filename = "bench_" + Action::name() + ".dat";
|
||||
|
||||
INFOS("starting " << filename);
|
||||
|
||||
// utilities
|
||||
|
||||
std::vector<double> tab_mflops(nb_point);
|
||||
std::vector<int> tab_sizes(nb_point);
|
||||
|
||||
// matrices and vector size calculations
|
||||
size_lin_log(nb_point, size_min, size_max, tab_sizes);
|
||||
|
||||
std::vector<int> oldSizes;
|
||||
std::vector<double> oldFlops;
|
||||
bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
|
||||
int oldi = oldSizes.size() - 1;
|
||||
|
||||
// loop on matrix size
|
||||
Perf_Analyzer<Action> perf_action;
|
||||
for (int i = nb_point - 1; i >= 0; i--) {
|
||||
// INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
|
||||
std::cout << " "
|
||||
<< "size = " << tab_sizes[i] << " " << std::flush;
|
||||
|
||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||
#ifdef HAVE_MKL
|
||||
{
|
||||
float dummy;
|
||||
cblas_saxpy(1, 0, &dummy, 1, &dummy, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
|
||||
std::cout << tab_mflops[i];
|
||||
|
||||
if (hasOldResults) {
|
||||
while (oldi >= 0 && oldSizes[oldi] > tab_sizes[i]) --oldi;
|
||||
if (oldi >= 0 && oldSizes[oldi] == tab_sizes[i]) {
|
||||
if (oldFlops[oldi] < tab_mflops[i])
|
||||
std::cout << "\t > ";
|
||||
else
|
||||
std::cout << "\t < ";
|
||||
std::cout << oldFlops[oldi];
|
||||
}
|
||||
--oldi;
|
||||
}
|
||||
std::cout << " MFlops (" << nb_point - i << "/" << nb_point << ")" << std::endl;
|
||||
}
|
||||
|
||||
if (!BtlConfig::Instance.overwriteResults) {
|
||||
if (hasOldResults) {
|
||||
// merge the two data
|
||||
std::vector<int> newSizes;
|
||||
std::vector<double> newFlops;
|
||||
unsigned int i = 0;
|
||||
unsigned int j = 0;
|
||||
while (i < tab_sizes.size() && j < oldSizes.size()) {
|
||||
if (tab_sizes[i] == oldSizes[j]) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
|
||||
++i;
|
||||
++j;
|
||||
} else if (tab_sizes[i] < oldSizes[j]) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(tab_mflops[i]);
|
||||
++i;
|
||||
} else {
|
||||
newSizes.push_back(oldSizes[j]);
|
||||
newFlops.push_back(oldFlops[j]);
|
||||
++j;
|
||||
}
|
||||
}
|
||||
while (i < tab_sizes.size()) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(tab_mflops[i]);
|
||||
++i;
|
||||
}
|
||||
while (j < oldSizes.size()) {
|
||||
newSizes.push_back(oldSizes[j]);
|
||||
newFlops.push_back(oldFlops[j]);
|
||||
++j;
|
||||
}
|
||||
tab_mflops = newFlops;
|
||||
tab_sizes = newSizes;
|
||||
}
|
||||
}
|
||||
|
||||
// dump the result in a file :
|
||||
dump_xy_file(tab_sizes, tab_mflops, filename);
|
||||
}
|
||||
|
||||
// default Perf Analyzer
|
||||
|
||||
template <class Action>
|
||||
BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
|
||||
// if the rdtsc is not available :
|
||||
bench<Portable_Perf_Analyzer, Action>(size_min, size_max, nb_point);
|
||||
// if the rdtsc is available :
|
||||
// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
|
||||
// Only for small problem size. Otherwise it will be too long
|
||||
// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,53 +0,0 @@
|
||||
//=====================================================
|
||||
// File : bench_parameter.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef BENCH_PARAMETER_HH
|
||||
#define BENCH_PARAMETER_HH
|
||||
|
||||
// real (floating-point) scalar type used by the benchmarks
|
||||
#define REAL_TYPE float
|
||||
// minimal time for each measurement
|
||||
#define MIN_TIME 0.2
|
||||
// nb of point on bench curves
|
||||
#define NB_POINT 100
|
||||
// min vector size for axpy bench
|
||||
#define MIN_AXPY 5
|
||||
// max vector size for axpy bench
|
||||
#define MAX_AXPY 3000000
|
||||
// min matrix size for matrix vector product bench
|
||||
#define MIN_MV 5
|
||||
// max matrix size for matrix vector product bench
|
||||
#define MAX_MV 5000
|
||||
// min matrix size for matrix matrix product bench
|
||||
#define MIN_MM 5
|
||||
// max matrix size for matrix matrix product bench
|
||||
#define MAX_MM MAX_MV
|
||||
// min matrix size for LU bench
|
||||
#define MIN_LU 5
|
||||
// max matrix size for LU bench
|
||||
#define MAX_LU 3000
|
||||
// max size for tiny vector and matrix
|
||||
#define TINY_MV_MAX_SIZE 16
|
||||
// default nb_sample for x86 timer
|
||||
#define DEFAULT_NB_SAMPLE 1000
|
||||
|
||||
// how many times we run a single bench (keep the best perf)
|
||||
#define DEFAULT_NB_TRIES 3
|
||||
|
||||
#endif
|
||||
@@ -1,205 +0,0 @@
|
||||
//=====================================================
|
||||
// File : btl.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef BTL_HH
|
||||
#define BTL_HH
|
||||
|
||||
#include "bench_parameter.hh"
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "utilities.h"
|
||||
|
||||
#if (defined __GNUC__)
|
||||
#define BTL_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define BTL_ALWAYS_INLINE inline
|
||||
#endif
|
||||
|
||||
#if (defined __GNUC__)
|
||||
#define BTL_DONT_INLINE __attribute__((noinline))
|
||||
#else
|
||||
#define BTL_DONT_INLINE
|
||||
#endif
|
||||
|
||||
#if (defined __GNUC__)
|
||||
#define BTL_ASM_COMMENT(X) asm("#" X)
|
||||
#else
|
||||
#define BTL_ASM_COMMENT(X)
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__
|
||||
#include "xmmintrin.h"
|
||||
// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
|
||||
#define BTL_DISABLE_SSE_EXCEPTIONS() \
|
||||
{ _mm_setcsr(_mm_getcsr() | 0x8040); }
|
||||
#else
|
||||
#define BTL_DISABLE_SSE_EXCEPTIONS()
|
||||
#endif
|
||||
|
||||
/** Enhanced std::string
|
||||
*/
|
||||
class BtlString : public std::string {
 public:
  BtlString() : std::string() {}
  BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
  BtlString(const std::string& str) : std::string(str) {}
  BtlString(const char* str) : std::string(str) {}

  /** Implicit view of the content as a C string. */
  operator const char*() const { return c_str(); }

  /** Removes spaces, tabs, carriage returns and line feeds, in place, from
      the left and/or the right end of the string.
  */
  void trim(bool left = true, bool right = true) {
    int lspaces, rspaces, len = length(), i;
    lspaces = rspaces = 0;

    if (left)
      for (i = 0; i < len && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); ++lspaces, ++i)
        ;

    // Skipped when the left pass already consumed the whole string, so that
    // the same blanks are not counted twice.
    if (right && lspaces < len)
      for (i = len - 1; i >= 0 && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); rspaces++, i--)
        ;

    *this = substr(lspaces, len - lspaces - rspaces);
  }

  /** Splits the string at every character found in \a delims.

      Empty fields are kept: two consecutive delimiters, or a leading
      delimiter, produce an empty element. The result always holds at least
      one element.
  */
  std::vector<BtlString> split(const BtlString& delims = "\t\n ") const {
    std::vector<BtlString> ret;
    size_t start, pos;
    start = 0;
    do {
      pos = find_first_of(delims, start);
      if (pos == start) {
        ret.push_back("");
        start = pos + 1;
      } else if (pos == npos)
        ret.push_back(substr(start));
      else {
        ret.push_back(substr(start, pos - start));
        start = pos + 1;
      }
    } while (pos != npos);
    return ret;
  }

  /** Returns true when the string ends with \a str. */
  bool endsWith(const BtlString& str) const {
    if (str.size() > this->size()) return false;
    return this->substr(this->size() - str.size(), str.size()) == str;
  }

  /** Returns true when \a str occurs in the string. */
  bool contains(const BtlString& str) const { return this->find(str) < this->size(); }

  /** Returns true when the string starts with \a str. */
  bool beginsWith(const BtlString& str) const {
    if (str.size() > this->size()) return false;
    return this->substr(0, str.size()) == str;
  }

  /** Converts the string to lower case in place and returns a copy of it. */
  BtlString toLowerCase(void) {
    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::tolower));
    return *this;
  }

  /** Converts the string to upper case in place and returns a copy of it. */
  BtlString toUpperCase(void) {
    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::toupper));
    return *this;
  }

  /** Case insensitive comparison.
   */
  bool isEquiv(const BtlString& str) const {
    BtlString str0 = *this;
    str0.toLowerCase();
    BtlString str1 = str;
    str1.toLowerCase();
    return str0 == str1;
  }

  /** Decompose the current string as a path and a file.
      For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext",
      and "/dir/file.ext" leads to path="/dir/" and filename="file.ext".
      Both '/' and '\\' are accepted as separators; the path is rebuilt with '/'.
  */
  void decomposePathAndFile(BtlString& path, BtlString& filename) const {
    // split() always returns at least one element, so back() is safe even for
    // an empty string.
    std::vector<BtlString> elements = this->split("/\\");
    path = "";
    filename = elements.back();
    elements.pop_back();
    // A leading separator yields an empty first element, which the loop turns
    // into the root "/". No special case is needed for absolute paths.
    for (unsigned int i = 0; i < elements.size(); ++i) path += elements[i] + "/";
  }
};
|
||||
|
||||
class BtlConfig {
|
||||
public:
|
||||
BtlConfig() : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES) {
|
||||
char* _config;
|
||||
_config = getenv("BTL_CONFIG");
|
||||
if (_config != NULL) {
|
||||
std::vector<BtlString> config = BtlString(_config).split(" \t\n");
|
||||
for (unsigned int i = 0; i < config.size(); i++) {
|
||||
if (config[i].beginsWith("-a")) {
|
||||
if (i + 1 == config.size()) {
|
||||
std::cerr << "error processing option: " << config[i] << "\n";
|
||||
exit(2);
|
||||
}
|
||||
Instance.m_selectedActionNames = config[i + 1].split(":");
|
||||
|
||||
i += 1;
|
||||
} else if (config[i].beginsWith("-t")) {
|
||||
if (i + 1 == config.size()) {
|
||||
std::cerr << "error processing option: " << config[i] << "\n";
|
||||
exit(2);
|
||||
}
|
||||
Instance.tries = atoi(config[i + 1].c_str());
|
||||
|
||||
i += 1;
|
||||
} else if (config[i].beginsWith("--overwrite")) {
|
||||
Instance.overwriteResults = true;
|
||||
} else if (config[i].beginsWith("--nocheck")) {
|
||||
Instance.checkResults = false;
|
||||
} else if (config[i].beginsWith("--real")) {
|
||||
Instance.realclock = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE static bool skipAction(const std::string& _name) {
|
||||
if (Instance.m_selectedActionNames.empty()) return false;
|
||||
|
||||
BtlString name(_name);
|
||||
for (unsigned int i = 0; i < Instance.m_selectedActionNames.size(); ++i)
|
||||
if (name.contains(Instance.m_selectedActionNames[i])) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static BtlConfig Instance;
|
||||
bool overwriteResults;
|
||||
bool checkResults;
|
||||
bool realclock;
|
||||
int tries;
|
||||
|
||||
protected:
|
||||
std::vector<BtlString> m_selectedActionNames;
|
||||
};
|
||||
|
||||
#define BTL_MAIN BtlConfig BtlConfig::Instance
|
||||
|
||||
#endif // BTL_HH
|
||||
@@ -1,35 +0,0 @@
|
||||
//=====================================================
|
||||
// File : init_function.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef INIT_FUNCTION_HH
|
||||
#define INIT_FUNCTION_HH
|
||||
|
||||
// Initializer functions with the signatures taken by the init_* templates
// (double(int) for vectors, double(int, int) for matrices).
//
// They are defined in a header, so they are declared inline: without it,
// including this file from more than one translation unit produces
// multiple-definition link errors.
//
// NOTE: pseudo_random uses std::rand / RAND_MAX, and this header includes
// nothing itself; <cstdlib> has to be included before it.

// Value equal to the element index.
inline double simple_function(int index) { return index; }

// Value equal to the sum of the row and column indices.
inline double simple_function(int index_i, int index_j) { return index_i + index_j; }

// Pseudo-random value in [0, 1], independent of the index.
inline double pseudo_random(int /*index*/) { return std::rand() / double(RAND_MAX); }

// Pseudo-random value in [0, 1], independent of the indices.
inline double pseudo_random(int /*index_i*/, int /*index_j*/) { return std::rand() / double(RAND_MAX); }

// Always zero.
inline double null_function(int /*index*/) { return 0.0; }

// Always zero.
inline double null_function(int /*index_i*/, int /*index_j*/) { return 0.0; }
|
||||
|
||||
#endif
|
||||
@@ -1,61 +0,0 @@
|
||||
//=====================================================
|
||||
// File : init_matrix.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef INIT_MATRIX_HH
|
||||
#define INIT_MATRIX_HH
|
||||
|
||||
// The Vector class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
// [] operator for setting element
|
||||
// value_type defined
|
||||
template <double init_function(int, int), class Vector>
|
||||
BTL_DONT_INLINE void init_row(Vector& X, int size, int row) {
|
||||
X.resize(size);
|
||||
|
||||
for (unsigned int j = 0; j < X.size(); j++) {
|
||||
X[j] = typename Vector::value_type(init_function(row, j));
|
||||
}
|
||||
}
|
||||
|
||||
// Matrix is a Vector of Vector
|
||||
// The Matrix class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
// [] operator for setting rows
|
||||
template <double init_function(int, int), class Vector>
|
||||
BTL_DONT_INLINE void init_matrix(Vector& A, int size) {
|
||||
A.resize(size);
|
||||
for (unsigned int row = 0; row < A.size(); row++) {
|
||||
init_row<init_function>(A[row], size, row);
|
||||
}
|
||||
}
|
||||
|
||||
template <double init_function(int, int), class Matrix>
|
||||
BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size) {
|
||||
A.resize(size);
|
||||
for (unsigned int row = 0; row < A.size(); row++) A[row].resize(size);
|
||||
for (unsigned int row = 0; row < A.size(); row++) {
|
||||
A[row][row] = init_function(row, row);
|
||||
for (unsigned int col = 0; col < row; col++) {
|
||||
double x = init_function(row, col);
|
||||
A[row][col] = A[col][row] = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,36 +0,0 @@
|
||||
//=====================================================
|
||||
// File : init_vector.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef INIT_VECTOR_HH
|
||||
#define INIT_VECTOR_HH
|
||||
|
||||
// The Vector class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
// [] operator for setting element
|
||||
// value_type defined
|
||||
template <double init_function(int), class Vector>
|
||||
void init_vector(Vector& X, int size) {
|
||||
X.resize(size);
|
||||
|
||||
for (unsigned int i = 0; i < X.size(); i++) {
|
||||
X[i] = typename Vector::value_type(init_function(i));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,61 +0,0 @@
|
||||
//=====================================================
|
||||
// File : bench_static.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef BENCH_STATIC_HH
|
||||
#define BENCH_STATIC_HH
|
||||
|
||||
#include "btl.hh"
|
||||
#include "bench_parameter.hh"
|
||||
#include <iostream>
|
||||
#include "utilities.h"
|
||||
#include "xy_file.hh"
|
||||
#include "static/static_size_generator.hh"
|
||||
#include "timers/portable_perf_analyzer.hh"
|
||||
// #include "timers/mixed_perf_analyzer.hh"
|
||||
// #include "timers/x86_perf_analyzer.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
|
||||
BTL_DONT_INLINE void bench_static(void) {
|
||||
if (BtlConfig::skipAction(Action<Interface<REAL_TYPE, 10> >::name())) return;
|
||||
|
||||
string filename = "bench_" + Action<Interface<REAL_TYPE, 10> >::name() + ".dat";
|
||||
|
||||
INFOS("starting " << filename);
|
||||
|
||||
const int max_size = TINY_MV_MAX_SIZE;
|
||||
|
||||
std::vector<double> tab_mflops;
|
||||
std::vector<double> tab_sizes;
|
||||
|
||||
static_size_generator<max_size, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);
|
||||
|
||||
dump_xy_file(tab_sizes, tab_mflops, filename);
|
||||
}
|
||||
|
||||
// default Perf Analyzer
|
||||
template <template <class> class Action, template <class, int> class Interface>
|
||||
BTL_DONT_INLINE void bench_static(void) {
|
||||
bench_static<Portable_Perf_Analyzer, Action, Interface>();
|
||||
// bench_static<Mixed_Perf_Analyzer,Action,Interface>();
|
||||
// bench_static<X86_Perf_Analyzer,Action,Interface>();
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,60 +0,0 @@
|
||||
//=====================================================
|
||||
// File : intel_bench_fixed_size.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _BENCH_FIXED_SIZE_HH_
|
||||
#define _BENCH_FIXED_SIZE_HH_
|
||||
|
||||
#include "utilities.h"
|
||||
#include "function_time.hh"
|
||||
|
||||
template <class Action>
|
||||
double bench_fixed_size(int size, unsigned long long& nb_calc, unsigned long long& nb_init) {
|
||||
Action action(size);
|
||||
|
||||
double time_baseline = time_init(nb_init, action);
|
||||
|
||||
while (time_baseline < MIN_TIME) {
|
||||
// INFOS("nb_init="<<nb_init);
|
||||
// INFOS("time_baseline="<<time_baseline);
|
||||
nb_init *= 2;
|
||||
time_baseline = time_init(nb_init, action);
|
||||
}
|
||||
|
||||
time_baseline = time_baseline / (double(nb_init));
|
||||
|
||||
double time_action = time_calculate(nb_calc, action);
|
||||
|
||||
while (time_action < MIN_TIME) {
|
||||
nb_calc *= 2;
|
||||
time_action = time_calculate(nb_calc, action);
|
||||
}
|
||||
|
||||
INFOS("nb_init=" << nb_init);
|
||||
INFOS("nb_calc=" << nb_calc);
|
||||
|
||||
time_action = time_action / (double(nb_calc));
|
||||
|
||||
action.check_result();
|
||||
|
||||
time_action = time_action - time_baseline;
|
||||
|
||||
return action.nb_op_base() / (time_action * 1000000.0);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,52 +0,0 @@
|
||||
//=====================================================
|
||||
// File : static_size_generator.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _STATIC_SIZE_GENERATOR_HH
|
||||
#define _STATIC_SIZE_GENERATOR_HH
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// recursive generation of statically defined matrix and vector sizes
|
||||
|
||||
template <int SIZE, template <class> class Perf_Analyzer, template <class> class Action,
|
||||
template <class, int> class Interface>
|
||||
struct static_size_generator {
|
||||
static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
|
||||
tab_sizes.push_back(SIZE);
|
||||
std::cout << tab_sizes.back() << " \t" << std::flush;
|
||||
Perf_Analyzer<Action<Interface<REAL_TYPE, SIZE> > > perf_action;
|
||||
tab_mflops.push_back(perf_action.eval_mflops(SIZE));
|
||||
std::cout << tab_mflops.back() << " MFlops" << std::endl;
|
||||
static_size_generator<SIZE - 1, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);
|
||||
};
|
||||
};
|
||||
|
||||
// recursion end
|
||||
|
||||
template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
|
||||
struct static_size_generator<1, Perf_Analyzer, Action, Interface> {
|
||||
static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
|
||||
tab_sizes.push_back(1);
|
||||
Perf_Analyzer<Action<Interface<REAL_TYPE, 1> > > perf_action;
|
||||
tab_mflops.push_back(perf_action.eval_mflops(1));
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,70 +0,0 @@
|
||||
//=====================================================
|
||||
// File : STL_perf_analyzer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _STL_PERF_ANALYSER_HH
|
||||
#define _STL_PERF_ANALYSER_HH
|
||||
|
||||
#include "STL_timer.hh"
|
||||
#include "bench_parameter.hh"
|
||||
|
||||
template <class ACTION>
|
||||
class STL_Perf_Analyzer {
|
||||
public:
|
||||
STL_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
|
||||
MESSAGE("STL_Perf_Analyzer Ctor");
|
||||
};
|
||||
STL_Perf_Analyzer(const STL_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~STL_Perf_Analyzer(void) { MESSAGE("STL_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
ACTION action(size);
|
||||
|
||||
_chronos.start_baseline(_nb_sample);
|
||||
|
||||
do {
|
||||
action.initialize();
|
||||
} while (_chronos.check());
|
||||
|
||||
double baseline_time = _chronos.get_time();
|
||||
|
||||
_chronos.start(_nb_sample);
|
||||
do {
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
} while (_chronos.check());
|
||||
|
||||
double calculate_time = _chronos.get_time();
|
||||
|
||||
double corrected_time = calculate_time - baseline_time;
|
||||
|
||||
// cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
|
||||
|
||||
return action.nb_op_base() / (corrected_time * 1000000.0);
|
||||
// return action.nb_op_base()/(calculate_time*1000000.0);
|
||||
}
|
||||
|
||||
private:
|
||||
STL_Timer _chronos;
|
||||
unsigned long long _nb_sample;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,75 +0,0 @@
|
||||
//=====================================================
|
||||
// File : STL_Timer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
// STL Timer Class. Adapted (L.P.) from the timer class by Musser et Al
|
||||
// described in the book: STL Tutorial and Reference Guide.
|
||||
// Define a timer class for analyzing algorithm performance.
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
using namespace std;
|
||||
|
||||
// Coarse timer based on time(0): a "trial" is the span between two changes of
// the time(0) value, and the timer records how many check() calls fit into
// each trial.
class STL_Timer {
 public:
  // Only the baseline flag is initialised here; the other members are set by
  // start().
  STL_Timer() : baseline(false) {}

  // Starts a series of r trials.
  void start(unsigned int r) {
    reps = r;
    count = 0;
    iterations.clear();
    iterations.reserve(reps);
    initial = time(0);
  }

  // Starts a series of r trials and marks the series as a baseline run.
  void start_baseline(unsigned int r) {
    baseline = true;
    start(r);
  }

  // Counts one loop iteration. Whenever time(0) has advanced past the stored
  // start of the current trial, the iteration count is recorded and a new
  // trial begins. Returns true while fewer than `reps` trials have been
  // recorded (keep looping), false once the series is complete.
  bool check() {
    ++count;
    final = time(0);
    const bool trial_finished = initial < final;
    if (trial_finished) {
      iterations.push_back(count);
      initial = final;
      count = 0;
    }
    return (iterations.size() < reps);
  }

  // Sorts the recorded per-trial iteration counts and returns the reciprocal
  // of the element at index reps / 2.
  double get_time(void) {
    std::sort(iterations.begin(), iterations.end());
    const long middle = iterations[reps / 2];
    return 1.0 / middle;
  }

 private:
  unsigned int reps;  // Number of trials
  // Loop iterations counted in each completed trial
  std::vector<long> iterations;
  // Start and latest time stamps of the current trial
  time_t initial, final;
  // Loop iterations counted so far in the current trial
  unsigned long count;
  // Set by start_baseline(); not read anywhere in this class
  bool baseline;
  // Never written or read in this class
  double baseline_time;
};
|
||||
@@ -1,58 +0,0 @@
|
||||
//=====================================================
|
||||
// File : mixed_perf_analyzer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _MIXED_PERF_ANALYSER_HH
|
||||
#define _MIXED_PERF_ANALYSER_HH
|
||||
|
||||
#include "x86_perf_analyzer.hh"
|
||||
#include "portable_perf_analyzer.hh"
|
||||
|
||||
// choose portable perf analyzer for long calculations and x86 analyser for short ones
|
||||
|
||||
template <class Action>
|
||||
class Mixed_Perf_Analyzer {
|
||||
public:
|
||||
Mixed_Perf_Analyzer(void) : _x86pa(), _ppa(), _use_ppa(true) { MESSAGE("Mixed_Perf_Analyzer Ctor"); };
|
||||
Mixed_Perf_Analyzer(const Mixed_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Mixed_Perf_Analyzer(void) { MESSAGE("Mixed_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
double result = 0.0;
|
||||
if (_use_ppa) {
|
||||
result = _ppa.eval_mflops(size);
|
||||
if (_ppa.get_nb_calc() > DEFAULT_NB_SAMPLE) {
|
||||
_use_ppa = false;
|
||||
}
|
||||
} else {
|
||||
result = _x86pa.eval_mflops(size);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
Portable_Perf_Analyzer<Action> _ppa;
|
||||
X86_Perf_Analyzer<Action> _x86pa;
|
||||
bool _use_ppa;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,89 +0,0 @@
|
||||
//=====================================================
|
||||
// File : portable_perf_analyzer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _PORTABLE_PERF_ANALYZER_HH
|
||||
#define _PORTABLE_PERF_ANALYZER_HH
|
||||
|
||||
#include "utilities.h"
|
||||
#include "timers/portable_timer.hh"
|
||||
|
||||
template <class Action>
|
||||
class Portable_Perf_Analyzer {
|
||||
public:
|
||||
Portable_Perf_Analyzer() : _nb_calc(0), m_time_action(0), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
|
||||
Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Portable_Perf_Analyzer() { MESSAGE("Portable_Perf_Analyzer Dtor"); };
|
||||
|
||||
BTL_DONT_INLINE double eval_mflops(int size) {
|
||||
Action action(size);
|
||||
|
||||
// action.initialize();
|
||||
// time_action = time_calculate(action);
|
||||
while (m_time_action < MIN_TIME) {
|
||||
if (_nb_calc == 0)
|
||||
_nb_calc = 1;
|
||||
else
|
||||
_nb_calc *= 2;
|
||||
action.initialize();
|
||||
m_time_action = time_calculate(action);
|
||||
}
|
||||
|
||||
// optimize
|
||||
for (int i = 1; i < BtlConfig::Instance.tries; ++i) {
|
||||
Action _action(size);
|
||||
std::cout << " " << _action.nb_op_base() * _nb_calc / (m_time_action * 1e6) << " ";
|
||||
_action.initialize();
|
||||
m_time_action = std::min(m_time_action, time_calculate(_action));
|
||||
}
|
||||
|
||||
double time_action = m_time_action / (double(_nb_calc));
|
||||
|
||||
// check
|
||||
if (BtlConfig::Instance.checkResults && size < 128) {
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
action.check_result();
|
||||
}
|
||||
return action.nb_op_base() / (time_action * 1e6);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE double time_calculate(Action& action) {
|
||||
// time measurement
|
||||
action.calculate();
|
||||
_chronos.start();
|
||||
for (unsigned int ii = 0; ii < _nb_calc; ii++) {
|
||||
action.calculate();
|
||||
}
|
||||
_chronos.stop();
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc() { return _nb_calc; }
|
||||
|
||||
private:
|
||||
unsigned long long _nb_calc;
|
||||
double m_time_action;
|
||||
Portable_Timer _chronos;
|
||||
};
|
||||
|
||||
#endif //_PORTABLE_PERF_ANALYZER_HH
|
||||
@@ -1,110 +0,0 @@
|
||||
//=====================================================
|
||||
// File : portable_perf_analyzer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _PORTABLE_PERF_ANALYZER_HH
|
||||
#define _PORTABLE_PERF_ANALYZER_HH
|
||||
|
||||
#include "utilities.h"
|
||||
#include "timers/portable_timer.hh"
|
||||
|
||||
template <class Action>
|
||||
class Portable_Perf_Analyzer {
|
||||
public:
|
||||
Portable_Perf_Analyzer(void) : _nb_calc(1), _nb_init(1), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
|
||||
Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Portable_Perf_Analyzer(void) { MESSAGE("Portable_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
Action action(size);
|
||||
|
||||
// double time_baseline = time_init(action);
|
||||
// while (time_baseline < MIN_TIME_INIT)
|
||||
// {
|
||||
// _nb_init *= 2;
|
||||
// time_baseline = time_init(action);
|
||||
// }
|
||||
//
|
||||
// // optimize
|
||||
// for (int i=1; i<NB_TRIES; ++i)
|
||||
// time_baseline = std::min(time_baseline, time_init(action));
|
||||
//
|
||||
// time_baseline = time_baseline/(double(_nb_init));
|
||||
|
||||
double time_action = time_calculate(action);
|
||||
while (time_action < MIN_TIME) {
|
||||
_nb_calc *= 2;
|
||||
time_action = time_calculate(action);
|
||||
}
|
||||
|
||||
// optimize
|
||||
for (int i = 1; i < NB_TRIES; ++i) time_action = std::min(time_action, time_calculate(action));
|
||||
|
||||
// INFOS("size="<<size);
|
||||
// INFOS("_nb_init="<<_nb_init);
|
||||
// INFOS("_nb_calc="<<_nb_calc);
|
||||
|
||||
time_action = time_action / (double(_nb_calc));
|
||||
|
||||
action.check_result();
|
||||
|
||||
double time_baseline = time_init(action);
|
||||
for (int i = 1; i < NB_TRIES; ++i) time_baseline = std::min(time_baseline, time_init(action));
|
||||
time_baseline = time_baseline / (double(_nb_init));
|
||||
|
||||
// INFOS("time_baseline="<<time_baseline);
|
||||
// INFOS("time_action="<<time_action);
|
||||
|
||||
time_action = time_action - time_baseline;
|
||||
|
||||
// INFOS("time_corrected="<<time_action);
|
||||
|
||||
return action.nb_op_base() / (time_action * 1000000.0);
|
||||
}
|
||||
|
||||
inline double time_init(Action& action) {
|
||||
// time measurement
|
||||
_chronos.start();
|
||||
for (int ii = 0; ii < _nb_init; ii++) action.initialize();
|
||||
_chronos.stop();
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
inline double time_calculate(Action& action) {
|
||||
// time measurement
|
||||
_chronos.start();
|
||||
for (int ii = 0; ii < _nb_calc; ii++) {
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
}
|
||||
_chronos.stop();
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc(void) { return _nb_calc; }
|
||||
|
||||
private:
|
||||
unsigned long long _nb_calc;
|
||||
unsigned long long _nb_init;
|
||||
Portable_Timer _chronos;
|
||||
};
|
||||
|
||||
#endif //_PORTABLE_PERF_ANALYZER_HH
|
||||
@@ -1,143 +0,0 @@
|
||||
//=====================================================
|
||||
// File : portable_timer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)> from boost lib
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
// simple_time extracted from the boost library
|
||||
//
|
||||
#ifndef _PORTABLE_TIMER_HH
|
||||
#define _PORTABLE_TIMER_HH
|
||||
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#define USEC_IN_SEC 1000000
|
||||
|
||||
// timer -------------------------------------------------------------------//
|
||||
|
||||
// A timer object measures CPU time.
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
|
||||
/*#ifndef hr_timer
|
||||
#include "hr_time.h"
|
||||
#define hr_timer
|
||||
#endif*/
|
||||
|
||||
class Portable_Timer {
|
||||
public:
|
||||
typedef struct {
|
||||
LARGE_INTEGER start;
|
||||
LARGE_INTEGER stop;
|
||||
} stopWatch;
|
||||
|
||||
Portable_Timer() {
|
||||
startVal.QuadPart = 0;
|
||||
stopVal.QuadPart = 0;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
}
|
||||
|
||||
void start() { QueryPerformanceCounter(&startVal); }
|
||||
|
||||
void stop() { QueryPerformanceCounter(&stopVal); }
|
||||
|
||||
double elapsed() {
|
||||
LARGE_INTEGER time;
|
||||
time.QuadPart = stopVal.QuadPart - startVal.QuadPart;
|
||||
return LIToSecs(time);
|
||||
}
|
||||
|
||||
double user_time() { return elapsed(); }
|
||||
|
||||
private:
|
||||
double LIToSecs(LARGE_INTEGER& L) { return ((double)L.QuadPart / (double)frequency.QuadPart); }
|
||||
|
||||
LARGE_INTEGER startVal;
|
||||
LARGE_INTEGER stopVal;
|
||||
LARGE_INTEGER frequency;
|
||||
|
||||
}; // Portable_Timer
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
#include <CoreServices/CoreServices.h>
|
||||
#include <mach/mach_time.h>
|
||||
|
||||
class Portable_Timer {
|
||||
public:
|
||||
Portable_Timer() {}
|
||||
|
||||
void start() {
|
||||
m_start_time = double(mach_absolute_time()) * 1e-9;
|
||||
;
|
||||
}
|
||||
|
||||
void stop() {
|
||||
m_stop_time = double(mach_absolute_time()) * 1e-9;
|
||||
;
|
||||
}
|
||||
|
||||
double elapsed() { return user_time(); }
|
||||
|
||||
double user_time() { return m_stop_time - m_start_time; }
|
||||
|
||||
private:
|
||||
double m_stop_time, m_start_time;
|
||||
|
||||
}; // Portable_Timer (Apple)
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/times.h>
|
||||
|
||||
class Portable_Timer {
|
||||
public:
|
||||
Portable_Timer() { m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID; }
|
||||
|
||||
Portable_Timer(int clkid) : m_clkid(clkid) {}
|
||||
|
||||
void start() {
|
||||
timespec ts;
|
||||
clock_gettime(m_clkid, &ts);
|
||||
m_start_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
}
|
||||
|
||||
void stop() {
|
||||
timespec ts;
|
||||
clock_gettime(m_clkid, &ts);
|
||||
m_stop_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
}
|
||||
|
||||
double elapsed() { return user_time(); }
|
||||
|
||||
double user_time() { return m_stop_time - m_start_time; }
|
||||
|
||||
private:
|
||||
int m_clkid;
|
||||
double m_stop_time, m_start_time;
|
||||
|
||||
}; // Portable_Timer (Linux)
|
||||
|
||||
#endif
|
||||
|
||||
#endif  // _PORTABLE_TIMER_HH
|
||||
@@ -1,91 +0,0 @@
|
||||
//=====================================================
|
||||
// File : x86_perf_analyzer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _X86_PERF_ANALYSER_HH
|
||||
#define _X86_PERF_ANALYSER_HH
|
||||
|
||||
#include "x86_timer.hh"
|
||||
#include "bench_parameter.hh"
|
||||
|
||||
template <class ACTION>
|
||||
class X86_Perf_Analyzer {
|
||||
public:
|
||||
X86_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
|
||||
MESSAGE("X86_Perf_Analyzer Ctor");
|
||||
_chronos.find_frequency();
|
||||
};
|
||||
X86_Perf_Analyzer(const X86_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~X86_Perf_Analyzer(void) { MESSAGE("X86_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
ACTION action(size);
|
||||
|
||||
int nb_loop = 5;
|
||||
double calculate_time = 0.0;
|
||||
double baseline_time = 0.0;
|
||||
|
||||
for (int j = 0; j < nb_loop; j++) {
|
||||
_chronos.clear();
|
||||
|
||||
for (int i = 0; i < _nb_sample; i++) {
|
||||
_chronos.start();
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
_chronos.stop();
|
||||
_chronos.add_get_click();
|
||||
}
|
||||
|
||||
calculate_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
|
||||
|
||||
if (j == 0) action.check_result();
|
||||
|
||||
_chronos.clear();
|
||||
|
||||
for (int i = 0; i < _nb_sample; i++) {
|
||||
_chronos.start();
|
||||
action.initialize();
|
||||
_chronos.stop();
|
||||
_chronos.add_get_click();
|
||||
}
|
||||
|
||||
baseline_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
|
||||
}
|
||||
|
||||
double corrected_time = (calculate_time - baseline_time) / double(nb_loop);
|
||||
|
||||
// INFOS("_nb_sample="<<_nb_sample);
|
||||
// INFOS("baseline_time="<<baseline_time);
|
||||
// INFOS("calculate_time="<<calculate_time);
|
||||
// INFOS("corrected_time="<<corrected_time);
|
||||
|
||||
// cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
|
||||
|
||||
return action.nb_op_base() / (corrected_time * 1000000.0);
|
||||
// return action.nb_op_base()/(calculate_time*1000000.0);
|
||||
}
|
||||
|
||||
private:
|
||||
X86_Timer _chronos;
|
||||
unsigned long long _nb_sample;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,176 +0,0 @@
|
||||
//=====================================================
|
||||
// File : x86_timer.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef _X86_TIMER_HH
|
||||
#define _X86_TIMER_HH
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/times.h>
|
||||
// #include "system_time.h"
|
||||
#define u32 unsigned int
|
||||
#include <asm/msr.h>
|
||||
#include "utilities.h"
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
// CPU frequency in Hz
|
||||
// #define FREQUENCY 648000000
|
||||
// #define FREQUENCY 1400000000
|
||||
#define FREQUENCY 1695000000
|
||||
|
||||
using namespace std;
|
||||
|
||||
class X86_Timer {
|
||||
public:
|
||||
X86_Timer(void) : _frequency(FREQUENCY), _nb_sample(0) { MESSAGE("X86_Timer Default Ctor"); }
|
||||
|
||||
inline void start(void) { rdtsc(_click_start.n32[0], _click_start.n32[1]); }
|
||||
|
||||
inline void stop(void) { rdtsc(_click_stop.n32[0], _click_stop.n32[1]); }
|
||||
|
||||
inline double frequency(void) { return _frequency; }
|
||||
|
||||
double get_elapsed_time_in_second(void) { return (_click_stop.n64 - _click_start.n64) / double(FREQUENCY); }
|
||||
|
||||
unsigned long long get_click(void) { return (_click_stop.n64 - _click_start.n64); }
|
||||
|
||||
inline void find_frequency(void) {
|
||||
time_t initial, final;
|
||||
int dummy = 2;
|
||||
|
||||
initial = time(0);
|
||||
start();
|
||||
do {
|
||||
dummy += 2;
|
||||
} while (time(0) == initial);
|
||||
// We are at the start of a one-second cycle
|
||||
initial = time(0);
|
||||
start();
|
||||
do {
|
||||
dummy += 2;
|
||||
} while (time(0) == initial);
|
||||
final = time(0);
|
||||
stop();
|
||||
// INFOS("fine grained time : "<< get_elapsed_time_in_second());
|
||||
// INFOS("coarse grained time : "<< final-initial);
|
||||
_frequency = _frequency * get_elapsed_time_in_second() / double(final - initial);
|
||||
/// INFOS("CPU frequency : "<< _frequency);
|
||||
}
|
||||
|
||||
void add_get_click(void) {
|
||||
_nb_sample++;
|
||||
_counted_clicks[get_click()]++;
|
||||
fill_history_clicks();
|
||||
}
|
||||
|
||||
void dump_statistics(string filemane) {
|
||||
ofstream outfile(filemane.c_str(), ios::out);
|
||||
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
outfile << (*itr).first << " " << (*itr).second << endl;
|
||||
}
|
||||
|
||||
outfile.close();
|
||||
}
|
||||
|
||||
void dump_history(string filemane) {
|
||||
ofstream outfile(filemane.c_str(), ios::out);
|
||||
|
||||
for (int i = 0; i < _history_mean_clicks.size(); i++) {
|
||||
outfile << i << " " << _history_mean_clicks[i] << " " << _history_shortest_clicks[i] << " "
|
||||
<< _history_most_occured_clicks[i] << endl;
|
||||
}
|
||||
|
||||
outfile.close();
|
||||
}
|
||||
|
||||
double get_mean_clicks(void) {
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
|
||||
unsigned long long mean_clicks = 0;
|
||||
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
mean_clicks += (*itr).second * (*itr).first;
|
||||
}
|
||||
|
||||
return mean_clicks / double(_nb_sample);
|
||||
}
|
||||
|
||||
double get_shortest_clicks(void) { return double((*_counted_clicks.begin()).first); }
|
||||
|
||||
void fill_history_clicks(void) {
|
||||
_history_mean_clicks.push_back(get_mean_clicks());
|
||||
_history_shortest_clicks.push_back(get_shortest_clicks());
|
||||
_history_most_occured_clicks.push_back(get_most_occured_clicks());
|
||||
}
|
||||
|
||||
double get_most_occured_clicks(void) {
|
||||
unsigned long long moc = 0;
|
||||
unsigned long long max_occurence = 0;
|
||||
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
if (max_occurence <= (*itr).second) {
|
||||
max_occurence = (*itr).second;
|
||||
moc = (*itr).first;
|
||||
}
|
||||
}
|
||||
|
||||
return double(moc);
|
||||
}
|
||||
|
||||
void clear(void) {
|
||||
_counted_clicks.clear();
|
||||
|
||||
_history_mean_clicks.clear();
|
||||
_history_shortest_clicks.clear();
|
||||
_history_most_occured_clicks.clear();
|
||||
|
||||
_nb_sample = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
union {
|
||||
unsigned long int n32[2];
|
||||
unsigned long long n64;
|
||||
} _click_start;
|
||||
|
||||
union {
|
||||
unsigned long int n32[2];
|
||||
unsigned long long n64;
|
||||
} _click_stop;
|
||||
|
||||
double _frequency;
|
||||
|
||||
map<unsigned long long, unsigned long long> _counted_clicks;
|
||||
|
||||
vector<double> _history_mean_clicks;
|
||||
vector<double> _history_shortest_clicks;
|
||||
vector<double> _history_most_occured_clicks;
|
||||
|
||||
unsigned long long _nb_sample;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,56 +0,0 @@
|
||||
//=====================================================
|
||||
// File : size_lin_log.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef SIZE_LIN_LOG
|
||||
#define SIZE_LIN_LOG
|
||||
|
||||
#include "size_log.hh"
|
||||
|
||||
// Fills X with nb_point problem sizes: consecutive integers starting at 1,
// followed (only when more than ten points are requested) by sizes produced
// by size_log() between 10 and size_max. The size_min argument is ignored.
template <class Vector>
void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector& X) {
  const int linear_count = 9;
  int log_first_size = 10;

  X.resize(nb_point);

  // Number of leading entries that simply count 1, 2, 3, ...
  const bool has_log_tail = nb_point > log_first_size;
  const int head_count = has_log_tail ? linear_count : nb_point;
  for (int k = 0; k < head_count; ++k) {
    X[k] = k + 1;
  }

  if (!has_log_tail) {
    return;
  }

  // Remaining entries are delegated to size_log and copied behind the head.
  const int tail_count = nb_point - linear_count;
  Vector log_sizes;
  size_log(tail_count, log_first_size, size_max, log_sizes);
  for (int k = 0; k < tail_count; ++k) {
    X[linear_count + k] = log_sizes[k];
  }
}
|
||||
|
||||
#endif
|
||||
@@ -1,50 +0,0 @@
|
||||
//=====================================================
|
||||
// File : size_log.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef SIZE_LOG
|
||||
#define SIZE_LOG
|
||||
|
||||
#include "math.h"
|
||||
// The Vector class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
// [] operator for setting element
|
||||
// the vector elements are int compatible.
|
||||
// Fills X with nb_point sizes spaced evenly in log(size) between size_min
// and size_max; each value is truncated to int.
//
// A single requested point yields size_min: the general formula would divide
// by (nb_point - 1) == 0 and convert a NaN to int.
template <class Vector>
void size_log(const int nb_point, const int size_min, const int size_max, Vector& X) {
  X.resize(nb_point);

  if (nb_point == 1) {
    X[0] = size_min;
    return;
  }

  float ls_min = log(float(size_min));
  float ls_max = log(float(size_max));

  // Constant step in log space between two consecutive points.
  float delta_ls = (ls_max - ls_min) / (float(nb_point - 1));

  for (int i = 0; i < nb_point; i++) {
    float ls = ls_min + float(i) * delta_ls;

    X[i] = int(exp(ls));
  }
}
|
||||
|
||||
#endif
|
||||
@@ -1,130 +0,0 @@
|
||||
//=============================================================================
|
||||
// File : utilities.h
|
||||
// Created : mar jun 19 13:18:14 CEST 2001
|
||||
// Author : Antoine YESSAYAN, Paul RASCLE, EDF
|
||||
// Project : SALOME
|
||||
// Copyright : EDF 2001
|
||||
// $Header$
|
||||
//=============================================================================
|
||||
|
||||
/* --- Definition macros file to print information if _DEBUG_ is defined --- */
|
||||
|
||||
#ifndef UTILITIES_H
|
||||
#define UTILITIES_H
|
||||
|
||||
#include <stdlib.h>
|
||||
// # include <iostream> ok for gcc3.01
|
||||
#include <iostream>
|
||||
|
||||
/* --- INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */
|
||||
|
||||
/* HEREWEARE flushes stdout, then prints "<file> [<line>] : " on stderr.
 * It is wrapped in braces so that it behaves as one statement, e.g. after
 * an unbraced `if`. Stream names are std-qualified so the macros do not
 * depend on a `using namespace std` in the including file. */
#define HEREWEARE                                                  \
  {                                                                \
    std::cout << std::flush;                                       \
    std::cerr << __FILE__ << " [" << __LINE__ << "] : " << std::flush; \
  }
/* INFOS prints the location prefix followed by `chain` and a newline. */
#define INFOS(chain)                  \
  {                                   \
    HEREWEARE;                        \
    std::cerr << chain << std::endl;  \
  }
/* PYSCRIPT prints `chain` on stderr behind a "---PYSCRIPT--- " marker. */
#define PYSCRIPT(chain)                                    \
  {                                                        \
    std::cout << std::flush;                               \
    std::cerr << "---PYSCRIPT--- " << chain << std::endl;  \
  }
|
||||
|
||||
/* --- To print date and time of compilation of current source on stdout --- */
|
||||
|
||||
/* COMPILER expands to a string literal naming the compiler. The definitions
 * carry no trailing semicolon so the macro can be used inside expressions. */
#if defined(__NVCOMPILER)
#define COMPILER "nvc++"
#elif defined(__GNUC__)
#define COMPILER "g++"
#elif defined(__sun)
#define COMPILER "CC"
#elif defined(__KCC)
#define COMPILER "KCC"
#elif defined(__PGI)
#define COMPILER "pgCC"
#else
#define COMPILER "undefined"
#endif

#ifdef INFOS_COMPILATION
#error INFOS_COMPILATION already defined
#endif
/* INFOS_COMPILATION prints, on stdout, the current file and line followed by
 * the compiler name and the date and time of compilation. */
#define INFOS_COMPILATION                 \
  {                                       \
    cerr << flush;                        \
    cout << __FILE__;                     \
    cout << " [" << __LINE__ << "] : ";   \
    cout << "COMPILED with " << COMPILER; \
    cout << ", " << __DATE__;             \
    cout << " at " << __TIME__ << endl;   \
    cout << "\n\n";                       \
    cout << flush;                        \
  }
|
||||
|
||||
#ifdef _DEBUG_

/* --- the following MACROS are useful at debug time --- */

/* Every multi-statement macro below is wrapped in braces (as MESSAGE,
 * BEGIN_OF and END_OF already were) so that it acts as a single statement,
 * e.g. when it follows an unbraced `if`. */

/* HERE flushes stdout and prints "- Trace <file> [<line>] : " on stderr. */
#define HERE                                                         \
  {                                                                  \
    cout << flush;                                                   \
    cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush; \
  }
/* SCRUTE prints "<expression>=<value>" behind the trace prefix. */
#define SCRUTE(var)                     \
  {                                     \
    HERE;                               \
    cerr << #var << "=" << var << endl; \
  }
/* MESSAGE prints `chain` behind the trace prefix. */
#define MESSAGE(chain)     \
  {                        \
    HERE;                  \
    cerr << chain << endl; \
  }
/* INTERRUPTION reports `code` and terminates the process with it. */
#define INTERRUPTION(code)                                \
  {                                                       \
    HERE;                                                 \
    cerr << "INTERRUPTION return code= " << code << endl; \
    exit(code);                                           \
  }

#ifndef ASSERT
/* ASSERT terminates with code 1 when `condition` is false. The outer braces
 * keep a following `else` from binding to the macro's internal `if`. */
#define ASSERT(condition)                                            \
  {                                                                  \
    if (!(condition)) {                                              \
      HERE;                                                          \
      cerr << "CONDITION " << #condition << " NOT VERIFIED" << endl; \
      INTERRUPTION(1);                                               \
    }                                                                \
  }
#endif /* ASSERT */

/* REPERE prints a separator line on stderr. */
#define REPERE                                    \
  {                                               \
    cout << flush;                                \
    cerr << "   --------------" << endl << flush; \
  }
#define BEGIN_OF(chain)                    \
  {                                        \
    REPERE;                                \
    HERE;                                  \
    cerr << "Begin of: " << chain << endl; \
    REPERE;                                \
  }
#define END_OF(chain)                           \
  {                                             \
    REPERE;                                     \
    HERE;                                       \
    cerr << "Normal end of: " << chain << endl; \
    REPERE;                                     \
  }

#else /* ifdef _DEBUG_*/

/* Without _DEBUG_ every debug macro expands to nothing. */
#define HERE
#define SCRUTE(var)
#define MESSAGE(chain)
#define INTERRUPTION(code)

#ifndef ASSERT
#define ASSERT(condition)
#endif /* ASSERT */

#define REPERE
#define BEGIN_OF(chain)
#define END_OF(chain)

#endif /* ifdef _DEBUG_*/
|
||||
|
||||
#endif /* ifndef UTILITIES_H */
|
||||
@@ -1,71 +0,0 @@
|
||||
//=====================================================
|
||||
// File : dump_file_x_y.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:20 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef XY_FILE_HH
|
||||
#define XY_FILE_HH
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
bool read_xy_file(const std::string& filename, std::vector<int>& tab_sizes, std::vector<double>& tab_mflops,
|
||||
bool quiet = false) {
|
||||
std::ifstream input_file(filename.c_str(), std::ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
if (!quiet) {
|
||||
INFOS("!!! Error opening " << filename);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int nb_point = 0;
|
||||
int size = 0;
|
||||
double mflops = 0;
|
||||
|
||||
while (input_file >> size >> mflops) {
|
||||
nb_point++;
|
||||
tab_sizes.push_back(size);
|
||||
tab_mflops.push_back(mflops);
|
||||
}
|
||||
SCRUTE(nb_point);
|
||||
|
||||
input_file.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
// The Vector class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
// [] operator for setting element
|
||||
// the vector elements must have the << operator defined
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Writes one "<X[i]> <Y[i]>" line per element of X to `filename`,
// overwriting any previous content. Y is indexed over the size of X.
template <class Vector_A, class Vector_B>
void dump_xy_file(const Vector_A& X, const Vector_B& Y, const std::string& filename) {
  std::ofstream sink(filename.c_str(), std::ios::out);

  const int row_count = X.size();
  int row = 0;
  while (row < row_count) {
    sink << X[row] << " " << Y[row] << std::endl;
    ++row;
  }

  sink.close();
}
|
||||
|
||||
#endif
|
||||
@@ -1,47 +0,0 @@
|
||||
|
||||
# One CBLAS benchmark executable per BLAS implementation found on the host.
# Each is built from main.cpp; CBLASNAME selects the implementation and
# HAS_LAPACK=1 is defined for the backends listed with it below.
# btl_add_bench() is provided by the enclosing BTL build scripts; the
# BUILD_<target> variable is checked before touching the target.

# ATLAS
find_package(ATLAS)
if(ATLAS_FOUND)
  btl_add_bench(btl_atlas main.cpp)
  if(BUILD_btl_atlas)
    target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
    set_target_properties(btl_atlas
      PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
  endif()
endif()

# Intel MKL
find_package(MKL)
if(MKL_FOUND)
  btl_add_bench(btl_mkl main.cpp)
  if(BUILD_btl_mkl)
    target_link_libraries(btl_mkl ${MKL_LIBRARIES})
    set_target_properties(btl_mkl
      PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
  endif()
endif()

# OpenBLAS
find_package(OPENBLAS)
if(OPENBLAS_FOUND)
  btl_add_bench(btl_openblas main.cpp)
  if(BUILD_btl_openblas)
    target_link_libraries(btl_openblas ${OPENBLAS_LIBRARIES})
    set_target_properties(btl_openblas
      PROPERTIES COMPILE_FLAGS "-DCBLASNAME=OPENBLAS")
  endif()
endif()

# AMD ACML
find_package(ACML)
if(ACML_FOUND)
  btl_add_bench(btl_acml main.cpp)
  if(BUILD_btl_acml)
    target_link_libraries(btl_acml ${ACML_LIBRARIES})
    set_target_properties(btl_acml
      PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1")
  endif()
endif()

# Eigen's own BLAS/LAPACK: only when building inside the Eigen source tree
# and a working Fortran compiler is available.
if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS)
  include_directories(${Eigen_SOURCE_DIR})
  btl_add_bench(btl_eigenblas main.cpp)
  if(BUILD_btl_eigenblas)
    target_link_libraries(btl_eigenblas eigen_blas eigen_lapack)
    set_target_properties(btl_eigenblas
      PROPERTIES COMPILE_FLAGS "-DCBLASNAME=EigenBLAS")
  endif()
endif()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user