From 32ffce04fc3415ef10d2913fc90806077602e87d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 08:47:28 -0700 Subject: [PATCH 1/7] Use EIGEN_THREAD_YIELD instead of std::this_thread::yield to make the code more portable. --- unsupported/test/cxx11_eventcount.cpp | 2 +- unsupported/test/cxx11_runqueue.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp index 2f250338c..f16cc6f07 100644 --- a/unsupported/test/cxx11_eventcount.cpp +++ b/unsupported/test/cxx11_eventcount.cpp @@ -95,7 +95,7 @@ static void test_stress_eventcount() ec.Notify(false); continue; } - std::this_thread::yield(); + EIGEN_THREAD_YIELD(); j--; } })); diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp index 4207824bf..6c99eb981 100644 --- a/unsupported/test/cxx11_runqueue.cpp +++ b/unsupported/test/cxx11_runqueue.cpp @@ -184,7 +184,7 @@ void test_stress_runqueue() sum += j; continue; } - std::this_thread::yield(); + EIGEN_THREAD_YIELD(); j--; } total += sum; @@ -194,7 +194,7 @@ void test_stress_runqueue() std::vector stolen; for (int j = 1; j < kEvents;) { if (q.PopBackHalf(&stolen) == 0) { - std::this_thread::yield(); + EIGEN_THREAD_YIELD(); continue; } while (stolen.size() && j < kEvents) { @@ -209,7 +209,7 @@ void test_stress_runqueue() int v = stolen.back(); stolen.pop_back(); VERIFY_IS_NOT_EQUAL(v, 0); - while ((v = q.PushBack(v)) != 0) std::this_thread::yield(); + while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD(); } total -= sum; })); From 6015422ee6e07377e9f8c776d136674ea303c57b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 10:30:29 -0700 Subject: [PATCH 2/7] Added an option to enable the use of the F16C instruction set --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51beba118..3b3753332 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,6 +221,12 @@ if(NOT MSVC) message(STATUS "Enabling FMA in tests/examples") endif() + option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF) + if(EIGEN_TEST_F16C) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") + message(STATUS "Enabling F16C in tests/examples") + endif() + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) if(EIGEN_TEST_ALTIVEC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") From f670613e4b90609229b016c3e2d1be9f4b8d54eb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 11:03:02 -0700 Subject: [PATCH 3/7] Fixed several compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 2 +- unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h | 2 +- unsupported/test/cxx11_runqueue.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index c3edae477..5c3d4d630 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -135,7 +135,7 @@ class TensorExecutor { { const Index PacketSize = Vectorizable ? unpacket_traits::size : 1; const Index size = array_prod(evaluator.dimensions()); - int num_threads = device.numThreads(); + size_t num_threads = device.numThreads(); #ifdef EIGEN_USE_COST_MODEL if (num_threads > 1) { num_threads = TensorCostModel::numThreads( diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h index aaa1d92c7..0544a6e15 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h @@ -168,7 +168,7 @@ class RunQueue { // larger than it is during concurrent modifications. E.g. pop can // decrement size before the corresponding push has incremented it. // So the computed size can be up to kSize + 1, fix it. - if (size > kSize) size = kSize; + if (size > static_cast(kSize)) size = kSize; return size; } } diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp index 6c99eb981..d1770ee1b 100644 --- a/unsupported/test/cxx11_runqueue.cpp +++ b/unsupported/test/cxx11_runqueue.cpp @@ -30,11 +30,11 @@ void test_basic_runqueue() RunQueue q; // Check empty state. VERIFY(q.Empty()); - VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0u, q.Size()); VERIFY_IS_EQUAL(0, q.PopFront()); std::vector stolen; VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen)); - VERIFY_IS_EQUAL(0, stolen.size()); + VERIFY_IS_EQUAL(0u, stolen.size()); // Push one front, pop one front. VERIFY_IS_EQUAL(0, q.PushFront(1)); VERIFY_IS_EQUAL(1, q.Size()); From 79b900375fc0bffd659b19f56818156942687b0c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 11:58:27 -0700 Subject: [PATCH 4/7] Use index list for the striding benchmarks --- bench/tensors/tensor_benchmarks.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 90b9bc741..62533a608 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -201,9 +201,15 @@ template class BenchmarkSuite { size_b[1] = k_/2; TensorMap, Eigen::Aligned> B(b_, size_b); +#ifndef EIGEN_HAS_INDEX_LIST Eigen::array strides; strides[0] = 1; strides[1] = 2; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList, Eigen::type2index<2> > strides; +#endif StartBenchmarkTiming(); for (int iter = 0; iter < num_iters; ++iter) { From 33adce5c3abcf14b1da5d9ba6502530d140f5cb4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 11:59:58 -0700 Subject: [PATCH 5/7] Added the ability to switch to the new thread pool with a #define --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 41918eb19..fc03d84a7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -14,7 +14,11 @@ namespace Eigen { // Use the SimpleThreadPool by default. We'll switch to the new non blocking // thread pool later. +#ifdef EIGEN_USE_NONBLOCKING_THREAD_POOL +typedef NonBlockingThreadPool ThreadPool; +#else typedef SimpleThreadPool ThreadPool; +#endif // Barrier is an object that allows one or more threads to wait until From a3256d78d849b6978356a587038c3c62d4437bc9 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 21 Apr 2016 16:49:28 -0700 Subject: [PATCH 6/7] Prevent crash in CompleteOrthogonalDecomposition if object was default constructed. --- Eigen/src/QR/CompleteOrthogonalDecomposition.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index e71944fd7..230d0d23c 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -397,6 +397,10 @@ CompleteOrthogonalDecomposition& CompleteOrthogonalDecomposition< const Index rank = m_cpqr.rank(); const Index cols = matrix.cols(); + const Index rows = matrix.rows(); + m_zCoeffs.resize((std::min)(rows, cols)); + m_temp.resize(cols); + if (rank < cols) { // We have reduced the (permuted) matrix to the form // [R11 R12] From 4bbc97be5eda5dacd5ac44baa54f59a59176e12c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 21 Apr 2016 17:59:33 -0700 Subject: [PATCH 7/7] Provide access to the base threadpool classes --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index fc03d84a7..c02891465 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -15,8 +15,10 @@ namespace Eigen { // Use the SimpleThreadPool by default. We'll switch to the new non blocking // thread pool later. #ifdef EIGEN_USE_NONBLOCKING_THREAD_POOL +template using ThreadPoolTempl = NonBlockingThreadPoolTempl; typedef NonBlockingThreadPool ThreadPool; #else +template using ThreadPoolTempl = SimpleThreadPoolTempl; typedef SimpleThreadPool ThreadPool; #endif