diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index d1908a4c3..cbe416602 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -73,6 +73,7 @@ #include "src/Tensor/TensorEvaluator.h" #include "src/Tensor/TensorExpr.h" #include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorArgMax.h" #include "src/Tensor/TensorConcatenation.h" #include "src/Tensor/TensorContraction.h" #include "src/Tensor/TensorContractionThreadPool.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h new file mode 100644 index 000000000..ee3bf7fe3 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -0,0 +1,288 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H +#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H + +namespace Eigen { +namespace internal { + +/** \class TensorIndexTuple + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor + Index Tuple class. + * + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Tuple Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorIndexTupleOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorIndexTupleOp type; +}; + +} // end namespace internal + +template +class TensorIndexTupleOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Tuple CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorIndexTupleOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return CoeffReturnType(index, m_impl.coeff(index)); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + TensorEvaluator m_impl; +}; + +namespace internal { + +/** \class TensorTupleIndex + * \ingroup CXX11_Tensor_Module + * + * \brief Converts to Tensor > and reduces to Tensor. + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Index Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorTupleReducerOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorTupleReducerOp type; +}; + +} // end namespace internal + +template +class TensorTupleReducerOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Index CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, + const ReduceOp& reduce_op, + const int return_dim, + const Dims& reduce_dims) + : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const ReduceOp& reduce_op() const { return m_reduce_op; } + + EIGEN_DEVICE_FUNC + const Dims& reduce_dims() const { return m_reduce_dims; } + + EIGEN_DEVICE_FUNC + int return_dim() const { return m_return_dim; } + + protected: + typename XprType::Nested m_xpr; + const ReduceOp m_reduce_op; + const int m_return_dim; + const Dims m_reduce_dims; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorTupleReducerOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename TensorIndexTupleOp::CoeffReturnType TupleType; + typedef typename TensorEvaluator >, Device>::Dimensions Dimensions; + typedef typename TensorEvaluator , Device>::Dimensions InputDimensions; + static const int NumDims = internal::array_size::value; + typedef array StrideDims; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator >, Device>::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_orig_impl(op.expression(), device), + m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), + m_return_dim(op.return_dim()), + m_strides(gen_strides(m_orig_impl.dimensions())), + m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), + m_stride_div(gen_stride_div()) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const TupleType v = m_impl.coeff(index); + return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) { + StrideDims strides; + if (m_return_dim < 0) return strides; // Won't be using these. + eigen_assert(m_return_dim < NumDims && + "Asking to convert index to a dimension outside of the rank"); + + // Calculate m_stride_div and m_stride_mod, which are used to + // calculate the value of an index w.r.t. the m_return_dim. + if (Layout == static_cast(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i-1] * dims[i-1]; + } + } else { + strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i+1] * dims[i+1]; + } + } + return strides; + } + + EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { + if (Layout == static_cast(ColMajor)) { + return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize(); + } else { + return (m_return_dim > 0) ? m_strides[m_return_dim - 1] : dims.TotalSize(); + } + } + + EIGEN_DEVICE_FUNC Index gen_stride_div() { + return m_strides[m_return_dim]; + } + + protected: + TensorEvaluator, Device> m_orig_impl; + TensorEvaluator >, Device> m_impl; + const int m_return_dim; + const StrideDims m_strides; + const Index m_stride_mod; + const Index m_stride_div; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 0e5e4b426..477e4a174 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -363,6 +363,58 @@ class TensorBase return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax(const int return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), return_dim, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin(const int return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), return_dim, in_dims); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp reduce(const Dims& dims, const Reducer& reducer) const { @@ -483,6 +535,13 @@ class TensorBase return TensorInflationOp(derived(), strides); } + // Returns a tensor containing index/value tuples + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorIndexTupleOp + index_tuples() const { + return TensorIndexTupleOp(derived()); + } + // Support for custom unary and binary operations template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 17b0e6153..c22444e6f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -23,6 +23,8 @@ template class TensorCwiseUnaryOp; template class TensorCwiseBinaryOp; template class TensorSelectOp; template class TensorReductionOp; +template class TensorIndexTupleOp; +template class TensorTupleReducerOp; template class TensorConcatenationOp; template class TensorContractionOp; template class TensorConversionOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index d9061c216..ed259399b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -219,6 +219,40 @@ template struct ProdReducer }; +// Argmin/Argmax reducers +template struct ArgMaxTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t.second > accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::lowest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template struct ArgMinTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { + if (t.second < accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::highest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + + // Random number generation namespace { #ifdef __CUDA_ARCH__ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 78feb85cd..7dfa04760 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -31,6 +31,60 @@ template <> struct max_n_1<0> { static const size_t size = 1; }; + + + +#if defined(EIGEN_HAS_CONSTEXPR) +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + +// Tuple mimics std::pair but works on e.g. nvcc. +template struct Tuple { + public: + U first; + V second; + + typedef U first_type; + typedef V second_type; + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple() : first(), second() {} + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple(const U& f, const V& s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple& operator= (const Tuple& rhs) { + if (&rhs == this) return *this; + first = rhs.first; + second = rhs.second; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(Tuple& rhs) { + using numext::swap; + swap(first, rhs.first); + swap(second, rhs.second); + } +}; + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator==(const Tuple& x, const Tuple& y) { + return (x.first == y.first && x.second == y.second); +} + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator!=(const Tuple& x, const Tuple& y) { + return !(x == y); +} + +#undef EIGEN_CONSTEXPR + } // namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 7c8fb8dde..b161cb370 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -130,6 +130,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_image_patch "-std=c++0x") ei_add_test(cxx11_tensor_volume_patch "-std=c++0x") ei_add_test(cxx11_tensor_reduction "-std=c++0x") + ei_add_test(cxx11_tensor_argmax "-std=c++0x") ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") @@ -148,5 +149,6 @@ if(EIGEN_TEST_CXX11) # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_random_cuda "-std=c++0x") +# ei_add_test(cxx11_tensor_argmax_cuda "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp new file mode 100644 index 000000000..4c532409e --- /dev/null +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -0,0 +1,294 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +using Eigen::Tensor; +using Eigen::array; +using Eigen::Tuple; + +template +static void test_simple_index_tuples() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + for (DenseIndex n = 0; n < 2*3*5*7; ++n) { + const Tuple& v = index_tuples.coeff(n); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor.coeff(n)); + } +} + +template +static void test_index_tuples_dim() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor, 4, DataLayout> index_tuples(2,3,5,7); + + index_tuples = tensor.index_tuples(); + + for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) { + const Tuple& v = index_tuples(n); //(i, j, k, l); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor(n)); + } +} + +template +static void test_argmax_tuple_reducer() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor, 1, DataLayout> reduced(1); + DimensionList dims; + reduced = index_tuples.reduce( + dims, internal::ArgMaxTupleReducer>()); + + Tensor maxi = tensor.maximum(); + + VERIFY_IS_EQUAL(maxi(0), reduced(0).second); + + array reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMaxTupleReducer>()); + + Tensor max_by_dims = tensor.maximum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(max_by_dims(l), reduced_by_dims(l).second); + } +} + +template +static void test_argmin_tuple_reducer() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor, 1, DataLayout> reduced(1); + DimensionList dims; + reduced = index_tuples.reduce( + dims, internal::ArgMinTupleReducer>()); + + Tensor mini = tensor.minimum(); + + VERIFY_IS_EQUAL(mini(0), reduced(0).second); + + array reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMinTupleReducer>()); + + Tensor min_by_dims = tensor.minimum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(min_by_dims(l), reduced_by_dims(l).second); + } +} + +template +static void test_simple_argmax() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = 10.0; + + Tensor tensor_argmax(1); + + tensor_argmax = tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 0); + + tensor(1,2,4,6) = 20.0; + + tensor_argmax = tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 2*3*5*7 - 1); +} + +template +static void test_simple_argmin() +{ + Tensor tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = -10.0; + + Tensor tensor_argmin(1); + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 0); + + tensor(1,2,4,6) = -20.0; + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 2*3*5*7 - 1); +} + +template +static void test_argmax_dim() +{ + Tensor tensor(2,3,5,7); + std::vector dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor tensor_argmax; + array ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmax.dimensions().TotalSize(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmax.dimensions().TotalSize(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], tensor.dimension(dim) - 1); + } + } +} + +template +static void test_argmin_dim() +{ + Tensor tensor(2,3,5,7); + std::vector dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor tensor_argmin; + array ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0 + tensor(ix) = -10.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmin.dimensions().TotalSize(); ++n) { + // Expect min to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0 + tensor(ix) = -20.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + for (size_t n = 0; n < tensor_argmin.dimensions().TotalSize(); ++n) { + // Expect min to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], tensor.dimension(dim) - 1); + } + } +} + +void test_cxx11_tensor_argmax() +{ + CALL_SUBTEST(test_simple_index_tuples()); + CALL_SUBTEST(test_simple_index_tuples()); + CALL_SUBTEST(test_index_tuples_dim()); + CALL_SUBTEST(test_index_tuples_dim()); + CALL_SUBTEST(test_argmax_tuple_reducer()); + CALL_SUBTEST(test_argmax_tuple_reducer()); + CALL_SUBTEST(test_argmin_tuple_reducer()); + CALL_SUBTEST(test_argmin_tuple_reducer()); + CALL_SUBTEST(test_simple_argmax()); + CALL_SUBTEST(test_simple_argmax()); + CALL_SUBTEST(test_simple_argmin()); + CALL_SUBTEST(test_simple_argmin()); + CALL_SUBTEST(test_argmax_dim()); + CALL_SUBTEST(test_argmax_dim()); + CALL_SUBTEST(test_argmin_dim()); + CALL_SUBTEST(test_argmin_dim()); +} diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cpp b/unsupported/test/cxx11_tensor_argmax_cuda.cpp new file mode 100644 index 000000000..d37490d15 --- /dev/null +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cpp @@ -0,0 +1,241 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// TODO(mdevin): Free the cuda memory. + +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_USE_GPU + +#include "main.h" +#include + +using Eigen::Tensor; + +template +void test_cuda_simple_argmax() +{ + Tensor in(Eigen::array(72,53,97)); + Tensor out_max(Eigen::array(1)); + Tensor out_min(Eigen::array(1)); + in.setRandom(); + in *= in.constant(100.0); + in(0, 0, 0) = -1000.0; + in(71, 52, 96) = 1000.0; + + std::size_t in_bytes = in.size() * sizeof(double); + std::size_t out_bytes = out_max.size() * sizeof(DenseIndex); + + double* d_in; + DenseIndex* d_out_max; + DenseIndex* d_out_min; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out_max), out_bytes); + cudaMalloc((void**)(&d_out_min), out_bytes); + + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap, Aligned > gpu_in(d_in, Eigen::array(72,53,97)); + Eigen::TensorMap, Aligned > gpu_out_max(d_out_max, Eigen::array(1)); + Eigen::TensorMap, Aligned > gpu_out_min(d_out_min, Eigen::array(1)); + + gpu_out_max.device(gpu_device) = gpu_in.argmax(); + gpu_out_min.device(gpu_device) = gpu_in.argmin(); + + assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out_max(Eigen::array(0)), 72*53*97 - 1); + VERIFY_IS_EQUAL(out_min(Eigen::array(0)), 0); +} + +template +void test_cuda_argmax_dim() +{ + Tensor tensor(2,3,5,7); + std::vector dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1]; + + Tensor tensor_arg(out_shape); + + array ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + std::size_t in_bytes = tensor.size() * sizeof(float); + std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex); + + float* d_in; + DenseIndex* d_out; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap, Aligned > gpu_in(d_in, Eigen::array(2, 3, 5, 7)); + Eigen::TensorMap, Aligned > gpu_out(d_out, out_shape); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(tensor_arg.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1); + } + } +} + +template +void test_cuda_argmin_dim() +{ + Tensor tensor(2,3,5,7); + std::vector dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1]; + + Tensor tensor_arg(out_shape); + + array ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = -10.0; + } + } + } + } + + std::size_t in_bytes = tensor.size() * sizeof(float); + std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex); + + float* d_in; + DenseIndex* d_out; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap, Aligned > gpu_in(d_in, Eigen::array(2, 3, 5, 7)); + Eigen::TensorMap, Aligned > gpu_out(d_out, out_shape); + + gpu_out.device(gpu_device) = gpu_in.argmin(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(tensor_arg.dimensions().TotalSize(), + size_t(2*3*5*7 / tensor.dimension(dim))); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect min to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = -20.0; + } + } + } + } + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + gpu_out.device(gpu_device) = gpu_in.argmin(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (size_t n = 0; n < tensor_arg.dimensions().TotalSize(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1); + } + } +} + +void test_cxx11_tensor_cuda() +{ + CALL_SUBTEST(test_cuda_simple_argmax()); + CALL_SUBTEST(test_cuda_simple_argmax()); + CALL_SUBTEST(test_cuda_argmax_dim()); + CALL_SUBTEST(test_cuda_argmax_dim()); + CALL_SUBTEST(test_cuda_argmin_dim()); + CALL_SUBTEST(test_cuda_argmin_dim()); +}