Pulled latest updates from upstream

This commit is contained in:
Benoit Steiner
2016-04-29 13:41:26 -07:00
113 changed files with 3595 additions and 972 deletions

View File

@@ -116,6 +116,8 @@ if(EIGEN_TEST_CXX11)
set(CMAKE_CXX_STANDARD 11)
ei_add_test(cxx11_float16)
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry)

View File

@@ -0,0 +1,140 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include "main.h"
#include <Eigen/CXX11/ThreadPool>
// Visual studio doesn't implement a rand_r() function since its
// implementation of rand() is already thread safe
int rand_reentrant(unsigned int* s) {
#ifdef EIGEN_COMP_MSVC_STRICT
EIGEN_UNUSED_VARIABLE(s);
return rand();
#else
return rand_r(s);
#endif
}
static void test_basic_eventcount()
{
std::vector<EventCount::Waiter> waiters(1);
EventCount ec(waiters);
EventCount::Waiter& w = waiters[0];
ec.Notify(false);
ec.Prewait(&w);
ec.Notify(true);
ec.CommitWait(&w);
ec.Prewait(&w);
ec.CancelWait(&w);
}
// Fake bounded counter-based queue.
struct TestQueue {
std::atomic<int> val_;
static const int kQueueSize = 10;
TestQueue() : val_() {}
~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
bool Push() {
int val = val_.load(std::memory_order_relaxed);
for (;;) {
VERIFY_GE(val, 0);
VERIFY_LE(val, kQueueSize);
if (val == kQueueSize) return false;
if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
return true;
}
}
bool Pop() {
int val = val_.load(std::memory_order_relaxed);
for (;;) {
VERIFY_GE(val, 0);
VERIFY_LE(val, kQueueSize);
if (val == 0) return false;
if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
return true;
}
}
bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
};
const int TestQueue::kQueueSize;
// A number of producers send messages to a set of consumers using a set of
// fake queues. Ensure that it does not crash, consumers don't deadlock and
// number of blocked and unblocked threads match.
static void test_stress_eventcount()
{
const int kThreads = std::thread::hardware_concurrency();
static const int kEvents = 1 << 16;
static const int kQueues = 10;
std::vector<EventCount::Waiter> waiters(kThreads);
EventCount ec(waiters);
TestQueue queues[kQueues];
std::vector<std::unique_ptr<std::thread>> producers;
for (int i = 0; i < kThreads; i++) {
producers.emplace_back(new std::thread([&ec, &queues]() {
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
for (int j = 0; j < kEvents; j++) {
unsigned idx = rand_reentrant(&rnd) % kQueues;
if (queues[idx].Push()) {
ec.Notify(false);
continue;
}
EIGEN_THREAD_YIELD();
j--;
}
}));
}
std::vector<std::unique_ptr<std::thread>> consumers;
for (int i = 0; i < kThreads; i++) {
consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
EventCount::Waiter& w = waiters[i];
unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
for (int j = 0; j < kEvents; j++) {
unsigned idx = rand_reentrant(&rnd) % kQueues;
if (queues[idx].Pop()) continue;
j--;
ec.Prewait(&w);
bool empty = true;
for (int q = 0; q < kQueues; q++) {
if (!queues[q].Empty()) {
empty = false;
break;
}
}
if (!empty) {
ec.CancelWait(&w);
continue;
}
ec.CommitWait(&w);
}
}));
}
for (int i = 0; i < kThreads; i++) {
producers[i]->join();
consumers[i]->join();
}
}
void test_cxx11_eventcount()
{
CALL_SUBTEST(test_basic_eventcount());
CALL_SUBTEST(test_stress_eventcount());
}

View File

@@ -122,6 +122,8 @@ void test_comparison()
VERIFY(half(1.0f) != half(2.0f));
// Comparisons with NaNs and infinities.
#if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0
VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
@@ -132,13 +134,26 @@ void test_comparison()
VERIFY(half(1.0) < half(1.0 / 0.0));
VERIFY(half(1.0) > half(-1.0 / 0.0));
#endif
}
void test_functions()
void test_basic_functions()
{
VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
@@ -146,10 +161,32 @@ void test_functions()
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
}
void test_trigonometric_functions()
{
VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
//VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
//VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
// VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
// VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
// VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
//VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
}
void test_cxx11_float16()
{
CALL_SUBTEST(test_conversion());
CALL_SUBTEST(test_arithmetic());
CALL_SUBTEST(test_comparison());
CALL_SUBTEST(test_functions());
CALL_SUBTEST(test_basic_functions());
CALL_SUBTEST(test_trigonometric_functions());
}

View File

@@ -10,7 +10,7 @@
#include "main.h"
#include <array>
#include <Eigen/CXX11/Core>
#include <Eigen/CXX11/src/util/CXX11Meta.h>
using Eigen::internal::is_same;
using Eigen::internal::type_list;

View File

@@ -0,0 +1,227 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include <cstdlib>
#include "main.h"
#include <Eigen/CXX11/ThreadPool>
// Visual studio doesn't implement a rand_r() function since its
// implementation of rand() is already thread safe
int rand_reentrant(unsigned int* s) {
#ifdef EIGEN_COMP_MSVC_STRICT
EIGEN_UNUSED_VARIABLE(s);
return rand();
#else
return rand_r(s);
#endif
}
void test_basic_runqueue()
{
RunQueue<int, 4> q;
// Check empty state.
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0u, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
std::vector<int> stolen;
VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0u, stolen.size());
// Push one front, pop one front.
VERIFY_IS_EQUAL(0, q.PushFront(1));
VERIFY_IS_EQUAL(1, q.Size());
VERIFY_IS_EQUAL(1, q.PopFront());
VERIFY_IS_EQUAL(0, q.Size());
// Push front to overflow.
VERIFY_IS_EQUAL(0, q.PushFront(2));
VERIFY_IS_EQUAL(1, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(3));
VERIFY_IS_EQUAL(2, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(4));
VERIFY_IS_EQUAL(3, q.Size());
VERIFY_IS_EQUAL(0, q.PushFront(5));
VERIFY_IS_EQUAL(4, q.Size());
VERIFY_IS_EQUAL(6, q.PushFront(6));
VERIFY_IS_EQUAL(4, q.Size());
VERIFY_IS_EQUAL(5, q.PopFront());
VERIFY_IS_EQUAL(3, q.Size());
VERIFY_IS_EQUAL(4, q.PopFront());
VERIFY_IS_EQUAL(2, q.Size());
VERIFY_IS_EQUAL(3, q.PopFront());
VERIFY_IS_EQUAL(1, q.Size());
VERIFY_IS_EQUAL(2, q.PopFront());
VERIFY_IS_EQUAL(0, q.Size());
VERIFY_IS_EQUAL(0, q.PopFront());
// Push one back, pop one back.
VERIFY_IS_EQUAL(0, q.PushBack(7));
VERIFY_IS_EQUAL(1, q.Size());
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1, stolen.size());
VERIFY_IS_EQUAL(7, stolen[0]);
VERIFY_IS_EQUAL(0, q.Size());
stolen.clear();
// Push back to overflow.
VERIFY_IS_EQUAL(0, q.PushBack(8));
VERIFY_IS_EQUAL(1, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(9));
VERIFY_IS_EQUAL(2, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(10));
VERIFY_IS_EQUAL(3, q.Size());
VERIFY_IS_EQUAL(0, q.PushBack(11));
VERIFY_IS_EQUAL(4, q.Size());
VERIFY_IS_EQUAL(12, q.PushBack(12));
VERIFY_IS_EQUAL(4, q.Size());
// Pop back in halves.
VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(2, stolen.size());
VERIFY_IS_EQUAL(10, stolen[0]);
VERIFY_IS_EQUAL(11, stolen[1]);
VERIFY_IS_EQUAL(2, q.Size());
stolen.clear();
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1, stolen.size());
VERIFY_IS_EQUAL(9, stolen[0]);
VERIFY_IS_EQUAL(1, q.Size());
stolen.clear();
VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(1, stolen.size());
VERIFY_IS_EQUAL(8, stolen[0]);
stolen.clear();
VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
VERIFY_IS_EQUAL(0, stolen.size());
// Empty again.
VERIFY(q.Empty());
VERIFY_IS_EQUAL(0, q.Size());
}
// Empty tests that the queue is not claimed to be empty when is is in fact not.
// Emptiness property is crucial part of thread pool blocking scheme,
// so we go to great effort to ensure this property. We create a queue with
// 1 element and then push 1 element (either front or back at random) and pop
// 1 element (either front or back at random). So queue always contains at least
// 1 element, but otherwise changes chaotically. Another thread constantly tests
// that the queue is not claimed to be empty.
void test_empty_runqueue()
{
RunQueue<int, 4> q;
q.PushFront(1);
std::atomic<bool> done(false);
std::thread mutator([&q, &done]() {
unsigned rnd = 0;
std::vector<int> stolen;
for (int i = 0; i < 1 << 18; i++) {
if (rand_reentrant(&rnd) % 2)
VERIFY_IS_EQUAL(0, q.PushFront(1));
else
VERIFY_IS_EQUAL(0, q.PushBack(1));
if (rand_reentrant(&rnd) % 2)
VERIFY_IS_EQUAL(1, q.PopFront());
else {
for (;;) {
if (q.PopBackHalf(&stolen) == 1) {
stolen.clear();
break;
}
VERIFY_IS_EQUAL(0, stolen.size());
}
}
}
done = true;
});
while (!done) {
VERIFY(!q.Empty());
int size = q.Size();
VERIFY_GE(size, 1);
VERIFY_LE(size, 2);
}
VERIFY_IS_EQUAL(1, q.PopFront());
mutator.join();
}
// Stress is a chaotic random test.
// One thread (owner) calls PushFront/PopFront, other threads call PushBack/
// PopBack. Ensure that we don't crash, deadlock, and all sanity checks pass.
void test_stress_runqueue()
{
static const int kEvents = 1 << 18;
RunQueue<int, 8> q;
std::atomic<int> total(0);
std::vector<std::unique_ptr<std::thread>> threads;
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
int pushed = 1;
int popped = 1;
while (pushed < kEvents || popped < kEvents) {
if (pushed < kEvents) {
if (q.PushFront(pushed) == 0) {
sum += pushed;
pushed++;
}
}
if (popped < kEvents) {
int v = q.PopFront();
if (v != 0) {
sum -= v;
popped++;
}
}
}
total += sum;
}));
for (int i = 0; i < 2; i++) {
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
for (int j = 1; j < kEvents; j++) {
if (q.PushBack(j) == 0) {
sum += j;
continue;
}
EIGEN_THREAD_YIELD();
j--;
}
total += sum;
}));
threads.emplace_back(new std::thread([&q, &total]() {
int sum = 0;
std::vector<int> stolen;
for (int j = 1; j < kEvents;) {
if (q.PopBackHalf(&stolen) == 0) {
EIGEN_THREAD_YIELD();
continue;
}
while (stolen.size() && j < kEvents) {
int v = stolen.back();
stolen.pop_back();
VERIFY_IS_NOT_EQUAL(v, 0);
sum += v;
j++;
}
}
while (stolen.size()) {
int v = stolen.back();
stolen.pop_back();
VERIFY_IS_NOT_EQUAL(v, 0);
while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
}
total -= sum;
}));
}
for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
VERIFY(q.Empty());
VERIFY(total.load() == 0);
}
void test_cxx11_runqueue()
{
CALL_SUBTEST_1(test_basic_runqueue());
CALL_SUBTEST_2(test_empty_runqueue());
CALL_SUBTEST_3(test_stress_runqueue());
}

View File

@@ -661,6 +661,9 @@ void test_cuda_digamma()
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
}
cudaFree(d_in);
cudaFree(d_out);
}
template <typename Scalar>
@@ -718,13 +721,17 @@ void test_cuda_zeta()
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
}
cudaFree(d_in_x);
cudaFree(d_in_q);
cudaFree(d_out);
}
template <typename Scalar>
@@ -787,6 +794,10 @@ void test_cuda_polygamma()
for (int i = 0; i < 7; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
cudaFree(d_in_x);
cudaFree(d_in_n);
cudaFree(d_out);
}
template <typename Scalar>
@@ -826,9 +837,9 @@ void test_cuda_igamma()
Scalar* d_a;
Scalar* d_x;
Scalar* d_out;
cudaMalloc((void**)(&d_a), bytes);
cudaMalloc((void**)(&d_x), bytes);
cudaMalloc((void**)(&d_out), bytes);
assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
@@ -854,6 +865,10 @@ void test_cuda_igamma()
}
}
}
cudaFree(d_a);
cudaFree(d_x);
cudaFree(d_out);
}
template <typename Scalar>
@@ -920,6 +935,10 @@ void test_cuda_igammac()
}
}
}
cudaFree(d_a);
cudaFree(d_x);
cudaFree(d_out);
}
template <typename Scalar>
@@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev)
Scalar* d_in;
Scalar* d_out;
cudaMalloc((void**)(&d_in), bytes);
cudaMalloc((void**)(&d_out), bytes);
assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);

View File

@@ -20,6 +20,8 @@ static void test_0d()
TensorFixedSize<float, Sizes<> > scalar1;
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
VERIFY_IS_EQUAL(scalar1.size(), 1);
VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
scalar1() = 7.0;
scalar2() = 13.0;

View File

@@ -16,7 +16,7 @@ using Eigen::RowMajor;
static void test_tanh()
{
Tensor<float, 1> vec1({6});
Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.tanh();
@@ -28,7 +28,7 @@ static void test_tanh()
static void test_sigmoid()
{
Tensor<float, 1> vec1({6});
Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.sigmoid();

View File

@@ -14,8 +14,8 @@
static void test_simple()
{
Tensor<float, 1, ColMajor> vec1({6});
Tensor<float, 1, ColMajor, int> vec2({6});
Tensor<float, 1, ColMajor> vec1(6);
Tensor<float, 1, ColMajor, int> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;

View File

@@ -228,6 +228,42 @@ void test_cuda_reductions() {
gpu_device.deallocate(d_res_float);
}
void test_cuda_forced_evals() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
Tensor<float, 1> half_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking unary " << i << std::endl;
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_half);
gpu_device.deallocate(d_res_float);
}
#endif
@@ -246,6 +282,7 @@ void test_cxx11_tensor_of_float16_cuda()
CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST_2(test_cuda_contractions());
CALL_SUBTEST_3(test_cuda_reductions());
CALL_SUBTEST_4(test_cuda_forced_evals());
}
else {
std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;

View File

@@ -792,7 +792,9 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
--g_test_level;
// was: VERIFY_IS_EQUAL(info, 1);
// check norm^2