From 03d19f3bae65d5dff1417cc4bd4ce32262cdaa3a Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Mon, 23 Jun 2008 11:23:05 +0000
Subject: [PATCH] quick temporary fix for a perf issue we just identified with vectorization.... now the sum benchmark runs 3x faster with vectorization than without.

---
 Eigen/src/Core/Sum.h | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h
index 05a8722dc..c6b7cacce 100644
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -194,17 +194,32 @@ struct ei_sum_impl
// do the vectorizable part of the sum if(size >= packetSize) { + asm("#begin"); + PacketScalar packet_res; packet_res = mat.template packet(0, 0); int index; - for(index = packetSize; index(row, col)); + for(index = packetSize; index(row, col)); + } + } + else + { + for(index = packetSize; index(row, col)); + } } res = ei_predux(packet_res); + asm("#end"); // now we must do the rest without vectorization. if(alignedSize == size) return res;