diff --git a/test/adjoint.cpp b/test/adjoint.cpp
index 28562b7f8..881d63951 100644
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@@ -249,6 +249,45 @@ void inner_product_boundary_sizes() {
   }
 }
 
+// Test transposeInPlace at vectorization boundary sizes.
+// BlockedInPlaceTranspose uses PacketSize-blocked loops with a scalar remainder (line 273),
+// exercising off-by-one-prone transitions.
+template <typename Scalar>
+void transposeInPlace_boundary() {
+  const Index PS = internal::packet_traits<Scalar>::size;
+  // Sizes around packet boundaries where the blocked path's remainder handling is exercised.
+  const Index sizes[] = {1, 2, 3, PS - 1, PS, PS + 1, 2 * PS - 1,
+                         2 * PS, 2 * PS + 1, 3 * PS, 3 * PS + 1, 4 * PS, 4 * PS + 1};
+  for (int si = 0; si < 13; ++si) {
+    Index n = sizes[si];
+    if (n <= 0) continue;
+    typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
+
+    // Square transposeInPlace
+    Mat m1 = Mat::Random(n, n);
+    Mat m2 = m1;
+    m2.transposeInPlace();
+    VERIFY_IS_APPROX(m2, m1.transpose());
+    // Double transpose should return to original
+    m2.transposeInPlace();
+    VERIFY_IS_APPROX(m2, m1);
+  }
+
+  // Non-square transposeInPlace (resizable dynamic matrices)
+  const Index rect_sizes[][2] = {{2, 5}, {PS, 2 * PS + 1}, {3, 1}, {1, 7}, {2 * PS, PS + 1}};
+  for (int si = 0; si < 5; ++si) {
+    Index r = rect_sizes[si][0], c = rect_sizes[si][1];
+    if (r <= 0 || c <= 0) continue;
+    typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
+    Mat m1 = Mat::Random(r, c);
+    Mat expected = m1.transpose();
+    Mat m2 = m1;
+    m2.transposeInPlace();
+    VERIFY_IS_APPROX(m2, expected);
+    VERIFY(m2.rows() == c && m2.cols() == r);
+  }
+}
+
 EIGEN_DECLARE_TEST(adjoint) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1(adjoint(Matrix<float, 1, 1>()));
@@ -281,4 +320,9 @@ EIGEN_DECLARE_TEST(adjoint) {
   CALL_SUBTEST_15(inner_product_boundary_sizes<float>());
   CALL_SUBTEST_16(inner_product_boundary_sizes<std::complex<float>>());
   CALL_SUBTEST_17(inner_product_boundary_sizes<std::complex<double>>());
+
+  // transposeInPlace at vectorization boundaries (deterministic, outside g_repeat).
+  CALL_SUBTEST_18(transposeInPlace_boundary<float>());
+  CALL_SUBTEST_18(transposeInPlace_boundary<double>());
+  CALL_SUBTEST_18(transposeInPlace_boundary<std::complex<float>>());
 }
diff --git a/test/array_reverse.cpp b/test/array_reverse.cpp
index 3120c3cae..f83d498c1 100644
--- a/test/array_reverse.cpp
+++ b/test/array_reverse.cpp
@@ -178,6 +178,47 @@ void bug1684() {
   //   VERIFY_IS_APPROX(m2, m1.rowwise().reverse().eval());
 }
 
+// Test reverseInPlace at vectorization boundary sizes.
+// Vectorized swap used by reverseInPlace has remainder handling
+// that could fail at packet boundaries.
+template <typename Scalar>
+void reverseInPlace_boundary() {
+  const Index PS = internal::packet_traits<Scalar>::size;
+  const Index sizes[] = {1, 2, 3, PS - 1, PS, PS + 1, 2 * PS - 1,
+                         2 * PS, 2 * PS + 1, 4 * PS, 4 * PS + 1, 8 * PS, 8 * PS + 1};
+  for (int si = 0; si < 13; ++si) {
+    Index n = sizes[si];
+    if (n <= 0) continue;
+    typedef Matrix<Scalar, Dynamic, 1> Vec;
+    typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
+
+    // Vector reverseInPlace
+    Vec v1 = Vec::Random(n);
+    Vec v2 = v1;
+    v2.reverseInPlace();
+    for (Index k = 0; k < n; ++k) VERIFY_IS_APPROX(v2(k), v1(n - 1 - k));
+
+    // Matrix reverseInPlace (full)
+    Mat m1 = Mat::Random(n, n);
+    Mat m2 = m1;
+    m2.reverseInPlace();
+    for (Index j = 0; j < n; ++j)
+      for (Index i = 0; i < n; ++i) VERIFY_IS_APPROX(m2(i, j), m1(n - 1 - i, n - 1 - j));
+
+    // colwise reverseInPlace
+    m2 = m1;
+    m2.colwise().reverseInPlace();
+    for (Index j = 0; j < n; ++j)
+      for (Index i = 0; i < n; ++i) VERIFY_IS_APPROX(m2(i, j), m1(n - 1 - i, j));
+
+    // rowwise reverseInPlace
+    m2 = m1;
+    m2.rowwise().reverseInPlace();
+    for (Index j = 0; j < n; ++j)
+      for (Index i = 0; i < n; ++i) VERIFY_IS_APPROX(m2(i, j), m1(i, n - 1 - j));
+  }
+}
+
 EIGEN_DECLARE_TEST(array_reverse) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1(reverse(Matrix<float, 1, 1>()));
@@ -196,4 +237,9 @@ EIGEN_DECLARE_TEST(array_reverse) {
     CALL_SUBTEST_3(bug1684<0>());
   }
   CALL_SUBTEST_3(array_reverse_extra<0>());
+
+  // reverseInPlace at vectorization boundaries (deterministic, outside g_repeat).
+  CALL_SUBTEST_10(reverseInPlace_boundary<float>());
+  CALL_SUBTEST_10(reverseInPlace_boundary<double>());
+  CALL_SUBTEST_10(reverseInPlace_boundary<int>());
 }
diff --git a/test/diagonal.cpp b/test/diagonal.cpp
index a56118c38..e7c4bf29c 100644
--- a/test/diagonal.cpp
+++ b/test/diagonal.cpp
@@ -81,6 +81,79 @@ void diagonal_assert(const MatrixType& m) {
   VERIFY_RAISES_ASSERT(m1.diagonal(-(rows + 1)));
 }
 
+// Test that (A * B).diagonal() gives the same result as (A * B).eval().diagonal().
+// The diagonal-of-product path uses LazyProduct evaluation (see ProductEvaluators.h),
+// which avoids computing the full product. Verify this optimization is correct.
+template <typename Scalar>
+void diagonal_of_product() {
+  const Index PS = internal::packet_traits<Scalar>::size;
+  const Index sizes[] = {1, 2, 3, PS - 1, PS, PS + 1, 2 * PS - 1, 2 * PS, 2 * PS + 1, 4 * PS, 4 * PS + 1};
+  typedef Matrix<Scalar, Dynamic, Dynamic> Mat;
+  typedef Matrix<Scalar, Dynamic, 1> Vec;
+
+  for (int si = 0; si < 11; ++si) {
+    Index n = sizes[si];
+    if (n <= 0) continue;
+
+    Mat A = Mat::Random(n, n);
+    Mat B = Mat::Random(n, n);
+
+    // Lazy diagonal vs explicit product diagonal
+    Vec diag_lazy = (A * B).diagonal();
+    Vec diag_explicit = (A * B).eval().diagonal();
+    VERIFY_IS_APPROX(diag_lazy, diag_explicit);
+
+    // Also test non-square: A is m×k, B is k×n
+    for (int k : {1, 3, (int)n}) {
+      if (k <= 0) continue;
+      Mat C = Mat::Random(n, k);
+      Mat D = Mat::Random(k, n);
+      Vec diag_lazy2 = (C * D).diagonal();
+      Vec diag_explicit2 = (C * D).eval().diagonal();
+      VERIFY_IS_APPROX(diag_lazy2, diag_explicit2);
+    }
+  }
+}
+
+// Test .select() at vectorization boundary sizes.
+// select() uses CwiseTernaryOp which has packet-level evaluation with remainder handling.
+template <typename Scalar>
+void select_boundary() {
+  const Index PS = internal::packet_traits<Scalar>::size;
+  const Index sizes[] = {1, 2, 3, PS - 1, PS, PS + 1, 2 * PS - 1, 2 * PS, 2 * PS + 1, 4 * PS, 4 * PS + 1};
+  typedef Array<Scalar, Dynamic, 1> Arr;
+
+  for (int si = 0; si < 11; ++si) {
+    Index n = sizes[si];
+    if (n <= 0) continue;
+
+    Arr a = Arr::Random(n);
+    Arr b = Arr::Random(n);
+    auto cond = (a > Scalar(0));
+
+    // select with two arrays
+    Arr result = cond.select(a, b);
+    for (Index k = 0; k < n; ++k) {
+      Scalar expected = (a(k) > Scalar(0)) ? a(k) : b(k);
+      VERIFY_IS_APPROX(result(k), expected);
+    }
+
+    // select with scalar else
+    Arr result2 = cond.select(a, Scalar(0));
+    for (Index k = 0; k < n; ++k) {
+      Scalar expected = (a(k) > Scalar(0)) ? a(k) : Scalar(0);
+      VERIFY_IS_APPROX(result2(k), expected);
+    }
+
+    // select with scalar then
+    Arr result3 = cond.select(Scalar(42), b);
+    for (Index k = 0; k < n; ++k) {
+      Scalar expected = (a(k) > Scalar(0)) ? Scalar(42) : b(k);
+      VERIFY_IS_APPROX(result3(k), expected);
+    }
+  }
+}
+
 EIGEN_DECLARE_TEST(diagonal) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1(diagonal(Matrix<float, 1, 1>()));
@@ -99,4 +172,14 @@ EIGEN_DECLARE_TEST(diagonal) {
     CALL_SUBTEST_1(diagonal_assert(
         MatrixXf(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
   }
+
+  // Diagonal-of-product optimization (deterministic, outside g_repeat).
+  CALL_SUBTEST_3(diagonal_of_product<float>());
+  CALL_SUBTEST_3(diagonal_of_product<double>());
+  CALL_SUBTEST_3(diagonal_of_product<std::complex<float>>());
+
+  // Select at vectorization boundaries (deterministic, outside g_repeat).
+  CALL_SUBTEST_4(select_boundary<float>());
+  CALL_SUBTEST_4(select_boundary<double>());
+  CALL_SUBTEST_4(select_boundary<int>());
 }
diff --git a/test/product_symm.cpp b/test/product_symm.cpp
index 7c6db2e7e..0b929329f 100644
--- a/test/product_symm.cpp
+++ b/test/product_symm.cpp
@@ -110,6 +110,26 @@ void symm(int size = Size, int othersize = OtherSize) {
   }
 }
 
+// Test symmetric products at blocking boundary sizes.
+// The existing test uses random sizes; these deterministic sizes exercise
+// transitions in GEBP blocking (early-return at 48, block size changes).
+template <int Dummy>
+void product_symm_boundary() {
+  const int sizes[] = {1, 2, 3, 4, 8, 16, 47, 48, 49, 64, 96, 128};
+  for (int si = 0; si < 12; ++si) {
+    int n = sizes[si];
+
+    // double, matrix RHS
+    symm<double, Dynamic, Dynamic>(n, 5);
+    // double, vector RHS
+    symm<double, Dynamic, 1>(n);
+    // float, matrix RHS
+    symm<float, Dynamic, Dynamic>(n, 7);
+    // complex float, matrix RHS
+    symm<std::complex<float>, Dynamic, Dynamic>(n, 3);
+  }
+}
+
 EIGEN_DECLARE_TEST(product_symm) {
   for (int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_1((symm<float, Dynamic, Dynamic>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE),
@@ -126,4 +146,7 @@ EIGEN_DECLARE_TEST(product_symm) {
     CALL_SUBTEST_7((symm<std::complex<double>, Dynamic, 1>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
     CALL_SUBTEST_8((symm<std::complex<float>, Dynamic, 1>(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
   }
+
+  // Deterministic blocking boundary tests (outside g_repeat).
+  CALL_SUBTEST_9(product_symm_boundary<0>());
 }
diff --git a/test/redux.cpp b/test/redux.cpp
index 70b7261b1..a952d67c9 100644
--- a/test/redux.cpp
+++ b/test/redux.cpp
@@ -312,6 +312,19 @@ EIGEN_DECLARE_TEST(redux) {
   CALL_SUBTEST_11(boolRedux(7, 13));
   CALL_SUBTEST_11(boolRedux(63, 63));
 
+  // Bool reductions at vectorization boundary sizes.
+  // all()/any()/count() use packet-level visitors with remainder handling.
+  {
+    // bool packets are typically 16 bytes (SSE) or 32 bytes (AVX).
+    // Test sizes around common packet sizes to catch off-by-one in remainder loops.
+    const Index bsizes[] = {1, 2, 3, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129};
+    for (int si = 0; si < 18; ++si) {
+      CALL_SUBTEST_11(boolRedux(bsizes[si], 1));  // column vector
+      CALL_SUBTEST_11(boolRedux(1, bsizes[si]));  // row vector
+      CALL_SUBTEST_11(boolRedux(bsizes[si], 3));  // thin matrix
+    }
+  }
+
   // Vectorization boundary sizes — deterministic, run once.
   // Integer types are excluded: full-range random ints overflow in sum/prod (UB).
   // Integer reductions are already tested by matrixRedux/vectorRedux with clamped values.