Add 5.0.1 release notes and a few unreleased features.

(cherry picked from commit 91526464aef156bb0f847db6e00f0b971f9d9ac8)
Enable tests on push
2026-04-10 11:34:33 +08:00 · 2025-11-08 12:44:56 -08:00 · 2025-11-06 09:31:16 -08:00 · 2025-11-05 13:34:05 -08:00 · 2025-11-05 13:33:49 -08:00 · 2025-11-05 13:32:28 -08:00
1041 changed files with 51014 additions and 41917 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,37 +0,0 @@
---
-# Conservative clang-tidy configuration for Eigen.
-#
-# Focuses on bug-finding checks with low false-positive rates.
-# Intentionally omits style-enforcement checks (modernize-*, google-*,
-# cppcoreguidelines-*) since Eigen has its own conventions and is a
-# heavily-templated math library where many "modern C++" idioms don't apply.
-
-Checks: >
-  -*,
-  bugprone-*,
-  -bugprone-narrowing-conversions,
-  -bugprone-easily-swappable-parameters,
-  -bugprone-implicit-widening-of-multiplication-result,
-  -bugprone-exception-escape,
-  misc-redundant-expression,
-  misc-unused-using-decls,
-  misc-misleading-identifier,
-  performance-for-range-copy,
-  performance-implicit-conversion-in-loop,
-  performance-unnecessary-copy-initialization,
-  performance-unnecessary-value-param,
-  readability-container-size-empty,
-  readability-duplicate-include,
-  readability-misleading-indentation,
-  readability-redundant-control-flow,
-  readability-redundant-smartptr-get,
-
-WarningsAsErrors: ''
-
-HeaderFilterRegex: 'Eigen/.*|test/.*|blas/.*|lapack/.*|unsupported/Eigen/.*'
-
-# Eigen uses its own assert macros.
-CheckOptions:
-  - key: bugprone-assert-side-effect.AssertMacros
-    value: 'eigen_assert,eigen_internal_assert,EIGEN_STATIC_ASSERT,VERIFY,VERIFY_IS_APPROX,VERIFY_IS_EQUAL,VERIFY_IS_MUCH_SMALLER_THAN,VERIFY_IS_NOT_APPROX,VERIFY_IS_NOT_EQUAL,VERIFY_IS_UNITARY,VERIFY_RAISES_ASSERT'
-...
--- a/.gitignore
+++ b/.gitignore
@@ -39,4 +39,3 @@ Makefile
 !scripts/buildtests.in
 !Eigen/Core
 !Eigen/src/Core
-CLAUDE.md
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -8,26 +8,13 @@
 # with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

 default:
+# automatically cancels a job when a new pipeline for the same branch is triggered
  interruptible: true

-# For MR pipelines, auto-cancel running jobs when new commits are pushed.
-# For scheduled (nightly) pipelines, never auto-cancel so all jobs run to
-# completion and all failures are visible for debugging.
-workflow:
-  auto_cancel:
-    on_new_commit: interruptible
-    on_job_failure: none
-  rules:
-    - if: $CI_PIPELINE_SOURCE == "schedule"
-      auto_cancel:
-        on_new_commit: none
-    - when: always
-
 stages:
  - checkformat
  - build
  - test
-  - benchmark
  - deploy

 variables:
@@ -48,5 +35,4 @@ include:
  - "/ci/build.windows.gitlab-ci.yml"
  - "/ci/test.linux.gitlab-ci.yml"
  - "/ci/test.windows.gitlab-ci.yml"
-  - "/ci/benchmark.gitlab-ci.yml"
  - "/ci/deploy.gitlab-ci.yml"
--- a/.gitlab/issue_templates/Bug
+++ b/.gitlab/issue_templates/Bug
@@ -1,37 +1,42 @@
 <!--
-Thank you for submitting an issue!
+Please read this!

-Before opening a new issue, please search for keywords in the existing [list of issues](https://gitlab.com/libeigen/eigen/-/issues?state=opened) to verify it isn't a duplicate.
-->
+Before opening a new issue, make sure to search for keywords in the issues
+filtered by "bug::confirmed" or "bug::unconfirmed" and "bugzilla" label:
+
+- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bug%3A%3Aconfirmed
+- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bug%3A%3Aunconfirmed
+- https://gitlab.com/libeigen/eigen/-/issues?scope=all&utf8=%E2%9C%93&state=opened&label_name[]=bugzilla
+
+and verify the issue you're about to submit isn't a duplicate. -->

 ### Summary
 <!-- Summarize the bug encountered concisely. -->

 ### Environment
-<!-- Please provide your development environment. -->
+<!-- Please provide your development environment here -->
 - **Operating System** : Windows/Linux
 - **Architecture** : x64/Arm64/PowerPC ...
- **Eigen Version** : 5.0.0
- **Compiler Version** : gcc-12.0
+- **Eigen Version** : 3.3.9
+- **Compiler Version** : Gcc7.0
 - **Compile Flags** : -O3 -march=native
 - **Vector Extension** : SSE/AVX/NEON ...

 ### Minimal Example
-<!--
-Please create a minimal reproducing example here that exhibits the problematic behavior.
-The example should be complete, in that it can fully build and run.  See the [the guidelines on stackoverflow](https://stackoverflow.com/help/minimal-reproducible-example) for how to create a good minimal example.
+<!-- If possible, please create a minimal example here that exhibits the problematic behavior.
+You can also link to [godbolt](https://godbolt.org). Note that you need to click
+the "Share" button in the top right-hand corner of the godbolt page to get the share link;
+do not copy the URL from your browser address bar.

-You can also link to [godbolt](https://godbolt.org). Note that you need to click 
-the "Share" button in the top right-hand corner of the godbolt page to get the share link
-instead of the URL in your browser address bar. 
-->
+You can read [the guidelines on stackoverflow](https://stackoverflow.com/help/minimal-reproducible-example)
+on how to create a good minimal example. -->

 ```cpp
-// Insert  your code here.
+//show your code here
 ```

-### Steps to reproduce the issue
-<!-- Describe the necessary steps to reproduce the issue. -->
+### Steps to reproduce
+<!-- Describe how one can reproduce the issue - this is very important. Please use an ordered list. -->

 1. first step
 2. second step
@@ -44,16 +49,21 @@ instead of the URL in your browser address bar.
 <!-- Describe what you should see instead. -->

 ### Relevant logs
-<!-- Add relevant build logs or program output within blocks marked by " ``` " -->
+<!-- Add relevant code snippets or program output within blocks marked by " ``` " -->

-### [Optional] Benchmark scripts and results
+<!-- OPTIONAL: remove this section if you are not reporting a compilation warning issue.-->
+### Warning Messages
+<!-- Show us the warning messages you got! -->
+
+<!-- OPTIONAL: remove this section if you are not reporting a performance issue. -->
+### Benchmark scripts and results
 <!-- Please share any benchmark scripts - either standalone, or using [Google Benchmark](https://github.com/google/benchmark). -->

 ### Anything else that might help
-<!--
-It will be better to provide us more information to help narrow down the cause. 
+<!-- It will be better to provide us more information to help narrow down the cause. 
 Including but not limited to the following: 
 - lines of code that might help us diagnose the problem. 
 - potential ways to address the issue.
- last known working/first broken version (release number or commit hash).
--> 
+- last known working/first broken version (release number or commit hash). --> 
+
+- [ ] Have a plan to fix this issue.
--- a/.gitlab/issue_templates/Feature
+++ b/.gitlab/issue_templates/Feature
@@ -1,13 +1,6 @@
-<!--
-Thank you for submitting a Feature Request!
-
-If you want to run ideas by the maintainers and the Eigen community first, 
-you can chat about them on the [Eigen Discord server](https://discord.gg/2SkEJGqZjR).
-->
-
 ### Describe the feature you would like to be implemented.

-### Why Would such a feature be useful for other users?
+### Would such a feature be useful for other users? Why?

 ### Any hints on how to implement the requested feature?

--- a/.gitlab/merge_request_templates/Default.md
+++ b/.gitlab/merge_request_templates/Default.md
@@ -1,30 +0,0 @@
-<!--
-Thanks for contributing a merge request!
-
-We recommend that first-time contributors read our [contribution guidelines](https://eigen.tuxfamily.org/index.php?title=Contributing_to_Eigen).
-
-Before submitting the MR, please complete the following checks:
- Create one PR per feature or bugfix,
- Run the test suite to verify your changes.
-  See our [test guidelines](https://eigen.tuxfamily.org/index.php?title=Tests).
- Add tests to cover the bug addressed or any new feature.
- Document new features.  If it is a substantial change, add it to the [Changelog](https://gitlab.com/libeigen/eigen/-/blob/master/CHANGELOG.md).
- Leave the following box checked when submitting: `Allow commits from members who can merge to the target branch`.
-  This allows us to rebase and merge your change.
-
-Note that we are a team of volunteers; we appreciate your patience during the review process.
-->
-
-### Description
-<!--Please explain your changes.-->
-
-%{first_multiline_commit}
-
-### Reference issue
-<!--
-You can link to a specific issue using the gitlab syntax #<issue number>.
-If the MR fixes an issue, write "Fixes #<issue number>" to have the issue automatically closed on merge.
-->
-
-### Additional information
-<!--Any additional information you think is important.-->
--- a/.gitlab/merge_request_templates/Merge
+++ b/.gitlab/merge_request_templates/Merge
@@ -0,0 +1,26 @@
+<!-- 
+Thanks for contributing a merge request! Please name and fully describe your MR as you would for a commit message.
+If the MR fixes an issue, please include "Fixes #issue" in the commit message and the MR description.
+
+In addition, we recommend that first-time contributors read our [contribution guidelines](https://eigen.tuxfamily.org/index.php?title=Contributing_to_Eigen) and [git page](https://eigen.tuxfamily.org/index.php?title=Git), which will help you submit a more standardized MR.
+
+Before submitting the MR, you also need to complete the following checks:
+- Make one PR per feature/bugfix (don't mix multiple changes into one PR). Avoid committing unrelated changes.
+- Rebase before committing
+- For code changes, run the test suite (at least the tests that are likely affected by the change).
+  See our [test guidelines](https://eigen.tuxfamily.org/index.php?title=Tests).
+- If possible, add a test (both for bug-fixes as well as new features)
+- Make sure new features are documented
+
+Note that we are a team of volunteers; we appreciate your patience during the review process.
+
+Again, thanks for contributing! -->
+
+### Reference issue
+<!-- You can link to a specific issue using the gitlab syntax #<issue number>  -->
+
+### What does this implement/fix?
+<!--Please explain your changes.-->
+
+### Additional information
+<!--Any additional information you think is important.-->
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,5 @@
 # Changelog

-## [Unreleased]
-
-New features:
- ComplexQZ implementation [!1962]
- Generic clang vector extension backend [!2051]
-
 ## [5.0.1] - 2025-11-11

 A few bug-fixes from the master branch, including
@@ -51,14 +45,6 @@ This release marks a transition to [Semantic Versioning](https://semver.org/). P
 * Euler angles are now returned in a more canonical form, potentially resulting in a change of behavior [!1301, !1314].
 * Eigen's random number generation has changed, resulting in a change of behavior.  Please do not rely on specific random numbers from Eigen - these were never guaranteed to be consistent across Eigen versions, nor are they generally consistent across platforms [!1437].

-## [3.4.1] - 2025-09-30
-
-Many bug fixes have been backported from the main branch.
-
-A list of new issues addressed can be found via the [3.4.1](https://gitlab.com/libeigen/eigen/-/issues?state=all&label_name%5B%5D=3.4.1) label on GitLab.
-
-Check the [git commit history](https://gitlab.com/libeigen/eigen/-/commits/3.4.1) for the full list of changes.  
-
 ## [3.4.0] - 2021-08-18

 **Notice:** 3.4.x will be the last major release series of Eigen that will support c++03.
@@ -358,7 +344,7 @@ Changes since 3.3.7:
  * Commit dd6de618: Fix a bug with half-precision floats on GPUs.

 ## [3.3.7] - 2018-12-11
-
+
 Changes since 3.3.6:

 * #1643: Fix compilation with GCC>=6 and compiler optimization turned off.
@@ -542,7 +528,6 @@ Changes since 3.3.2:
  * #1378: fix doc (`DiagonalIndex` vs `Diagonal`).

 ## [3.3.2] - 2017-01-18
-
 Changes since 3.3.1:

 * General:
@@ -860,7 +845,6 @@ Changes since 3.2.8:
  * #1175: fix index type conversion warnings in sparse to dense conversion.

 ## [3.2.8] - 2016-02-16
-
 Changes since 3.2.7:

 * Main fixes and improvements:
@@ -1284,7 +1268,6 @@ Main changes since 3.2-beta1:
  * Many other fixes including #230, #482, #542, #561, #564, #565, #566, #578, #581, #595, #597, #598, #599, #605, #606, #615.

 ## [3.1.3] - 2013-04-16
-
 Changes since 3.1.2:

 * #526 - Fix linear vectorized transversal in linspace.
@@ -1368,7 +1351,6 @@ Changes since 3.1.0:
 * Fixed Sparse module compilation under MSVC 2005

 ## [3.0.6] - 2012-07-09
-
 Changes since 3.0.5:
 * #447 - fix infinite recursion in `ProductBase::coeff()`
 * #478 - fix RealSchur on a zero matrix
@@ -1525,7 +1507,6 @@ Main changes since 3.0:


 ## [3.0.4] - 2011-12-06
-
 Changes since 3.0.3:

 * #363 - check for integer overflow in size computations
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,11 +34,6 @@ if (POLICY CMP0177)
  cmake_policy(SET CMP0177 NEW)
 endif ()

-# Respect <PackageName>_ROOT variables.
-if (POLICY CMP0074)
-  cmake_policy(SET CMP0074 NEW)
-endif ()
-
 #==============================================================================
 # CMake Project.
 #==============================================================================
@@ -74,6 +69,8 @@ if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK)
  endif()
 endif()

+option(EIGEN_BUILD_BTL "Build benchmark suite" OFF)
+option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF)
 # Avoid building docs if included from another project.
 # Building documentation requires creating and running executables on the host
 # platform.  We shouldn't do this if cross-compiling.
@@ -90,7 +87,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows)
 endif()
 option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL})

-if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
+if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS)
   set(EIGEN_IS_BUILDING_ ON)
 endif()

@@ -103,19 +100,19 @@ endif()
 file(READ "${PROJECT_SOURCE_DIR}/Eigen/Version" _eigen_version_header)
 if (NOT DEFINED EIGEN_WORLD_VERSION)
  string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}")
-  set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}" CACHE STRING "")
+  set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}")
 endif()
 if (NOT DEFINED EIGEN_MAJOR_VERSION)
  string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}")
-  set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "")
+  set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}")
 endif()
 if (NOT DEFINED EIGEN_MINOR_VERSION)
  string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}")
-  set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "")
+  set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}")
 endif()
 if (NOT DEFINED EIGEN_PATCH_VERSION)
  string(REGEX MATCH "define[ \t]+EIGEN_PATCH_VERSION[ \t]+([0-9]+)" _eigen_patch_version_match "${_eigen_version_header}")
-  set(EIGEN_PATCH_VERSION "${CMAKE_MATCH_1}" CACHE STRING "")
+  set(EIGEN_PATCH_VERSION "${CMAKE_MATCH_1}")
 endif()
 if (NOT DEFINED EIGEN_PRERELEASE_VERSION)
  set(EIGEN_PRERELEASE_VERSION "dev")
@@ -137,18 +134,18 @@ endif()
 if (NOT DEFINED EIGEN_BUILD_VERSION AND DEFINED EIGEN_GIT_REVNUM)
  string(SUBSTRING "${EIGEN_GIT_REVNUM}" 0 8 EIGEN_BUILD_VERSION)
 else()
-  set(EIGEN_BUILD_VERSION "" CACHE STRING "")
+  set(EIGEN_BUILD_VERSION "")
 endif()

 # The EIGEN_VERSION_NUMBER must be of the form <major.minor.patch>.
 # The EIGEN_VERSION_STRING can contain the preprelease/build strings.
-set(EIGEN_VERSION_NUMBER "${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}.${EIGEN_PATCH_VERSION}" CACHE STRING "")
-set(EIGEN_VERSION_STRING "${EIGEN_VERSION_NUMBER}" CACHE STRING "")
+set(EIGEN_VERSION_NUMBER "${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}.${EIGEN_PATCH_VERSION}")
+set(EIGEN_VERSION_STRING "${EIGEN_VERSION_NUMBER}")
 if (NOT "x${EIGEN_PRERELEASE_VERSION}" STREQUAL "x")
-  set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}-${EIGEN_PRERELEASE_VERSION}" CACHE STRING "")
+  set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}-${EIGEN_PRERELEASE_VERSION}")
 endif()
 if (NOT "x${EIGEN_BUILD_VERSION}" STREQUAL "x")
-  set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}+${EIGEN_BUILD_VERSION}" CACHE STRING "")
+  set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}+${EIGEN_BUILD_VERSION}")
 endif()


@@ -308,29 +305,17 @@ if (EIGEN_IS_BUILDING_)
  set(CMAKE_INCLUDE_CURRENT_DIR OFF)

  find_package(StandardMathLibrary)
-  find_package(AOCL QUIET)
  set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "")
-  if(AOCL_FOUND)
-    list(APPEND EIGEN_STANDARD_LIBRARIES_TO_LINK_TO ${AOCL_LIBRARIES})
-    if(AOCL_INCLUDE_DIRS)
-      include_directories(${AOCL_INCLUDE_DIRS})
+  if(NOT STANDARD_MATH_LIBRARY_FOUND)
+    message(FATAL_ERROR
+      "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.")
+  else()
+    if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
+      set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}")
+    else()
+      set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}")
    endif()
  endif()
-
-  if(NOT STANDARD_MATH_LIBRARY_FOUND)
-  message(FATAL_ERROR
-    "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.")
-  else()
-  if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
-    set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}")
-  else()
-    set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}")
-  endif()
-  # Clean up any leading/trailing whitespace in the variable to avoid CMP0004 errors
-  string(STRIP "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}" EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
- endif()
-
-
  if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
    message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}")
  else()
@@ -414,7 +399,6 @@ if (EIGEN_BUILD_TESTING)
    ei_add_cxx_compiler_flag("-Wno-psabi")
    ei_add_cxx_compiler_flag("-Wno-variadic-macros")
    ei_add_cxx_compiler_flag("-Wno-long-long")
-    ei_add_cxx_compiler_flag("-Wno-pass-failed")          # disable clang's warning for unrolling when the loop count is dynamic.
    ei_add_cxx_compiler_flag("-fno-common")
    ei_add_cxx_compiler_flag("-fstrict-aliasing")
    ei_add_cxx_compiler_flag("-wd981")                    # disable ICC's "operands are evaluated in unspecified order" remark
@@ -425,17 +409,6 @@ if (EIGEN_BUILD_TESTING)
      ei_add_cxx_compiler_flag("-fno-check-new")
    endif()

-    # GCC 12+ emits false-positive -Warray-bounds, -Wmaybe-uninitialized,
-    # -Wstringop-overread, and -Wnonnull warnings at -O2/-O3 in heavily
-    # templated code with mixed static/dynamic sizes.  These are well-known
-    # compiler bugs (see GCC PR 109394, 106247, 105329, 98610, among others).
-    if (CMAKE_COMPILER_IS_GNUCXX)
-      ei_add_cxx_compiler_flag("-Wno-array-bounds")
-      ei_add_cxx_compiler_flag("-Wno-maybe-uninitialized")
-      ei_add_cxx_compiler_flag("-Wno-stringop-overread")
-      ei_add_cxx_compiler_flag("-Wno-nonnull")
-    endif()
-

    if(ANDROID_NDK)
      ei_add_cxx_compiler_flag("-pie")
@@ -672,7 +645,7 @@ if (EIGEN_BUILD_TESTING)
  endif()

  set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.")
-  set(EIGEN_CUDA_COMPUTE_ARCH 70 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code")
+  set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code")

  option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
  if(EIGEN_TEST_SYCL)
@@ -762,6 +735,15 @@ if(EIGEN_BUILD_DOC)
  add_subdirectory(doc EXCLUDE_FROM_ALL)
 endif()

+# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"?
+if(EIGEN_BUILD_BTL)
+  add_subdirectory(bench/btl EXCLUDE_FROM_ALL)
+endif()
+
+if(NOT WIN32 AND EIGEN_BUILD_SPBENCH)
+  add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
+endif()
+
 if (EIGEN_BUILD_DEMOS)
  add_subdirectory(demos EXCLUDE_FROM_ALL)
 endif()
--- a/COPYING.README
+++ b/COPYING.README
@@ -2,10 +2,5 @@ Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links:
  http://www.mozilla.org/MPL/2.0/
  http://www.mozilla.org/MPL/2.0/FAQ.html

-Some files contain third-party code under BSD, LGPL, Apache, or other
-MPL2-compatible licenses, hence the other COPYING.* files here.
-
-Note that some optional external dependencies (e.g. FFTW, MPFR C++)
-are distributed under different licenses, including the GPL. Refer to
-the individual source files and their respective COPYING files for
-details.
+Some files contain third-party code under BSD or other MPL2-compatible licenses,
+hence the other COPYING.* files here.
--- a/Eigen/Cholesky
+++ b/Eigen/Cholesky
@@ -14,6 +14,8 @@
 #include "src/Core/util/DisableStupidWarnings.h"

 /** \defgroup Cholesky_Module Cholesky module
+ *
+ *
 *
 * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
 * Those decompositions are also accessible via the following methods:
--- a/Eigen/CholmodSupport
+++ b/Eigen/CholmodSupport
@@ -26,7 +26,7 @@
 * For the sake of completeness, this module also propose the two following classes:
 * - class CholmodSimplicialLLT
 * - class CholmodSimplicialLDLT
- * Note that these classes do not bring any particular advantage compared to the built-in
+ * Note that these classes do not bring any particular advantage compared to the built-in
 * SimplicialLLT and SimplicialLDLT factorization classes.
 *
 * \code
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -36,6 +36,12 @@
 #include <new>
 #endif

+// Disable the ipa-cp-clone optimization flag with MinGW 6.x or older (enabled by default with -O3)
+// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_STRICT_LESS_THAN(6, 0, 0)
+#pragma GCC optimize("-fno-ipa-cp-clone")
+#endif
+
 // Prevent ICC from specializing std::complex operators that silently fail
 // on device. This allows us to use our own device-compatible specializations
 // instead.
@@ -47,12 +53,10 @@
 // this include file manages BLAS and MKL related macros
 // and inclusion of their respective header files
 #include "src/Core/util/MKL_support.h"
-#include "src/Core/util/AOCL_Support.h"

-
-// EIGEN_HAS_GPU_FP16 is now always true when compiling with CUDA or HIP.
-// Use EIGEN_GPUCC (compile-time) or EIGEN_GPU_COMPILE_PHASE (device phase) instead.
-// TODO: Remove EIGEN_HAS_GPU_BF16 similarly once HIP bf16 guards are cleaned up.
+#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
+#define EIGEN_HAS_GPU_FP16
+#endif

 #if defined(EIGEN_HAS_CUDA_BF16) || defined(EIGEN_HAS_HIP_BF16)
 #define EIGEN_HAS_GPU_BF16
@@ -67,7 +71,8 @@
 #include <omp.h>
 #endif

-#if !EIGEN_COMP_ARM
+// MSVC for windows mobile does not have the errno.h file
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
 #define EIGEN_HAS_ERRNO
 #endif

@@ -118,18 +123,10 @@

 // required for __cpuid, needs to be included after cmath
 // also required for _BitScanReverse on Windows on ARM
-#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64)
+#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) && !EIGEN_OS_WINCE
 #include <intrin.h>
 #endif

-// Required for querying cache sizes on Linux and macOS.
-#if EIGEN_OS_LINUX
-#include <unistd.h>
-#elif EIGEN_OS_MAC
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#endif
-
 #if defined(EIGEN_USE_SYCL)
 #undef min
 #undef max
@@ -148,9 +145,19 @@
 #endif
 #endif

+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || \
+    defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API ||  \
+    defined EIGEN2_SUPPORT
+// This will generate an error message:
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
+#endif
+
 namespace Eigen {

+// we use size_t frequently and we'll never remember to prepend it with std:: every time just to
+// ensure QNX/QCC support
 using std::size_t;
+// gcc 4.6.0 wants std:: for ptrdiff_t
 using std::ptrdiff_t;

 }  // namespace Eigen
@@ -168,8 +175,6 @@ using std::ptrdiff_t;
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
-#elif defined(EIGEN_LAPACKE_SYSTEM)
-#include <lapacke.h>
 #else
 #include "src/misc/lapacke.h"
 #endif
@@ -200,13 +205,6 @@ using std::ptrdiff_t;
 #include "src/Core/arch/Default/BFloat16.h"
 #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"

-#if defined(EIGEN_VECTORIZE_GENERIC) && !defined(EIGEN_DONT_VECTORIZE)
-#include "src/Core/arch/clang/PacketMath.h"
-#include "src/Core/arch/clang/TypeCasting.h"
-#include "src/Core/arch/clang/Complex.h"
-#include "src/Core/arch/clang/Reductions.h"
-#include "src/Core/arch/clang/MathFunctions.h"
-#else
 #if defined EIGEN_VECTORIZE_AVX512
 #include "src/Core/arch/SSE/PacketMath.h"
 #include "src/Core/arch/SSE/Reductions.h"
@@ -272,18 +270,6 @@ using std::ptrdiff_t;
 #include "src/Core/arch/SVE/PacketMath.h"
 #include "src/Core/arch/SVE/TypeCasting.h"
 #include "src/Core/arch/SVE/MathFunctions.h"
-#elif defined EIGEN_VECTORIZE_RVV10
-#include "src/Core/arch/RVV10/PacketMath.h"
-#include "src/Core/arch/RVV10/PacketMath4.h"
-#include "src/Core/arch/RVV10/PacketMath2.h"
-#include "src/Core/arch/RVV10/TypeCasting.h"
-#include "src/Core/arch/RVV10/MathFunctions.h"
-#if defined EIGEN_VECTORIZE_RVV10FP16
-#include "src/Core/arch/RVV10/PacketMathFP16.h"
-#endif
-#if defined EIGEN_VECTORIZE_RVV10BF16
-#include "src/Core/arch/RVV10/PacketMathBF16.h"
-#endif
 #elif defined EIGEN_VECTORIZE_ZVECTOR
 #include "src/Core/arch/ZVector/PacketMath.h"
 #include "src/Core/arch/ZVector/MathFunctions.h"
@@ -311,8 +297,6 @@ using std::ptrdiff_t;
 #endif
 #endif

-#endif  // #ifndef EIGEN_VECTORIZE_GENERIC
-
 #include "src/Core/arch/Default/Settings.h"
 // This file provides generic implementations valid for scalar as well
 #include "src/Core/arch/Default/GenericPacketMathFunctions.h"
@@ -356,6 +340,8 @@ using std::ptrdiff_t;
 #include "src/Core/DenseStorage.h"
 #include "src/Core/NestByValue.h"

+// #include "src/Core/ForceAlignedAccess.h"
+
 #include "src/Core/ReturnByValue.h"
 #include "src/Core/NoAlias.h"
 #include "src/Core/PlainObjectBase.h"
@@ -421,21 +407,17 @@ using std::ptrdiff_t;
 #include "src/Core/CoreIterators.h"
 #include "src/Core/ConditionEstimator.h"

-#if !defined(EIGEN_VECTORIZE_GENERIC)
 #if defined(EIGEN_VECTORIZE_VSX)
 #include "src/Core/arch/AltiVec/MatrixProduct.h"
 #elif defined EIGEN_VECTORIZE_NEON
 #include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
 #elif defined EIGEN_VECTORIZE_LSX
 #include "src/Core/arch/LSX/GeneralBlockPanelKernel.h"
-#elif defined EIGEN_VECTORIZE_RVV10
-#include "src/Core/arch/RVV10/GeneralBlockPanelKernel.h"
 #endif

 #if defined(EIGEN_VECTORIZE_AVX512)
 #include "src/Core/arch/AVX512/GemmKernel.h"
 #endif
-#endif

 #include "src/Core/Select.h"
 #include "src/Core/VectorwiseOp.h"
@@ -461,10 +443,6 @@ using std::ptrdiff_t;
 #include "src/Core/Assign_MKL.h"
 #endif

-#ifdef EIGEN_USE_AOCL_VML
-#include "src/Core/Assign_AOCL.h"
-#endif
-
 #include "src/Core/GlobalFunctions.h"
 // IWYU pragma: end_exports

--- a/Eigen/Dense
+++ b/Eigen/Dense
@@ -1,13 +1,3 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_DENSE_MODULE_H
-#define EIGEN_DENSE_MODULE_H
-
 #include "Core"
 #include "LU"
 #include "Cholesky"
@@ -15,5 +5,3 @@
 #include "SVD"
 #include "Geometry"
 #include "Eigenvalues"
-
-#endif  // EIGEN_DENSE_MODULE_H
--- a/Eigen/Eigen
+++ b/Eigen/Eigen
@@ -1,14 +1,2 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_EIGEN_MODULE_H
-#define EIGEN_EIGEN_MODULE_H
-
 #include "Dense"
 #include "Sparse"
-
-#endif  // EIGEN_EIGEN_MODULE_H
--- a/Eigen/Eigenvalues
+++ b/Eigen/Eigenvalues
@@ -11,13 +11,16 @@
 #include "Core"

 #include "Cholesky"
+#include "Jacobi"
+#include "Householder"
 #include "LU"
 #include "Geometry"
-#include "Sparse"  // Needed by ComplexQZ.

 #include "src/Core/util/DisableStupidWarnings.h"

 /** \defgroup Eigenvalues_Module Eigenvalues module
+ *
+ *
 *
 * This module mainly provides various eigenvalue solvers.
 * This module also provides some MatrixBase methods, including:
@@ -29,6 +32,8 @@
 * \endcode
 */

+#include "src/misc/RealSvd2x2.h"
+
 // IWYU pragma: begin_exports
 #include "src/Eigenvalues/Tridiagonalization.h"
 #include "src/Eigenvalues/RealSchur.h"
@@ -39,14 +44,11 @@
 #include "src/Eigenvalues/ComplexSchur.h"
 #include "src/Eigenvalues/ComplexEigenSolver.h"
 #include "src/Eigenvalues/RealQZ.h"
-#include "src/Eigenvalues/ComplexQZ.h"
 #include "src/Eigenvalues/GeneralizedEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
-#elif defined(EIGEN_LAPACKE_SYSTEM)
-#include <lapacke.h>
 #else
 #include "src/misc/lapacke.h"
 #endif
--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -12,6 +12,7 @@

 #include "SVD"
 #include "LU"
+#include <limits>

 #include "src/Core/util/DisableStupidWarnings.h"

@@ -47,13 +48,10 @@
 #include "src/Geometry/AlignedBox.h"
 #include "src/Geometry/Umeyama.h"

-#ifndef EIGEN_VECTORIZE_GENERIC
-// TODO(rmlarsen): Make these work with generic vectorization if possible.
 // Use the SSE optimized version whenever possible.
 #if (defined EIGEN_VECTORIZE_SSE) || (defined EIGEN_VECTORIZE_NEON)
 #include "src/Geometry/arch/Geometry_SIMD.h"
 #endif
-#endif
 // IWYU pragma: end_exports

 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/Eigen/Householder
+++ b/Eigen/Householder
@@ -22,8 +22,8 @@

 // IWYU pragma: begin_exports
 #include "src/Householder/Householder.h"
-#include "src/Householder/BlockHouseholder.h"
 #include "src/Householder/HouseholderSequence.h"
+#include "src/Householder/BlockHouseholder.h"
 // IWYU pragma: end_exports

 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/Eigen/KLUSupport
+++ b/Eigen/KLUSupport
@@ -29,7 +29,7 @@ extern "C" {
 * \endcode
 *
 * In order to use this module, the klu and btf headers must be accessible from the include paths, and your binary must
- * be linked to the klu library and its dependencies. The dependencies depend on how KLU has been compiled. For a
+ * be linked to the klu library and its dependencies. The dependencies depend on how KLU has been compiled. For a
 * cmake based project, you can use our FindKLU.cmake module to help you in this task.
 *
 */
--- a/Eigen/LU
+++ b/Eigen/LU
@@ -23,10 +23,10 @@
 * \endcode
 */

-// IWYU pragma: begin_exports
 #include "src/misc/Kernel.h"
 #include "src/misc/Image.h"
-#include "src/misc/RankRevealingBase.h"
+
+// IWYU pragma: begin_exports
 #include "src/LU/FullPivLU.h"
 #include "src/LU/PartialPivLU.h"
 #ifdef EIGEN_USE_LAPACKE
@@ -36,12 +36,9 @@
 #include "src/LU/Determinant.h"
 #include "src/LU/InverseImpl.h"

-#ifndef EIGEN_VECTORIZE_GENERIC
-// TODO(rmlarsen): Make these work with generic vectorization if possible.
 #if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON
 #include "src/LU/arch/InverseSize4.h"
 #endif
-#endif
 // IWYU pragma: end_exports

 #include "src/Core/util/ReenableStupidWarnings.h"
--- a/Eigen/PaStiXSupport
+++ b/Eigen/PaStiXSupport
@@ -36,7 +36,7 @@ extern "C" {
 * \endcode
 *
 * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be
- * linked to the PaSTiX library and its dependencies. This wrapper requires PaStiX version 5.x compiled without MPI
+ * linked to the PaSTiX library and its dependencies. This wrapper requires PaStiX version 5.x compiled without MPI
 * support. The dependencies depend on how PaSTiX has been compiled. For a cmake based project, you can use our
 * FindPaSTiX.cmake module to help you in this task.
 *
--- a/Eigen/QR
+++ b/Eigen/QR
@@ -11,11 +11,14 @@
 #include "Core"

 #include "Cholesky"
+#include "Jacobi"
 #include "Householder"

 #include "src/Core/util/DisableStupidWarnings.h"

 /** \defgroup QR_Module QR module
+ *
+ *
 *
 * This module provides various QR decompositions
 * This module also provides some MatrixBase methods, including:
@@ -28,8 +31,6 @@
 * \endcode
 */

-#include "src/misc/RankRevealingBase.h"
-
 // IWYU pragma: begin_exports
 #include "src/QR/HouseholderQR.h"
 #include "src/QR/FullPivHouseholderQR.h"
--- a/Eigen/QtAlignedMalloc
+++ b/Eigen/QtAlignedMalloc
@@ -14,11 +14,11 @@

 #include "src/Core/util/DisableStupidWarnings.h"

-inline void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); }
+void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); }

-inline void qFree(void *ptr) { Eigen::internal::aligned_free(ptr); }
+void qFree(void *ptr) { Eigen::internal::aligned_free(ptr); }

-inline void *qRealloc(void *ptr, std::size_t size) {
+void *qRealloc(void *ptr, std::size_t size) {
  void *newPtr = Eigen::internal::aligned_malloc(size);
  std::memcpy(newPtr, ptr, size);
  Eigen::internal::aligned_free(ptr);
--- a/Eigen/SPQRSupport
+++ b/Eigen/SPQRSupport
@@ -38,4 +38,4 @@

 #include "src/Core/util/ReenableStupidWarnings.h"

-#endif  // EIGEN_SPQRSUPPORT_MODULE_H
+#endif
--- a/Eigen/SVD
+++ b/Eigen/SVD
@@ -9,10 +9,14 @@
 #define EIGEN_SVD_MODULE_H

 #include "QR"
+#include "Householder"
+#include "Jacobi"

 #include "src/Core/util/DisableStupidWarnings.h"

 /** \defgroup SVD_Module SVD module
+ *
+ *
 *
 * This module provides SVD decomposition for matrices (both real and complex).
 * Two decomposition algorithms are provided:
@@ -29,6 +33,7 @@
 */

 // IWYU pragma: begin_exports
+#include "src/misc/RealSvd2x2.h"
 #include "src/SVD/UpperBidiagonalization.h"
 #include "src/SVD/SVDBase.h"
 #include "src/SVD/JacobiSVD.h"
@@ -36,8 +41,6 @@
 #ifdef EIGEN_USE_LAPACKE
 #ifdef EIGEN_USE_MKL
 #include "mkl_lapacke.h"
-#elif defined(EIGEN_LAPACKE_SYSTEM)
-#include <lapacke.h>
 #else
 #include "src/misc/lapacke.h"
 #endif
--- a/Eigen/SparseCore
+++ b/Eigen/SparseCore
@@ -12,7 +12,11 @@

 #include "src/Core/util/DisableStupidWarnings.h"

+#include <vector>
 #include <map>
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
 #include <numeric>

 /**
--- a/Eigen/SparseQR
+++ b/Eigen/SparseQR
@@ -35,4 +35,4 @@

 #include "src/Core/util/ReenableStupidWarnings.h"

-#endif  // EIGEN_SPARSEQR_MODULE_H
+#endif
--- a/Eigen/SuperLUSupport
+++ b/Eigen/SuperLUSupport
@@ -16,7 +16,6 @@
 #define EIGEN_EMPTY_WAS_ALREADY_DEFINED
 #endif

-// Required by SuperLU headers, which expect int_t to be defined as a global typedef.
 typedef int int_t;
 #include <slu_Cnames.h>
 #include <supermatrix.h>
--- a/Eigen/ThreadPool
+++ b/Eigen/ThreadPool
@@ -27,7 +27,7 @@

 #include <cstddef>
 #include <cstring>
-#include <ctime>
+#include <time.h>

 #include <vector>
 #include <atomic>
@@ -77,4 +77,4 @@

 #include "src/Core/util/ReenableStupidWarnings.h"

-#endif  // EIGEN_THREADPOOL_MODULE_H
+#endif  // EIGEN_THREADPOOL_MODULE_H
--- a/Eigen/UmfPackSupport
+++ b/Eigen/UmfPackSupport
@@ -35,7 +35,7 @@ extern "C" {

 // IWYU pragma: begin_exports
 #include "src/UmfPackSupport/UmfPackSupport.h"
-// IWYU pragma: end_exports
+// IWYU pragma: end_exports

 #include "src/Core/util/ReenableStupidWarnings.h"

--- a/Eigen/Version
+++ b/Eigen/Version
@@ -1,10 +1,3 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
 #ifndef EIGEN_VERSION_H
 #define EIGEN_VERSION_H

@@ -14,8 +7,8 @@
 #define EIGEN_MAJOR_VERSION 5
 #define EIGEN_MINOR_VERSION 0
 #define EIGEN_PATCH_VERSION 1
-#define EIGEN_PRERELEASE_VERSION "dev"
-#define EIGEN_BUILD_VERSION "master"
-#define EIGEN_VERSION_STRING "5.0.1-dev+master"
+#define EIGEN_PRERELEASE_VERSION ""
+#define EIGEN_BUILD_VERSION ""
+#define EIGEN_VERSION_STRING "5.0.1"

 #endif  // EIGEN_VERSION_H
--- a/Eigen/src/AccelerateSupport/AccelerateSupport.h
+++ b/Eigen/src/AccelerateSupport/AccelerateSupport.h
@@ -110,7 +110,7 @@ using AccelerateCholeskyAtA = AccelerateImpl<MatrixType, 0, SparseFactorizationC
 namespace internal {
 template <typename T>
 struct AccelFactorizationDeleter {
-  void operator()(T* sym) const {
+  void operator()(T* sym) {
    if (sym) {
      SparseCleanup(*sym);
      delete sym;
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -84,13 +84,7 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {
   * The default constructor is useful in cases in which the user intends to
   * perform decompositions via LDLT::compute(const MatrixType&).
   */
-  LDLT()
-      : m_matrix(),
-        m_l1_norm(0),
-        m_transpositions(),
-        m_sign(internal::ZeroSign),
-        m_isInitialized(false),
-        m_info(InvalidInput) {}
+  LDLT() : m_matrix(), m_transpositions(), m_sign(internal::ZeroSign), m_isInitialized(false) {}

  /** \brief Default Constructor with memory preallocation
   *
@@ -100,12 +94,10 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {
   */
  explicit LDLT(Index size)
      : m_matrix(size, size),
-        m_l1_norm(0),
        m_transpositions(size),
        m_temporary(size),
        m_sign(internal::ZeroSign),
-        m_isInitialized(false),
-        m_info(InvalidInput) {}
+        m_isInitialized(false) {}

  /** \brief Constructor with decomposition
   *
@@ -116,12 +108,10 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {
  template <typename InputType>
  explicit LDLT(const EigenBase<InputType>& matrix)
      : m_matrix(matrix.rows(), matrix.cols()),
-        m_l1_norm(0),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
        m_sign(internal::ZeroSign),
-        m_isInitialized(false),
-        m_info(InvalidInput) {
+        m_isInitialized(false) {
    compute(matrix.derived());
  }

@@ -135,12 +125,10 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {
  template <typename InputType>
  explicit LDLT(EigenBase<InputType>& matrix)
      : m_matrix(matrix.derived()),
-        m_l1_norm(0),
        m_transpositions(matrix.rows()),
        m_temporary(matrix.rows()),
        m_sign(internal::ZeroSign),
-        m_isInitialized(false),
-        m_info(InvalidInput) {
+        m_isInitialized(false) {
    compute(matrix.derived());
  }

@@ -203,7 +191,7 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {
   * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
   */
  template <typename Rhs>
-  inline Solve<LDLT, Rhs> solve(const MatrixBase<Rhs>& b) const;
+  inline const Solve<LDLT, Rhs> solve(const MatrixBase<Rhs>& b) const;
 #endif

  template <typename Derived>
@@ -225,7 +213,7 @@ class LDLT : public SolverBase<LDLT<MatrixType_, UpLo_> > {

  /** \returns the internal LDLT decomposition matrix
   *
-   * TODO: document the storage layout.
+   * TODO: document the storage layout
   */
  inline const MatrixType& matrixLDLT() const {
    eigen_assert(m_isInitialized && "LDLT is not initialized.");
@@ -491,7 +479,7 @@ LDLT<MatrixType, UpLo_>& LDLT<MatrixType, UpLo_>::compute(const EigenBase<InputT

  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
-  // TODO: move this code to SelfAdjointView
+  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (UpLo_ == Lower)
@@ -642,8 +630,8 @@ MatrixType LDLT<MatrixType, UpLo_>::reconstructedMatrix() const {
 * \sa MatrixBase::ldlt()
 */
 template <typename MatrixType, unsigned int UpLo>
-inline LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo> SelfAdjointView<MatrixType, UpLo>::ldlt()
-    const {
+inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
+SelfAdjointView<MatrixType, UpLo>::ldlt() const {
  return LDLT<PlainObject, UpLo>(m_matrix);
 }

@@ -652,7 +640,7 @@ inline LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo> SelfA
 * \sa SelfAdjointView::ldlt()
 */
 template <typename Derived>
-inline LDLT<typename MatrixBase<Derived>::PlainObject> MatrixBase<Derived>::ldlt() const {
+inline const LDLT<typename MatrixBase<Derived>::PlainObject> MatrixBase<Derived>::ldlt() const {
  return LDLT<PlainObject>(derived());
 }

--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -86,7 +86,7 @@ class LLT : public SolverBase<LLT<MatrixType_, UpLo_> > {
   * The default constructor is useful in cases in which the user intends to
   * perform decompositions via LLT::compute(const MatrixType&).
   */
-  LLT() : m_matrix(), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {}
+  LLT() : m_matrix(), m_isInitialized(false) {}

  /** \brief Default Constructor with memory preallocation
   *
@@ -94,11 +94,10 @@ class LLT : public SolverBase<LLT<MatrixType_, UpLo_> > {
   * according to the specified problem \a size.
   * \sa LLT()
   */
-  explicit LLT(Index size) : m_matrix(size, size), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {}
+  explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {}

  template <typename InputType>
-  explicit LLT(const EigenBase<InputType>& matrix)
-      : m_matrix(matrix.rows(), matrix.cols()), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {
+  explicit LLT(const EigenBase<InputType>& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) {
    compute(matrix.derived());
  }

@@ -110,8 +109,7 @@ class LLT : public SolverBase<LLT<MatrixType_, UpLo_> > {
   * \sa LLT(const EigenBase&)
   */
  template <typename InputType>
-  explicit LLT(EigenBase<InputType>& matrix)
-      : m_matrix(matrix.derived()), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {
+  explicit LLT(EigenBase<InputType>& matrix) : m_matrix(matrix.derived()), m_isInitialized(false) {
    compute(matrix.derived());
  }

@@ -139,7 +137,7 @@ class LLT : public SolverBase<LLT<MatrixType_, UpLo_> > {
   * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
   */
  template <typename Rhs>
-  inline Solve<LLT, Rhs> solve(const MatrixBase<Rhs>& b) const;
+  inline const Solve<LLT, Rhs> solve(const MatrixBase<Rhs>& b) const;
 #endif

  template <typename Derived>
@@ -406,7 +404,7 @@ LLT<MatrixType, UpLo_>& LLT<MatrixType, UpLo_>::compute(const EigenBase<InputTyp

  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
-  // TODO: move this code to SelfAdjointView
+  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (UpLo_ == Lower)
@@ -497,7 +495,7 @@ MatrixType LLT<MatrixType, UpLo_>::reconstructedMatrix() const {
 * \sa SelfAdjointView::llt()
 */
 template <typename Derived>
-inline LLT<typename MatrixBase<Derived>::PlainObject> MatrixBase<Derived>::llt() const {
+inline const LLT<typename MatrixBase<Derived>::PlainObject> MatrixBase<Derived>::llt() const {
  return LLT<PlainObject>(derived());
 }

@@ -506,7 +504,7 @@ inline LLT<typename MatrixBase<Derived>::PlainObject> MatrixBase<Derived>::llt()
 * \sa SelfAdjointView::llt()
 */
 template <typename MatrixType, unsigned int UpLo>
-inline LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo> SelfAdjointView<MatrixType, UpLo>::llt()
+inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo> SelfAdjointView<MatrixType, UpLo>::llt()
    const {
  return LLT<PlainObject, UpLo>(m_matrix);
 }
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -360,7 +360,7 @@ class CholmodBase : public SparseSolverBase<Derived> {
      this->m_info = NumericalIssue;
      return;
    }
-    // TODO: optimize this copy by swapping when possible (be careful with alignment, etc.)
+    // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
    // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve
    dest = Matrix<Scalar, Dest::RowsAtCompileTime, Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),
                                                                                 b.rows(), b.cols());
@@ -386,7 +386,7 @@ class CholmodBase : public SparseSolverBase<Derived> {
      this->m_info = NumericalIssue;
      return;
    }
-    // TODO: optimize this copy by swapping when possible (be careful with alignment, etc.)
+    // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
    // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's
    // sparse solver)
    dest.derived() = viewAsEigen<typename DestDerived::Scalar, typename DestDerived::StorageIndex>(*x_cs);
--- a/Eigen/src/Core/ArithmeticSequence.h
+++ b/Eigen/src/Core/ArithmeticSequence.h
@@ -182,7 +182,7 @@ namespace placeholders {
 * \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr.
 *
 * It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode
- * \anchor Eigen_placeholders_lastN
+ *
 * \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */
 template <typename SizeType, typename IncrType>
 auto lastN(SizeType size, IncrType incr)
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -123,12 +123,12 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
   * \sa resize(Index,Index)
   */
 #ifdef EIGEN_INITIALIZE_COEFFS
-  EIGEN_DEVICE_FUNC constexpr Array() : Base() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() : Base() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
 #else
-  EIGEN_DEVICE_FUNC constexpr Array() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() = default;
 #endif
  /** \brief Move constructor */
-  EIGEN_DEVICE_FUNC constexpr Array(Array&&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array&&) = default;
  EIGEN_DEVICE_FUNC Array& operator=(Array&& other) noexcept(std::is_nothrow_move_assignable<Scalar>::value) {
    Base::operator=(std::move(other));
    return *this;
@@ -141,7 +141,7 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
   * This constructor is for 1D array or vectors with more than 4 coefficients.
   *
   * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
-   * constructor must match the fixed number of rows (resp. columns) of \c *this.
+   * constructor must match the fixed number of rows (resp. columns) of \c *this.
   *
   *
   * Example: \include Array_variadic_ctor_cxx11.cpp
@@ -178,7 +178,9 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
   *
   * \sa  Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args)
   */
-  EIGEN_DEVICE_FUNC constexpr Array(const std::initializer_list<std::initializer_list<Scalar>>& list) : Base(list) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(
+      const std::initializer_list<std::initializer_list<Scalar>>& list)
+      : Base(list) {}

 #ifndef EIGEN_PARSED_BY_DOXYGEN
  template <typename T>
@@ -237,7 +239,7 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
  }

  /** Copy constructor */
-  EIGEN_DEVICE_FUNC constexpr Array(const Array&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(const Array&) = default;

 private:
  struct PrivateType {};
@@ -245,7 +247,7 @@ class Array : public PlainObjectBase<Array<Scalar_, Rows_, Cols_, Options_, MaxR
 public:
  /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Array(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(
      const EigenBase<OtherDerived>& other,
      std::enable_if_t<internal::is_convertible<typename OtherDerived::Scalar, Scalar>::value, PrivateType> =
          PrivateType())
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -168,16 +168,19 @@ class ArrayBase : public DenseBase<Derived> {
  }

 public:
-  EIGEN_DEVICE_FUNC constexpr ArrayBase<Derived>& array() { return *this; }
-  EIGEN_DEVICE_FUNC constexpr const ArrayBase<Derived>& array() const { return *this; }
+  EIGEN_DEVICE_FUNC ArrayBase<Derived>& array() { return *this; }
+  EIGEN_DEVICE_FUNC const ArrayBase<Derived>& array() const { return *this; }

  /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
   * \sa MatrixBase::array() */
-  EIGEN_DEVICE_FUNC constexpr MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
-  EIGEN_DEVICE_FUNC constexpr const MatrixWrapper<const Derived> matrix() const {
+  EIGEN_DEVICE_FUNC MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
+  EIGEN_DEVICE_FUNC const MatrixWrapper<const Derived> matrix() const {
    return MatrixWrapper<const Derived>(derived());
  }

+  //     template<typename Dest>
+  //     inline void evalTo(Dest& dst) const { dst = matrix(); }
+
 protected:
  EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase)
  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase)
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -21,7 +21,7 @@ namespace Eigen {
 * \brief Expression of a mathematical vector or matrix as an array object
 *
 * This class is the return type of MatrixBase::array(), and most of the time
- * this is the only way it is used.
+ * this is the only way it is used.
 *
 * \sa MatrixBase::array(), class MatrixWrapper
 */
@@ -54,8 +54,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > {

  using Base::coeffRef;

-  EIGEN_DEVICE_FUNC constexpr explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix)
-      : m_expression(matrix) {}
+  EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}

  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); }
@@ -76,7 +75,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > {
    dst = m_expression;
  }

-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<NestedExpressionType>& nestedExpression() const {
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<NestedExpressionType>& nestedExpression() const {
    return m_expression;
  }

@@ -97,7 +96,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> > {
 * \brief Expression of an array as a mathematical vector or matrix
 *
 * This class is the return type of ArrayBase::matrix(), and most of the time
- * this is the only way it is used.
+ * this is the only way it is used.
 *
 * \sa MatrixBase::matrix(), class ArrayWrapper
 */
@@ -130,7 +129,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > {

  using Base::coeffRef;

-  EIGEN_DEVICE_FUNC constexpr explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+  EIGEN_DEVICE_FUNC explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}

  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); }
@@ -146,7 +145,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> > {

  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.coeffRef(index); }

-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<NestedExpressionType>& nestedExpression() const {
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<NestedExpressionType>& nestedExpression() const {
    return m_expression;
  }

--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -19,8 +19,7 @@ namespace Eigen {

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::lazyAssign(
-    const DenseBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::lazyAssign(const DenseBase<OtherDerived>& other) {
  enum { SameType = internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value };

  EIGEN_STATIC_ASSERT_LVALUE(Derived)
@@ -37,43 +36,40 @@ EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::laz

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(
-    const DenseBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other) {
  internal::call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other) {
  internal::call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other) {
  internal::call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(
-    const DenseBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other) {
  internal::call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(
-    const EigenBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other) {
  internal::call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(
    const ReturnByValue<OtherDerived>& other) {
  other.derived().evalTo(derived());
  return derived();
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -63,7 +63,7 @@ struct copy_using_evaluator_traits {
  static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
  static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;

-  // TODO: distinguish between linear traversal and inner-traversal packet types.
+  // TODO distinguish between linear traversal and inner-traversals
  using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
  using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;

@@ -474,8 +474,8 @@ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
-  static constexpr bool Alignable = (DstAlignment >= RequestedAlignment) ||
-                                    (static_cast<std::size_t>(RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
+  static constexpr bool Alignable =
+      (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
@@ -587,8 +587,8 @@ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling>
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
-  static constexpr bool Alignable = (DstAlignment >= RequestedAlignment) ||
-                                    (static_cast<std::size_t>(RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
+  static constexpr bool Alignable =
+      (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
@@ -654,15 +654,15 @@ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolli
 template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
 class generic_dense_assignment_kernel {
 protected:
-  using DstXprType = typename DstEvaluatorTypeT::XprType;
-  using SrcXprType = typename SrcEvaluatorTypeT::XprType;
+  typedef typename DstEvaluatorTypeT::XprType DstXprType;
+  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;

 public:
-  using DstEvaluatorType = DstEvaluatorTypeT;
-  using SrcEvaluatorType = SrcEvaluatorTypeT;
-  using Scalar = typename DstEvaluatorType::Scalar;
-  using AssignmentTraits = copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor>;
-  using PacketType = typename AssignmentTraits::PacketType;
+  typedef DstEvaluatorTypeT DstEvaluatorType;
+  typedef SrcEvaluatorTypeT SrcEvaluatorType;
+  typedef typename DstEvaluatorType::Scalar Scalar;
+  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
+  typedef typename AssignmentTraits::PacketType PacketType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
                                                                                  const SrcEvaluatorType& src,
@@ -681,8 +681,8 @@ class generic_dense_assignment_kernel {
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }

-  EIGEN_DEVICE_FUNC constexpr DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
-  EIGEN_DEVICE_FUNC constexpr const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
+  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
+  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }

  /// Assign src(row,col) to dst(row,col) through the assignment functor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
@@ -690,7 +690,7 @@ class generic_dense_assignment_kernel {
  }

  /// \sa assignCoeff(Index,Index)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index index) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

@@ -741,7 +741,7 @@ class generic_dense_assignment_kernel {
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
-    using Traits = typename DstEvaluatorType::ExpressionTraits;
+    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1          ? 0
           : int(Traits::ColsAtCompileTime) == 1        ? inner
           : int(DstEvaluatorType::Flags) & RowMajorBit ? outer
@@ -749,7 +749,7 @@ class generic_dense_assignment_kernel {
  }

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
-    using Traits = typename DstEvaluatorType::ExpressionTraits;
+    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1          ? 0
           : int(Traits::RowsAtCompileTime) == 1        ? inner
           : int(DstEvaluatorType::Flags) & RowMajorBit ? inner
@@ -762,7 +762,7 @@ class generic_dense_assignment_kernel {
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor& m_functor;
-  // TODO: find a way to avoid the needs of the original expression
+  // TODO find a way to avoid the needs of the original expression
  DstXprType& m_dstExpr;
 };

@@ -774,13 +774,13 @@ template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Funct
 class restricted_packet_dense_assignment_kernel
    : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
 protected:
-  using Base = generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>;
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;

 public:
-  using Scalar = typename Base::Scalar;
-  using DstXprType = typename Base::DstXprType;
-  using AssignmentTraits = copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4>;
-  using PacketType = typename AssignmentTraits::PacketType;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::DstXprType DstXprType;
+  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
+  typedef typename AssignmentTraits::PacketType PacketType;

  EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
                                                              const Functor& func, DstXprType& dstExpr)
@@ -804,27 +804,15 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprTyp
                                                                       const internal::assign_op<T1, T2>& /*func*/) {
  Index dstRows = src.rows();
  Index dstCols = src.cols();
-  if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) {
-#ifdef EIGEN_NO_AUTOMATIC_RESIZING
-    eigen_assert(
-        (dst.size() == 0 || (DstXprType::IsVectorAtCompileTime ? (dst.size() == src.size())
-                                                               : (dst.rows() == dstRows && dst.cols() == dstCols))) &&
-        "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
-    if (dst.size() == 0) {
-      dst.resize(dstRows, dstCols);
-    }
-#else
-    dst.resize(dstRows, dstCols);
-    eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
-#endif
-  }
+  if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
+  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
 }

 template <typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
                                                                                const Functor& func) {
-  using DstEvaluatorType = evaluator<DstXprType>;
-  using SrcEvaluatorType = evaluator<SrcXprType>;
+  typedef evaluator<DstXprType> DstEvaluatorType;
+  typedef evaluator<SrcXprType> SrcEvaluatorType;

  SrcEvaluatorType srcEvaluator(src);

@@ -834,14 +822,14 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr void call_dense_assignment_loop(

  DstEvaluatorType dstEvaluator(dst);

-  using Kernel = generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor>;
+  typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
 }

 template <typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
 }

@@ -861,11 +849,11 @@ struct EigenBase2EigenBase {};

 template <typename, typename>
 struct AssignmentKind {
-  using Kind = EigenBase2EigenBase;
+  typedef EigenBase2EigenBase Kind;
 };
 template <>
 struct AssignmentKind<DenseShape, DenseShape> {
-  using Kind = Dense2Dense;
+  typedef Dense2Dense Kind;
 };

 // This is the main assignment class
@@ -920,11 +908,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds
                      int(Dst::SizeAtCompileTime) != 1
  };

-  using ActualDstTypeCleaned = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst>;
-  using ActualDstType = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&>;
+  typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
+  typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
  ActualDstType actualDst(dst);

-  // TODO: check whether this is the right place to perform these checks:
+  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
@@ -935,9 +923,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds
 template <typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
                                                                                      const Func& func) {
-  using DstEvaluatorType = evaluator<Dst>;
-  using SrcEvaluatorType = evaluator<Src>;
-  using Kernel = restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func>;
+  typedef evaluator<Dst> DstEvaluatorType;
+  typedef evaluator<Src> SrcEvaluatorType;
+  typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;

  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
@@ -959,7 +947,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds
 template <typename Dst, typename Src, typename Func>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
                                                                                           const Func& func) {
-  // TODO: check whether this is the right place to perform these checks:
+  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
@@ -1019,7 +1007,7 @@ struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType:
 };

 // Generic assignment through evalTo.
-// TODO: evaluate whether this generic evalTo-based assignment path is still needed.
+// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
 // Note that the last template argument "Weak" is needed to make it possible to perform
 // both partial specialization+SFINAE without ambiguous specialization
 template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
--- a/Eigen/src/Core/Assign_AOCL.h
+++ b/Eigen/src/Core/Assign_AOCL.h
@@ -1,301 +0,0 @@
-/*
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at https://mozilla.org/MPL/2.0/.
- *
- * Assign_AOCL.h - AOCL Vectorized Math Dispatch Layer for Eigen
- *
- * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
- *
- * Description:
- * ------------
- * This file implements a high-performance dispatch layer that automatically
- * routes Eigen's element-wise mathematical operations to AMD Optimizing CPU
- * Libraries (AOCL) Vector Math Library (VML) functions when beneficial for
- * performance.
- *
- * The dispatch system uses C++ template specialization to intercept Eigen's
- * assignment operations and redirect them to AOCL's VRDA functions, which
- * provide optimized implementations for AMD Zen architectures.
- *
- * Key Features:
- * -------------
- * 1. Automatic Dispatch: Seamlessly routes supported operations to AOCL without
- *    requiring code changes in user applications
- *
- * 2. Performance Optimization: Uses AOCL VRDA functions optimized for Zen
- * family processors with automatic SIMD instruction selection (AVX2, AVX-512)
- *
- * 3. Threshold-Based Activation: Only activates for vectors larger than
- *    EIGEN_AOCL_VML_THRESHOLD (default: 128 elements) to avoid overhead on
- * small vectors
- *
- * 4. Precision-Specific Handling:
- *    - Double precision: AOCL VRDA vectorized functions
- *    - Single precision: Scalar fallback (preserves correctness)
- *
- * 5. Memory Layout Compatibility: Ensures direct memory access and compatible
- *    storage orders between source and destination for optimal performance
- *
- * Supported Operations:
- * ---------------------
- * UNARY OPERATIONS (vector → vector):
- * - Transcendental: exp(), sin(), cos(), sqrt(), log(), log10(), log2()
- *
- * BINARY OPERATIONS (vector op vector → vector):
- * - Arithmetic: +, *, pow()
- *
- * Template Specialization Mechanism:
- * -----------------------------------
- * The system works by specializing Eigen's Assignment template for:
- * 1. CwiseUnaryOp with scalar_*_op functors (unary operations)
- * 2. CwiseBinaryOp with scalar_*_op functors (binary operations)
- * 3. Dense2Dense assignment context with AOCL-compatible traits
- *
- * Dispatch conditions (all must be true):
- * - Source and destination have DirectAccessBit (contiguous memory)
- * - Compatible storage orders (both row-major or both column-major)
- * - Vector size ≥ EIGEN_AOCL_VML_THRESHOLD or Dynamic size
- * - Supported data type (currently double precision for VRDA)
- *
- * Integration Example:
- * --------------------
- * // Standard Eigen code - no changes required
- * VectorXd x = VectorXd::Random(10000);
- * VectorXd y = VectorXd::Random(10000);
- * VectorXd result;
- *
- * // These operations are automatically dispatched to AOCL:
- * result = x.array().exp();              // → amd_vrda_exp()
- * result = x.array().sin();              // → amd_vrda_sin()
- * result = x.array() + y.array();        // → amd_vrda_add()
- * result = x.array().pow(y.array());     // → amd_vrda_pow()
- *
- * Configuration:
- * --------------
- * Required preprocessor definitions:
- * - EIGEN_USE_AOCL_ALL or EIGEN_USE_AOCL_MT: Enable AOCL integration
- * - EIGEN_USE_AOCL_VML: Enable Vector Math Library dispatch
- *
- * Compilation Requirements:
- * -------------------------
- * Include paths:
- * - AOCL headers: -I${AOCL_ROOT}/include
- * - Eigen headers: -I/path/to/eigen
- *
- * Link libraries:
- * - AOCL MathLib: -lamdlibm
- * - Standard math: -lm
- *
- * Compiler flags:
- * - Optimization: -O3 (required for inlining)
- * - Architecture: -march=znver5 or -march=native
- * - Vectorization: -mfma -mavx512f (if supported)
- *
- * Platform Support:
- * ------------------
- * - Primary: Linux x86_64 with AMD Zen family processors
- * - Compilers: GCC 8+, Clang 10+, AOCC (recommended)
- * - AOCL Version: 4.0+ (with VRDA support)
- *
- * Error Handling:
- * ---------------
- * - Graceful fallback to scalar operations for unsupported configurations
- * - Compile-time detection of AOCL availability
- * - Runtime size and alignment validation with eigen_assert()
- *
- * Developer:
- * ----------
- * Name: Sharad Saurabh Bhaskar
- * Email: shbhaska@amd.com
- * Organization: Advanced Micro Devices, Inc.
- */
-
-
-#ifndef EIGEN_ASSIGN_AOCL_H
-#define EIGEN_ASSIGN_AOCL_H
-
-namespace Eigen {
-namespace internal {
-
-// Traits for unary operations.
-template <typename Dst, typename Src> class aocl_assign_traits {
-private:
-  enum {
-    DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit),
-    SrcHasDirectAccess = !!(Src::Flags & DirectAccessBit),
-    StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
-    InnerSize = Dst::IsVectorAtCompileTime   ? int(Dst::SizeAtCompileTime)
-                : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime)
-                                             : int(Dst::RowsAtCompileTime),
-    LargeEnough =
-        (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD)
-  };
-
-public:
-  enum {
-    EnableAoclVML = DstHasDirectAccess && SrcHasDirectAccess &&
-                    StorageOrdersAgree && LargeEnough,
-    Traversal = LinearTraversal
-  };
-};
-
-// Traits for binary operations (e.g., add, pow).
-template <typename Dst, typename Lhs, typename Rhs>
-class aocl_assign_binary_traits {
-private:
-  enum {
-    DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit),
-    LhsHasDirectAccess = !!(Lhs::Flags & DirectAccessBit),
-    RhsHasDirectAccess = !!(Rhs::Flags & DirectAccessBit),
-    StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Lhs::IsRowMajor)) &&
-                         (int(Dst::IsRowMajor) == int(Rhs::IsRowMajor)),
-    InnerSize = Dst::IsVectorAtCompileTime   ? int(Dst::SizeAtCompileTime)
-                : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime)
-                                             : int(Dst::RowsAtCompileTime),
-    LargeEnough =
-        (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD)
-  };
-
-public:
-  enum {
-    EnableAoclVML = DstHasDirectAccess && LhsHasDirectAccess &&
-                    RhsHasDirectAccess && StorageOrdersAgree && LargeEnough
-  };
-};
-
-// Unary operation dispatch for float (scalar fallback).
-#define EIGEN_AOCL_VML_UNARY_CALL_FLOAT(EIGENOP)                               \
-  template <typename DstXprType, typename SrcXprNested>                        \
-  struct Assignment<                                                           \
-      DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<float>, SrcXprNested>,    \
-      assign_op<float, float>, Dense2Dense,                                    \
-      std::enable_if_t<                                                        \
-          aocl_assign_traits<DstXprType, SrcXprNested>::EnableAoclVML>> {      \
-    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<float>, SrcXprNested>           \
-        SrcXprType;                                                            \
-    static void run(DstXprType &dst, const SrcXprType &src,                    \
-                    const assign_op<float, float> &) {                         \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());      \
-      Eigen::Index n = dst.size();                                             \
-      if (n <= 0)                                                              \
-        return;                                                                \
-      const float *input =                                                     \
-          reinterpret_cast<const float *>(src.nestedExpression().data());      \
-      float *output = reinterpret_cast<float *>(dst.data());                   \
-      for (Eigen::Index i = 0; i < n; ++i) {                                   \
-        output[i] = std::EIGENOP(input[i]);                                    \
-      }                                                                        \
-    }                                                                          \
-  };
-
-// Unary operation dispatch for double (AOCL vectorized).
-#define EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(EIGENOP, AOCLOP)                      \
-  template <typename DstXprType, typename SrcXprNested>                        \
-  struct Assignment<                                                           \
-      DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<double>, SrcXprNested>,   \
-      assign_op<double, double>, Dense2Dense,                                  \
-      std::enable_if_t<                                                        \
-          aocl_assign_traits<DstXprType, SrcXprNested>::EnableAoclVML>> {      \
-    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<double>, SrcXprNested>          \
-        SrcXprType;                                                            \
-    static void run(DstXprType &dst, const SrcXprType &src,                    \
-                    const assign_op<double, double> &) {                       \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());      \
-      Eigen::Index n = dst.size();                                             \
-      eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \
-      if (n <= 0)                                                              \
-        return;                                                                \
-      const double *input =                                                    \
-          reinterpret_cast<const double *>(src.nestedExpression().data());     \
-      double *output = reinterpret_cast<double *>(dst.data());                 \
-      int aocl_n = internal::convert_index<int>(n);                            \
-      AOCLOP(aocl_n, const_cast<double *>(input), output);                     \
-    }                                                                          \
-  };
-
-// Instantiate unary calls for float (scalar).
-// EIGEN_AOCL_VML_UNARY_CALL_FLOAT(exp)
-
-// Instantiate unary calls for double (AOCL vectorized).
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp2, amd_vrda_exp2)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp, amd_vrda_exp)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sin, amd_vrda_sin)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cos, amd_vrda_cos)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sqrt, amd_vrda_sqrt)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cbrt, amd_vrda_cbrt)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(abs, amd_vrda_fabs)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log, amd_vrda_log)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log10, amd_vrda_log10)
-EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log2, amd_vrda_log2)
-
-// Binary operation dispatch for float (scalar fallback).
-#define EIGEN_AOCL_VML_BINARY_CALL_FLOAT(EIGENOP, STDFUNC)                     \
-  template <typename DstXprType, typename LhsXprNested, typename RhsXprNested> \
-  struct Assignment<                                                           \
-      DstXprType,                                                              \
-      CwiseBinaryOp<scalar_##EIGENOP##_op<float, float>, LhsXprNested,         \
-                    RhsXprNested>,                                             \
-      assign_op<float, float>, Dense2Dense,                                    \
-      std::enable_if_t<aocl_assign_binary_traits<                              \
-          DstXprType, LhsXprNested, RhsXprNested>::EnableAoclVML>> {           \
-    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<float, float>, LhsXprNested,   \
-                          RhsXprNested>                                        \
-        SrcXprType;                                                            \
-    static void run(DstXprType &dst, const SrcXprType &src,                    \
-                    const assign_op<float, float> &) {                         \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());      \
-      Eigen::Index n = dst.size();                                             \
-      if (n <= 0)                                                              \
-        return;                                                                \
-      const float *lhs = reinterpret_cast<const float *>(src.lhs().data());    \
-      const float *rhs = reinterpret_cast<const float *>(src.rhs().data());    \
-      float *output = reinterpret_cast<float *>(dst.data());                   \
-      for (Eigen::Index i = 0; i < n; ++i) {                                   \
-        output[i] = STDFUNC(lhs[i], rhs[i]);                                   \
-      }                                                                        \
-    }                                                                          \
-  };
-
-// Binary operation dispatch for double (AOCL vectorized).
-#define EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(EIGENOP, AOCLOP)                     \
-  template <typename DstXprType, typename LhsXprNested, typename RhsXprNested> \
-  struct Assignment<                                                           \
-      DstXprType,                                                              \
-      CwiseBinaryOp<scalar_##EIGENOP##_op<double, double>, LhsXprNested,       \
-                    RhsXprNested>,                                             \
-      assign_op<double, double>, Dense2Dense,                                  \
-      std::enable_if_t<aocl_assign_binary_traits<                              \
-          DstXprType, LhsXprNested, RhsXprNested>::EnableAoclVML>> {           \
-    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<double, double>, LhsXprNested, \
-                          RhsXprNested>                                        \
-        SrcXprType;                                                            \
-    static void run(DstXprType &dst, const SrcXprType &src,                    \
-                    const assign_op<double, double> &) {                       \
-      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());      \
-      Eigen::Index n = dst.size();                                             \
-      eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \
-      if (n <= 0)                                                              \
-        return;                                                                \
-      const double *lhs = reinterpret_cast<const double *>(src.lhs().data());  \
-      const double *rhs = reinterpret_cast<const double *>(src.rhs().data());  \
-      double *output = reinterpret_cast<double *>(dst.data());                 \
-      int aocl_n = internal::convert_index<int>(n);                            \
-      AOCLOP(aocl_n, const_cast<double *>(lhs), const_cast<double *>(rhs), output); \
-    }                                                                          \
-  };
-
-// Instantiate binary calls for float (scalar).
-// EIGEN_AOCL_VML_BINARY_CALL_FLOAT(sum, std::plus<float>)  // Using
-// scalar_sum_op for addition EIGEN_AOCL_VML_BINARY_CALL_FLOAT(pow, std::pow)
-
-// Instantiate binary calls for double (AOCL vectorized).
-EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(sum, amd_vrda_add) // Using scalar_sum_op for addition
-EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(pow, amd_vrda_pow)
-EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(max, amd_vrda_fmax)
-EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(min, amd_vrda_fmin)
-
-} // namespace internal
-} // namespace Eigen
-
-#endif // EIGEN_ASSIGN_AOCL_H
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -121,14 +121,14 @@ class Block

  /** Column or Row constructor
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index i) : Impl(xpr, i) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index i) : Impl(xpr, i) {
    eigen_assert((i >= 0) && (((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) && i < xpr.rows()) ||
                              ((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) && i < xpr.cols())));
  }

  /** Fixed-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol)
      : Impl(xpr, startRow, startCol) {
    EIGEN_STATIC_ASSERT(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic,
                        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
@@ -138,8 +138,8 @@ class Block

  /** Dynamic-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol, Index blockRows,
-                                                        Index blockCols)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol, Index blockRows,
+                                              Index blockCols)
      : Impl(xpr, startRow, startCol, blockRows, blockCols) {
    eigen_assert((RowsAtCompileTime == Dynamic || RowsAtCompileTime == blockRows) &&
                 (ColsAtCompileTime == Dynamic || ColsAtCompileTime == blockCols));
@@ -175,11 +175,11 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
 public:
  typedef Impl Base;
  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr, i) {}
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr, i) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol)
      : Impl(xpr, startRow, startCol) {}
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol,
-                                                            Index blockRows, Index blockCols)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows,
+                                                  Index blockCols)
      : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
 };

@@ -196,9 +196,11 @@ class BlockImpl_dense : public internal::dense_xpr_base<Block<XprType, BlockRows
  EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)

+  // class InnerIterator; // FIXME apparently never used
+
  /** Column or Row constructor
   */
-  EIGEN_DEVICE_FUNC constexpr BlockImpl_dense(XprType& xpr, Index i)
+  EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i)
      : m_xpr(xpr),
        // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
        // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
@@ -211,17 +213,17 @@ class BlockImpl_dense : public internal::dense_xpr_base<Block<XprType, BlockRows

  /** Fixed-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+  EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(BlockRows), m_blockCols(BlockCols) {}

  /** Dynamic-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows,
-                                              Index blockCols)
+  EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows,
+                                           Index blockCols)
      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(blockRows), m_blockCols(blockCols) {}

-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_blockRows.value(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_blockCols.value(); }
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }

  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) {
    EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -287,9 +289,9 @@ class BlockImpl_dense : public internal::dense_xpr_base<Block<XprType, BlockRows

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; }

-  EIGEN_DEVICE_FUNC constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); }

-  EIGEN_DEVICE_FUNC constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); }

 protected:
  XprTypeNested m_xpr;
@@ -378,18 +380,18 @@ class BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel, true>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; }

  /** \sa MapBase::innerStride() */
-  EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index innerStride() const noexcept {
    return internal::traits<BlockType>::HasSameStorageOrderAsXprType ? m_xpr.innerStride() : m_xpr.outerStride();
  }

  /** \sa MapBase::outerStride() */
-  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept {
    return internal::traits<BlockType>::HasSameStorageOrderAsXprType ? m_xpr.outerStride() : m_xpr.innerStride();
  }

-  EIGEN_DEVICE_FUNC constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); }

-  EIGEN_DEVICE_FUNC constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); }

 #ifndef __SUNPRO_CC
  // FIXME sunstudio is not friendly with the above friend...
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -31,7 +31,7 @@ template <typename XprType>
 struct CommaInitializer {
  typedef typename XprType::Scalar Scalar;

-  EIGEN_DEVICE_FUNC constexpr CommaInitializer(XprType& xpr, const Scalar& s)
+  EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s)
      : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) {
    eigen_assert(m_xpr.rows() > 0 && m_xpr.cols() > 0 && "Cannot comma-initialize a 0x0 matrix (operator<<)");
    m_xpr.coeffRef(0, 0) = s;
@@ -48,6 +48,7 @@ struct CommaInitializer {

  /* Copy/Move constructor which transfers ownership. This is crucial in
   * absence of return value optimization to avoid assertions during destruction. */
+  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
  EIGEN_DEVICE_FUNC inline CommaInitializer(const CommaInitializer& o)
      : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
    // Mark original object as finished. In absence of R-value references we need to const_cast:
--- a/Eigen/src/Core/ConditionEstimator.h
+++ b/Eigen/src/Core/ConditionEstimator.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@gmail.com)
+// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -40,17 +40,18 @@ struct rcond_compute_sign<Vector, Vector, false> {
 * \a matrix that implements .solve() and .adjoint().solve() methods.
 *
 * This function implements Algorithms 4.1 and 5.1 from
- *   Higham, "Experience with a Matrix Norm Estimator",
- *   SIAM J. Sci. Stat. Comput., 11(4):804-809, 1990.
- * with Higham's alternating-sign safety-net estimate from
- *   Higham and Tisseur, "A Block Algorithm for Matrix 1-Norm Estimation,
- *   with an Application to 1-Norm Pseudospectra", SIAM J. Matrix Anal. Appl.,
- *   21(4):1185-1201, 2000.
+ *   http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
+ * which also forms the basis for the condition number estimators in
+ * LAPACK. Since at most 10 calls to the solve method of dec are
+ * performed, the total cost is O(dims^2), as opposed to O(dims^3)
+ * needed to compute the inverse matrix explicitly.
 *
- * The Hager/Higham gradient ascent uses at most 5 iterations of 2 solves
- * each, giving a total cost of O(n^2).
+ * The most common usage is in estimating the condition number
+ * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
+ * computed directly in O(n^2) operations.
 *
- * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, LLT.
+ * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
+ * LLT.
 *
 * \sa FullPivLU, PartialPivLU, LDLT, LLT.
 */
@@ -65,7 +66,7 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp

  eigen_assert(dec.rows() == dec.cols());
  const Index n = dec.rows();
-  if (n == 0) return RealScalar(0);
+  if (n == 0) return 0;

    // Disable Index to float conversion warning
 #ifdef __INTEL_COMPILER
@@ -79,12 +80,14 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp

  // lower_bound is a lower bound on
  //   ||inv(matrix)||_1  = sup_v ||inv(matrix) v||_1 / ||v||_1
-  // and is the objective maximized by the supergradient ascent algorithm below.
+  // and is the objective maximized by the ("super-") gradient ascent
+  // algorithm below.
  RealScalar lower_bound = v.template lpNorm<1>();
  if (n == 1) return lower_bound;

-  // Gradient ascent: the optimum is achieved at a unit vector e_j. Each
-  // iteration follows the supergradient to find which unit vector to probe next.
+  // Gradient ascent algorithm follows: We know that the optimum is achieved at
+  // one of the unit vectors v = e_i, so in each iteration we follow a
+  // super-gradient to move towards the optimal one.
  RealScalar old_lower_bound = lower_bound;
  Vector sign_vector(n);
  Vector old_sign_vector;
@@ -93,21 +96,21 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp
  for (int k = 0; k < 4; ++k) {
    sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
    if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
-      // Break if the sign vector stagnated.
+      // Break if the sign vector is unchanged since the previous iteration.
      break;
    }
-    // Supergradient: z = A^{-T} * sign(v), pick argmax |z_i|.
+    // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
    v = dec.adjoint().solve(sign_vector);
    v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
    if (v_max_abs_index == old_v_max_abs_index) {
-      // Optimality: supergradient points to the same unit vector.
+      // Break if the super-gradient selects the same index as last time.
      break;
    }
-    // Probe the best unit vector: v = A^{-1} * e_j.
-    v = dec.solve(Vector::Unit(n, v_max_abs_index));
+    // Move to the new unit vector e_j, where j = v_max_abs_index.
+    v = dec.solve(Vector::Unit(n, v_max_abs_index));  // v = inv(matrix) * e_j.
    lower_bound = v.template lpNorm<1>();
    if (lower_bound <= old_lower_bound) {
-      // No improvement from the gradient step.
+      // Break if the gradient step did not increase the lower_bound.
      break;
    }
    if (!is_complex) {
@@ -116,19 +119,25 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp
    old_v_max_abs_index = v_max_abs_index;
    old_lower_bound = lower_bound;
  }
-  // Higham's alternating-sign estimate: an independent safety-net that catches
-  // cases where the gradient ascent converges to a local maximum due to exact
-  // cancellation patterns (especially with permutations and backsubstitutions).
-  //   v_i = (-1)^i * (1 + i/(n-1)), then estimate = 2*||A^{-1}*v||_1 / (3*n).
+  // The following calculates an independent estimate of ||inv(matrix)||_1 by
+  // applying inv(matrix), via dec.solve(), to a vector with entries of slowly
+  // increasing magnitude and alternating sign:
+  //   v_i = (-1)^{i} (1 + (i / (n-1))), i = 0,...,n-1.
+  // This improvement to Hager's algorithm above is due to Higham. It was
+  // added to make the algorithm more robust in certain corner cases where
+  // large elements in the matrix might otherwise escape detection due to
+  // exact cancellation (especially when the solves correspond to a sequence
+  // of backsubstitutions and permutations), which could cause Hager's
+  // algorithm to vastly underestimate ||inv(matrix)||_1.
  Scalar alternating_sign(RealScalar(1));
  for (Index i = 0; i < n; ++i) {
-    // The static_cast is needed when Scalar is complex and RealScalar uses expression templates.
+    // The static_cast is needed when Scalar is complex and RealScalar implements expression templates.
    v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
    alternating_sign = -alternating_sign;
  }
  v = dec.solve(v);
-  const RealScalar alt_est = (RealScalar(2) * v.template lpNorm<1>()) / (RealScalar(3) * RealScalar(n));
-  return numext::maxi(lower_bound, alt_est);
+  const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
+  return numext::maxi(lower_bound, alternate_lower_bound);
 }

 /** \brief Reciprocal condition number estimator.
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
--- a/Eigen/src/Core/CoreIterators.h
+++ b/Eigen/src/Core/CoreIterators.h
@@ -57,7 +57,7 @@ class InnerIterator {
    m_iter.operator+=(i);
    return *this;
  }
-  EIGEN_STRONG_INLINE InnerIterator operator+(Index i) const {
+  EIGEN_STRONG_INLINE InnerIterator operator+(Index i) {
    InnerIterator result(*this);
    result += i;
    return result;
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -98,33 +98,33 @@ class CwiseBinaryOp : public CwiseBinaryOpImpl<BinaryOp, LhsType, RhsType,
  typedef std::remove_reference_t<RhsNested> RhsNested_;

 #if EIGEN_COMP_MSVC
-  // Required for Visual Studio, which may fail to inline the copy constructor otherwise.
+  // Required for Visual Studio; otherwise the copy constructor will probably not get inlined.
  EIGEN_STRONG_INLINE CwiseBinaryOp(const CwiseBinaryOp<BinaryOp, LhsType, RhsType>&) = default;
 #endif

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs,
-                                                                const BinaryOp& func = BinaryOp())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs,
+                                                      const BinaryOp& func = BinaryOp())
      : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) {
    eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
  }

-  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept {
    // return the fixed size type if available to enable compile time optimizations
    return internal::traits<internal::remove_all_t<LhsNested>>::RowsAtCompileTime == Dynamic ? m_rhs.rows()
                                                                                             : m_lhs.rows();
  }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept {
    // return the fixed size type if available to enable compile time optimizations
    return internal::traits<internal::remove_all_t<LhsNested>>::ColsAtCompileTime == Dynamic ? m_rhs.cols()
                                                                                             : m_lhs.cols();
  }

  /** \returns the left hand side nested expression */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const LhsNested_& lhs() const { return m_lhs; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNested_& lhs() const { return m_lhs; }
  /** \returns the right hand side nested expression */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const RhsNested_& rhs() const { return m_rhs; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNested_& rhs() const { return m_rhs; }
  /** \returns the functor representing the binary operation */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; }

 protected:
  LhsNested m_lhs;
@@ -145,7 +145,7 @@ class CwiseBinaryOpImpl : public internal::generic_xpr_base<CwiseBinaryOp<Binary
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived>& other) {
  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar, typename OtherDerived::Scalar>());
  return derived();
 }
@@ -156,7 +156,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase<Derived>::operator-=(c
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) {
  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar, typename OtherDerived::Scalar>());
  return derived();
 }
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -66,21 +66,21 @@ class CwiseNullaryOp : public internal::dense_xpr_base<CwiseNullaryOp<NullaryOp,
  typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
  EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)

-  EIGEN_DEVICE_FUNC constexpr CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
+  EIGEN_DEVICE_FUNC CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
      : m_rows(rows), m_cols(cols), m_functor(func) {
    eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 &&
                 (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
  }
-  EIGEN_DEVICE_FUNC constexpr CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp())
+  EIGEN_DEVICE_FUNC CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp())
      : CwiseNullaryOp(RowsAtCompileTime == 1 ? 1 : size, RowsAtCompileTime == 1 ? size : 1, func) {
    EIGEN_STATIC_ASSERT(CwiseNullaryOp::IsVectorAtCompileTime, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX);
  }

-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows.value(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols.value(); }

  /** \returns the functor representing the nullary operation */
-  EIGEN_DEVICE_FUNC constexpr const NullaryOp& functor() const { return m_functor; }
+  EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; }

 protected:
  const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
--- a/Eigen/src/Core/CwiseTernaryOp.h
+++ b/Eigen/src/Core/CwiseTernaryOp.h
@@ -118,7 +118,7 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<TernaryOp, Arg1Type, Arg2Type,
    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && a1.rows() == a3.rows() && a1.cols() == a3.cols());
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index rows() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const {
    // return the fixed size type if available to enable compile time
    // optimizations
    if (internal::traits<internal::remove_all_t<Arg1Nested>>::RowsAtCompileTime == Dynamic &&
@@ -130,7 +130,7 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<TernaryOp, Arg1Type, Arg2Type,
    else
      return m_arg1.rows();
  }
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index cols() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const {
    // return the fixed size type if available to enable compile time
    // optimizations
    if (internal::traits<internal::remove_all_t<Arg1Nested>>::ColsAtCompileTime == Dynamic &&
@@ -144,13 +144,13 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<TernaryOp, Arg1Type, Arg2Type,
  }

  /** \returns the first argument nested expression */
-  EIGEN_DEVICE_FUNC constexpr const Arg1Nested_& arg1() const { return m_arg1; }
+  EIGEN_DEVICE_FUNC const Arg1Nested_& arg1() const { return m_arg1; }
  /** \returns the first argument nested expression */
-  EIGEN_DEVICE_FUNC constexpr const Arg2Nested_& arg2() const { return m_arg2; }
+  EIGEN_DEVICE_FUNC const Arg2Nested_& arg2() const { return m_arg2; }
  /** \returns the third argument nested expression */
-  EIGEN_DEVICE_FUNC constexpr const Arg3Nested_& arg3() const { return m_arg3; }
+  EIGEN_DEVICE_FUNC const Arg3Nested_& arg3() const { return m_arg3; }
  /** \returns the functor representing the ternary operation */
-  EIGEN_DEVICE_FUNC constexpr const TernaryOp& functor() const { return m_functor; }
+  EIGEN_DEVICE_FUNC const TernaryOp& functor() const { return m_functor; }

 protected:
  Arg1Nested m_arg1;
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -57,26 +57,22 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
  typedef typename internal::ref_selector<XprType>::type XprTypeNested;
  typedef internal::remove_all_t<XprType> NestedExpression;

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr,
-                                                                        const UnaryOp& func = UnaryOp())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
      : m_xpr(xpr), m_functor(func) {}

-  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.rows(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.cols(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_xpr.cols(); }

  /** \returns the functor representing the unary operation */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; }

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const internal::remove_all_t<XprTypeNested>& nestedExpression()
-      const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t<XprTypeNested>& nestedExpression() const {
    return m_xpr;
  }

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE internal::remove_all_t<XprTypeNested>& nestedExpression() {
-    return m_xpr;
-  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::remove_all_t<XprTypeNested>& nestedExpression() { return m_xpr; }

 protected:
  XprTypeNested m_xpr;
--- a/Eigen/src/Core/CwiseUnaryView.h
+++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -140,24 +140,22 @@ class CwiseUnaryView : public internal::CwiseUnaryViewImpl<ViewOp, MatrixType, S
  typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
  typedef internal::remove_all_t<MatrixType> NestedExpression;

-  explicit EIGEN_DEVICE_FUNC constexpr inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
+  explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
      : m_matrix(mat), m_functor(func) {}

  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)

-  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_matrix.rows(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_matrix.cols(); }

  /** \returns the functor representing unary operation */
-  EIGEN_DEVICE_FUNC constexpr const ViewOp& functor() const { return m_functor; }
+  EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; }

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<MatrixTypeNested>& nestedExpression() const {
-    return m_matrix;
-  }
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<MatrixTypeNested>& nestedExpression() const { return m_matrix; }

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr std::remove_reference_t<MatrixTypeNested>& nestedExpression() { return m_matrix; }
+  EIGEN_DEVICE_FUNC std::remove_reference_t<MatrixTypeNested>& nestedExpression() { return m_matrix; }

 protected:
  MatrixTypeNested m_matrix;
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -260,21 +260,21 @@ class DenseBase

  /** Copies \a other into *this. \returns a reference to *this. */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other);

  /** Special case of the template operator=, in order to prevent the compiler
   * from generating a default operator= (issue hit with g++ 4.1)
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other);

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& operator=(const EigenBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase<OtherDerived>& other);

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& operator+=(const EigenBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase<OtherDerived>& other);

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& operator-=(const EigenBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase<OtherDerived>& other);

  template <typename OtherDerived>
  EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& func);
@@ -283,7 +283,7 @@ class DenseBase
   * Copies \a other into *this without evaluating other. \returns a reference to *this. */
  template <typename OtherDerived>
  /** \deprecated */
-  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC constexpr Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+  EIGEN_DEPRECATED EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase<OtherDerived>& other);

  EIGEN_DEVICE_FUNC CommaInitializer<Derived> operator<<(const Scalar& s);

@@ -348,13 +348,13 @@ class DenseBase
  EIGEN_DEVICE_FUNC Derived& setRandom();

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr bool isApprox(const DenseBase<OtherDerived>& other,
-                                            const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-  EIGEN_DEVICE_FUNC constexpr bool isMuchSmallerThan(
-      const RealScalar& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+  EIGEN_DEVICE_FUNC bool isApprox(const DenseBase<OtherDerived>& other,
+                                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+  EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other,
+                                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr bool isMuchSmallerThan(
-      const DenseBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+  EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+                                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

  EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value,
                                            const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@@ -366,13 +366,13 @@ class DenseBase
  EIGEN_DEVICE_FUNC inline bool hasNaN() const;
  EIGEN_DEVICE_FUNC inline bool allFinite() const;

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other);
-  template <bool Enable = internal::complex_array_access<Scalar>::value, typename = std::enable_if_t<Enable>>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other);
+  template <bool Enable = !internal::is_same<Scalar, RealScalar>::value, typename = std::enable_if_t<Enable>>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other);

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other);
-  template <bool Enable = internal::complex_array_access<Scalar>::value, typename = std::enable_if_t<Enable>>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other);
+  template <bool Enable = !internal::is_same<Scalar, RealScalar>::value, typename = std::enable_if_t<Enable>>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other);

  typedef internal::add_const_on_value_type_t<typename internal::eval<Derived>::type> EvalReturnType;
  /** \returns the matrix or vector obtained by evaluating this expression.
@@ -409,7 +409,7 @@ class DenseBase
    call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
  }

-  EIGEN_DEVICE_FUNC constexpr inline const NestByValue<Derived> nestByValue() const;
+  EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
  EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
  EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
  template <bool Enable>
@@ -431,7 +431,8 @@ class DenseBase

  // By default, the fastest version with undefined NaN propagation semantics is
  // used.
-  // TODO(rmlarsen): Replace with default template argument (C++14 is now the minimum standard).
+  // TODO(rmlarsen): Replace with a default template argument; this header
+  // already uses std::enable_if_t and defaulted template arguments elsewhere.
  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar minCoeff() const {
    return minCoeff<PropagateFast>();
  }
@@ -448,7 +449,7 @@ class DenseBase
  template <int NaNPropagation, typename IndexType>
  EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;

-  // TODO(rmlarsen): Replace these methods with a default template argument (C++14 is now the minimum standard).
+  // TODO(rmlarsen): Replace these methods with a default template argument.
  template <typename IndexType>
  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const {
    return minCoeff<PropagateFast>(row, col);
@@ -523,25 +524,25 @@ class DenseBase
  static const RandomReturnType Random();

  template <typename ThenDerived, typename ElseDerived>
-  inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
-      internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar,
-                                         typename DenseBase<ElseDerived>::Scalar, Scalar>,
-      ThenDerived, ElseDerived, Derived>
-  select(const DenseBase<ThenDerived>& thenMatrix, const DenseBase<ElseDerived>& elseMatrix) const;
+  inline EIGEN_DEVICE_FUNC
+      CwiseTernaryOp<internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar,
+                                                        typename DenseBase<ElseDerived>::Scalar, Scalar>,
+                     ThenDerived, ElseDerived, Derived>
+      select(const DenseBase<ThenDerived>& thenMatrix, const DenseBase<ElseDerived>& elseMatrix) const;

  template <typename ThenDerived>
-  inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
-      internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar,
-                                         typename DenseBase<ThenDerived>::Scalar, Scalar>,
-      ThenDerived, typename DenseBase<ThenDerived>::ConstantReturnType, Derived>
-  select(const DenseBase<ThenDerived>& thenMatrix, const typename DenseBase<ThenDerived>::Scalar& elseScalar) const;
+  inline EIGEN_DEVICE_FUNC
+      CwiseTernaryOp<internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar,
+                                                        typename DenseBase<ThenDerived>::Scalar, Scalar>,
+                     ThenDerived, typename DenseBase<ThenDerived>::ConstantReturnType, Derived>
+      select(const DenseBase<ThenDerived>& thenMatrix, const typename DenseBase<ThenDerived>::Scalar& elseScalar) const;

  template <typename ElseDerived>
-  inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
-      internal::scalar_boolean_select_op<typename DenseBase<ElseDerived>::Scalar,
-                                         typename DenseBase<ElseDerived>::Scalar, Scalar>,
-      typename DenseBase<ElseDerived>::ConstantReturnType, ElseDerived, Derived>
-  select(const typename DenseBase<ElseDerived>::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+  inline EIGEN_DEVICE_FUNC
+      CwiseTernaryOp<internal::scalar_boolean_select_op<typename DenseBase<ElseDerived>::Scalar,
+                                                        typename DenseBase<ElseDerived>::Scalar, Scalar>,
+                     typename DenseBase<ElseDerived>::ConstantReturnType, ElseDerived, Derived>
+      select(const typename DenseBase<ElseDerived>::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;

  template <int p>
  RealScalar lpNorm() const;
@@ -579,12 +580,12 @@ class DenseBase
 #else
  typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit,
                             internal::pointer_based_stl_iterator<Derived>,
-                             internal::generic_randaccess_stl_iterator<Derived>>
+                             internal::generic_randaccess_stl_iterator<Derived> >
      iterator_type;

  typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit,
                             internal::pointer_based_stl_iterator<const Derived>,
-                             internal::generic_randaccess_stl_iterator<const Derived>>
+                             internal::generic_randaccess_stl_iterator<const Derived> >
      const_iterator_type;

  // Stl-style iterators are supported only for vectors.
@@ -614,7 +615,6 @@ class DenseBase
 #define EIGEN_DOC_UNARY_ADDONS(X, Y)
 #include "../plugins/CommonCwiseUnaryOps.inc"
 #include "../plugins/BlockMethods.inc"
-// Defines operator()(const RowIndices&, const ColIndices&) and other indexed view methods.
 #include "../plugins/IndexedViewMethods.inc"
 #include "../plugins/ReshapedMethods.inc"
 #ifdef EIGEN_DENSEBASE_PLUGIN
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -67,14 +67,14 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
  using Base::rows;
  using Base::size;

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const {
    return int(Derived::RowsAtCompileTime) == 1   ? 0
           : int(Derived::ColsAtCompileTime) == 1 ? inner
           : int(Derived::Flags) & RowMajorBit    ? outer
                                                  : inner;
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const {
    return int(Derived::ColsAtCompileTime) == 1   ? 0
           : int(Derived::RowsAtCompileTime) == 1 ? inner
           : int(Derived::Flags) & RowMajorBit    ? inner
@@ -95,12 +95,12 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   *
   * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
   */
-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeff(Index row, Index col) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const {
    eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
    return internal::evaluator<Derived>(derived()).coeff(row, col);
  }

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const {
    return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner));
  }

@@ -108,19 +108,11 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   *
   * \sa operator()(Index,Index), operator[](Index)
   */
-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator()(Index row, Index col) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index row, Index col) const {
    eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
    return coeff(row, col);
  }

-#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT
-  /** \returns the coefficient at given the given row and column.
-   *
-   * \sa operator[](Index,Index), operator[](Index)
-   */
-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator[](Index row, Index col) const { return operator()(row, col); }
-#endif
-
  /** Short version: don't use this function, use
   * \link operator[](Index) const \endlink instead.
   *
@@ -136,7 +128,7 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
   */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const {
    EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
                        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
    eigen_internal_assert(index >= 0 && index < size());
@@ -151,7 +143,7 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   * z() const, w() const
   */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator[](Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator[](Index index) const {
    EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
    eigen_assert(index >= 0 && index < size());
@@ -168,32 +160,32 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   * z() const, w() const
   */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator()(Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index index) const {
    eigen_assert(index >= 0 && index < size());
    return coeff(index);
  }

  /** equivalent to operator[](0).  */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType x() const { return (*this)[0]; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType x() const { return (*this)[0]; }

  /** equivalent to operator[](1).  */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType y() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType y() const {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS);
    return (*this)[1];
  }

  /** equivalent to operator[](2).  */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType z() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType z() const {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS);
    return (*this)[2];
  }

  /** equivalent to operator[](3).  */

-  EIGEN_DEVICE_FUNC constexpr CoeffReturnType w() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType w() const {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS);
    return (*this)[3];
  }
@@ -311,12 +303,12 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
   *
   * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
   */
-  EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index row, Index col) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) {
    eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
    return internal::evaluator<Derived>(derived()).coeffRef(row, col);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) {
    return coeffRef(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner));
  }

@@ -324,19 +316,12 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
   *
   * \sa operator[](Index)
   */
-  EIGEN_DEVICE_FUNC constexpr Scalar& operator()(Index row, Index col) {
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator()(Index row, Index col) {
    eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
    return coeffRef(row, col);
  }

-#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT
-  /** \returns a reference to the coefficient at given the given row and column.
-   *
-   * \sa operator[](Index)
-   */
-  EIGEN_DEVICE_FUNC constexpr Scalar& operator[](Index row, Index col) { return operator()(row, col); }
-#endif
-
  /** Short version: don't use this function, use
   * \link operator[](Index) \endlink instead.
   *
@@ -352,7 +337,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
   * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
   */

-  EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) {
    EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
                        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
    eigen_internal_assert(index >= 0 && index < size());
@@ -366,7 +351,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
   * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
   */

-  EIGEN_DEVICE_FUNC constexpr Scalar& operator[](Index index) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator[](Index index) {
    EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
    eigen_assert(index >= 0 && index < size());
@@ -382,32 +367,32 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
   * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
   */

-  EIGEN_DEVICE_FUNC constexpr Scalar& operator()(Index index) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator()(Index index) {
    eigen_assert(index >= 0 && index < size());
    return coeffRef(index);
  }

  /** equivalent to operator[](0).  */

-  EIGEN_DEVICE_FUNC constexpr Scalar& x() { return (*this)[0]; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& x() { return (*this)[0]; }

  /** equivalent to operator[](1).  */

-  EIGEN_DEVICE_FUNC constexpr Scalar& y() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& y() {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS);
    return (*this)[1];
  }

  /** equivalent to operator[](2).  */

-  EIGEN_DEVICE_FUNC constexpr Scalar& z() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& z() {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS);
    return (*this)[2];
  }

  /** equivalent to operator[](3).  */

-  EIGEN_DEVICE_FUNC constexpr Scalar& w() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& w() {
    EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS);
    return (*this)[3];
  }
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -54,7 +54,7 @@ template <typename T, int Size, int MatrixOrArrayOptions,
 struct plain_array {
  EIGEN_ALIGN_TO_BOUNDARY(Alignment) T array[Size];
 #if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR)
-  EIGEN_DEVICE_FUNC constexpr plain_array() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default;
 #else
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() {
    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment)
@@ -68,7 +68,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 0> {
  // on some 32-bit platforms, stack-allocated arrays are aligned to 4 bytes, not the preferred alignment of T
  EIGEN_ALIGN_TO_BOUNDARY(alignof(T)) T array[Size];
 #if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR)
-  EIGEN_DEVICE_FUNC constexpr plain_array() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default;
 #else
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) }
 #endif
@@ -92,8 +92,8 @@ class DenseStorage_impl {

 public:
 #ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
 #else
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
@@ -103,18 +103,19 @@ class DenseStorage_impl {
    smart_copy(other.m_data.array, other.m_data.array + Size, m_data.array);
  }
 #endif
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) {
    numext::swap(m_data, other.m_data);
  }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * Cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/,
+                                                                          Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; }
 };
 template <typename T, int Size, int Cols, int Options>
 class DenseStorage_impl<T, Size, Dynamic, Cols, Options> {
@@ -122,7 +123,7 @@ class DenseStorage_impl<T, Size, Dynamic, Cols, Options> {
  Index m_rows = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_rows(other.m_rows) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -131,7 +132,7 @@ class DenseStorage_impl<T, Size, Dynamic, Cols, Options> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/)
      : m_rows(rows) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
-    EIGEN_UNUSED_VARIABLE(size);
+    EIGEN_UNUSED_VARIABLE(size)
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) {
    smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array);
@@ -142,13 +143,17 @@ class DenseStorage_impl<T, Size, Dynamic, Cols, Options> {
    swap_plain_array(m_data, other.m_data, size(), other.size());
    numext::swap(m_rows, other.m_rows);
  }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) {
+    m_rows = rows;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) {
+    m_rows = rows;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; }
 };
 template <typename T, int Size, int Rows, int Options>
 class DenseStorage_impl<T, Size, Rows, Dynamic, Options> {
@@ -156,7 +161,7 @@ class DenseStorage_impl<T, Size, Rows, Dynamic, Options> {
  Index m_cols = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_cols(other.m_cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -165,7 +170,7 @@ class DenseStorage_impl<T, Size, Rows, Dynamic, Options> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols)
      : m_cols(cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
-    EIGEN_UNUSED_VARIABLE(size);
+    EIGEN_UNUSED_VARIABLE(size)
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) {
    smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array);
@@ -176,13 +181,17 @@ class DenseStorage_impl<T, Size, Rows, Dynamic, Options> {
    swap_plain_array(m_data, other.m_data, size(), other.size());
    numext::swap(m_cols, other.m_cols);
  }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) {
+    m_cols = cols;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) {
+    m_cols = cols;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; }
 };
 template <typename T, int Size, int Options>
 class DenseStorage_impl<T, Size, Dynamic, Dynamic, Options> {
@@ -191,7 +200,7 @@ class DenseStorage_impl<T, Size, Dynamic, Dynamic, Options> {
  Index m_cols = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_rows(other.m_rows), m_cols(other.m_cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -200,7 +209,7 @@ class DenseStorage_impl<T, Size, Dynamic, Dynamic, Options> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols)
      : m_rows(rows), m_cols(cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
-    EIGEN_UNUSED_VARIABLE(size);
+    EIGEN_UNUSED_VARIABLE(size)
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) {
    smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array);
@@ -213,72 +222,87 @@ class DenseStorage_impl<T, Size, Dynamic, Dynamic, Options> {
    numext::swap(m_rows, other.m_rows);
    numext::swap(m_cols, other.m_cols);
  }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) {
    m_rows = rows;
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index cols) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) {
    m_rows = rows;
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; }
 };
 // null matrix variants
 template <typename T, int Rows, int Cols, int Options>
 class DenseStorage_impl<T, 0, Rows, Cols, Options> {
 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl&) {}
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * Cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl&) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/,
+                                                                          Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; }
 };
 template <typename T, int Cols, int Options>
 class DenseStorage_impl<T, 0, Dynamic, Cols, Options> {
  Index m_rows = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) : m_rows(rows) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_rows, other.m_rows); }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/)
+      : m_rows(rows) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
+    numext::swap(m_rows, other.m_rows);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) {
+    m_rows = rows;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) {
+    m_rows = rows;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; }
 };
 template <typename T, int Rows, int Options>
 class DenseStorage_impl<T, 0, Rows, Dynamic, Options> {
  Index m_cols = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) : m_cols(cols) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_cols, other.m_cols); }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols)
+      : m_cols(cols) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
+    numext::swap(m_cols, other.m_cols);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) {
+    m_cols = cols;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) {
+    m_cols = cols;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; }
 };
 template <typename T, int Options>
 class DenseStorage_impl<T, 0, Dynamic, Dynamic, Options> {
@@ -286,27 +310,28 @@ class DenseStorage_impl<T, 0, Dynamic, Dynamic, Options> {
  Index m_cols = 0;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols)
+      : m_rows(rows), m_cols(cols) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
    numext::swap(m_rows, other.m_rows);
    numext::swap(m_cols, other.m_cols);
  }
-  EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) {
    m_rows = rows;
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index cols) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) {
    m_rows = rows;
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; }
 };
 // fixed-size matrix with dynamic memory allocation not currently supported
 template <typename T, int Rows, int Cols, int Options>
@@ -320,7 +345,7 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Cols, Options> {

 public:
  static constexpr int Size = Dynamic;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_data(conditional_aligned_new_auto<T, Align>(other.size())), m_rows(other.m_rows) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -330,7 +355,7 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Cols, Options> {
      : m_data(conditional_aligned_new_auto<T, Align>(size)), m_rows(rows) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
      : m_data(other.m_data), m_rows(other.m_rows) {
    other.m_data = nullptr;
    other.m_rows = 0;
@@ -341,11 +366,11 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Cols, Options> {
    smart_copy(other.m_data, other.m_data + other.size(), m_data);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
    this->swap(other);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
    numext::swap(m_data, other.m_data);
    numext::swap(m_rows, other.m_rows);
  }
@@ -362,11 +387,11 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Cols, Options> {
    }
    m_rows = rows;
  }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; }
 };
 template <typename T, int Rows, int Options>
 class DenseStorage_impl<T, Dynamic, Rows, Dynamic, Options> {
@@ -376,7 +401,7 @@ class DenseStorage_impl<T, Dynamic, Rows, Dynamic, Options> {

 public:
  static constexpr int Size = Dynamic;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_data(conditional_aligned_new_auto<T, Align>(other.size())), m_cols(other.m_cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -386,7 +411,7 @@ class DenseStorage_impl<T, Dynamic, Rows, Dynamic, Options> {
      : m_data(conditional_aligned_new_auto<T, Align>(size)), m_cols(cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
      : m_data(other.m_data), m_cols(other.m_cols) {
    other.m_data = nullptr;
    other.m_cols = 0;
@@ -397,11 +422,11 @@ class DenseStorage_impl<T, Dynamic, Rows, Dynamic, Options> {
    smart_copy(other.m_data, other.m_data + other.size(), m_data);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
    this->swap(other);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
    numext::swap(m_data, other.m_data);
    numext::swap(m_cols, other.m_cols);
  }
@@ -418,11 +443,11 @@ class DenseStorage_impl<T, Dynamic, Rows, Dynamic, Options> {
    }
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; }
 };
 template <typename T, int Options>
 class DenseStorage_impl<T, Dynamic, Dynamic, Dynamic, Options> {
@@ -433,7 +458,7 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Dynamic, Options> {

 public:
  static constexpr int Size = Dynamic;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other)
      : m_data(conditional_aligned_new_auto<T, Align>(other.size())), m_rows(other.m_rows), m_cols(other.m_cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size())
@@ -443,7 +468,7 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Dynamic, Options> {
      : m_data(conditional_aligned_new_auto<T, Align>(size)), m_rows(rows), m_cols(cols) {
    EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept
      : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {
    other.m_data = nullptr;
    other.m_rows = 0;
@@ -455,11 +480,11 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Dynamic, Options> {
    smart_copy(other.m_data, other.m_data + other.size(), m_data);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept {
    this->swap(other);
    return *this;
  }
-  EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept {
    numext::swap(m_data, other.m_data);
    numext::swap(m_rows, other.m_rows);
    numext::swap(m_cols, other.m_cols);
@@ -479,11 +504,11 @@ class DenseStorage_impl<T, Dynamic, Dynamic, Dynamic, Options> {
    m_rows = rows;
    m_cols = cols;
  }
-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
-  EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; }
-  EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; }
-  EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; }
 };
 template <typename T, int Size, int Rows, int Cols>
 struct use_default_move {
@@ -512,14 +537,15 @@ class DenseStorage : public internal::DenseStorage_impl<T, Size, Rows, Cols, Opt
  using Base = internal::DenseStorage_impl<T, Size, Rows, Cols, Options>;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) : Base(size, rows, cols) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols)
+      : Base(size, rows, cols) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default;
  // if DenseStorage meets the requirements of use_default_move, then use the move construction and move assignment
  // operation defined in DenseStorage_impl, or the compiler-generated version if none is defined
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&&) = default;
 };
 template <typename T, int Size, int Rows, int Cols, int Options>
 class DenseStorage<T, Size, Rows, Cols, Options, false>
@@ -527,15 +553,16 @@ class DenseStorage<T, Size, Rows, Cols, Options, false>
  using Base = internal::DenseStorage_impl<T, Size, Rows, Cols, Options>;

 public:
-  EIGEN_DEVICE_FUNC constexpr DenseStorage() = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default;
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) : Base(size, rows, cols) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols)
+      : Base(size, rows, cols) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default;
  // if DenseStorage does not meet the requirements of use_default_move, then defer to the copy construction and copy
  // assignment behavior
-  EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&& other)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&& other)
      : DenseStorage(static_cast<const DenseStorage&>(other)) {}
-  EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&& other) {
    *this = other;
    return *this;
  }
--- a/Eigen/src/Core/DeviceWrapper.h
+++ b/Eigen/src/Core/DeviceWrapper.h
@@ -87,7 +87,7 @@ template <typename Kernel, typename Device, int Traversal = Kernel::AssignmentTr
          int Unrolling = Kernel::AssignmentTraits::Unrolling>
 struct dense_assignment_loop_with_device {
  using Base = dense_assignment_loop<Kernel, Traversal, Unrolling>;
-  static EIGEN_DEVICE_FUNC constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); }
 };

 // entry point for a generic expression with device
@@ -104,7 +104,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(De
  using ActualDstType = std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&>;
  ActualDstType actualDst(dst.derived());

-  // TODO: check whether this is the right place to perform these checks:
+  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -71,14 +71,14 @@ class Diagonal : public internal::dense_xpr_base<Diagonal<MatrixType, DiagIndex_
  typedef typename internal::dense_xpr_base<Diagonal>::type Base;
  EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

-  EIGEN_DEVICE_FUNC constexpr explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex)
+  EIGEN_DEVICE_FUNC explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex)
      : m_matrix(matrix), m_index(a_index) {
    eigen_assert(a_index <= m_matrix.cols() && -a_index <= m_matrix.rows());
  }

  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

-  EIGEN_DEVICE_FUNC constexpr inline Index rows() const {
+  EIGEN_DEVICE_FUNC inline Index rows() const {
    return m_index.value() < 0 ? numext::mini<Index>(m_matrix.cols(), m_matrix.rows() + m_index.value())
                               : numext::mini<Index>(m_matrix.rows(), m_matrix.cols() - m_index.value());
  }
@@ -91,12 +91,8 @@ class Diagonal : public internal::dense_xpr_base<Diagonal<MatrixType, DiagIndex_

  typedef std::conditional_t<internal::is_lvalue<MatrixType>::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue;

-  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() {
-    return rows() > 0 ? &(m_matrix.coeffRef(rowOffset(), colOffset())) : nullptr;
-  }
-  EIGEN_DEVICE_FUNC inline const Scalar* data() const {
-    return rows() > 0 ? &(m_matrix.coeffRef(rowOffset(), colOffset())) : nullptr;
-  }
+  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
+  EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }

  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) {
    EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
@@ -124,12 +120,11 @@ class Diagonal : public internal::dense_xpr_base<Diagonal<MatrixType, DiagIndex_
    return m_matrix.coeff(idx + rowOffset(), idx + colOffset());
  }

-  EIGEN_DEVICE_FUNC constexpr inline const internal::remove_all_t<typename MatrixType::Nested>& nestedExpression()
-      const {
+  EIGEN_DEVICE_FUNC inline const internal::remove_all_t<typename MatrixType::Nested>& nestedExpression() const {
    return m_matrix;
  }

-  EIGEN_DEVICE_FUNC constexpr inline Index index() const { return m_index.value(); }
+  EIGEN_DEVICE_FUNC inline Index index() const { return m_index.value(); }

 protected:
  typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
@@ -137,11 +132,15 @@ class Diagonal : public internal::dense_xpr_base<Diagonal<MatrixType, DiagIndex_

 private:
  // some compilers may fail to optimize std::max etc in case of compile-time constants...
-  EIGEN_DEVICE_FUNC constexpr Index absDiagIndex() const noexcept {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index absDiagIndex() const noexcept {
    return m_index.value() > 0 ? m_index.value() : -m_index.value();
  }
-  EIGEN_DEVICE_FUNC constexpr Index rowOffset() const noexcept { return m_index.value() > 0 ? 0 : -m_index.value(); }
-  EIGEN_DEVICE_FUNC constexpr Index colOffset() const noexcept { return m_index.value() > 0 ? m_index.value() : 0; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const noexcept {
+    return m_index.value() > 0 ? 0 : -m_index.value();
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const noexcept {
+    return m_index.value() > 0 ? m_index.value() : 0;
+  }
  // trigger a compile-time error if someone try to call packet
  template <int LoadMode>
  typename MatrixType::PacketReturnType packet(Index) const;
@@ -158,13 +157,13 @@ class Diagonal : public internal::dense_xpr_base<Diagonal<MatrixType, DiagIndex_
 *
 * \sa class Diagonal */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() {
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() {
  return DiagonalReturnType(derived());
 }

 /** This is the const version of diagonal(). */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr const typename MatrixBase<Derived>::ConstDiagonalReturnType MatrixBase<Derived>::diagonal()
+EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::ConstDiagonalReturnType MatrixBase<Derived>::diagonal()
    const {
  return ConstDiagonalReturnType(derived());
 }
@@ -181,14 +180,13 @@ EIGEN_DEVICE_FUNC constexpr const typename MatrixBase<Derived>::ConstDiagonalRet
 *
 * \sa MatrixBase::diagonal(), class Diagonal */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr Diagonal<Derived, DynamicIndex> MatrixBase<Derived>::diagonal(Index index) {
+EIGEN_DEVICE_FUNC inline Diagonal<Derived, DynamicIndex> MatrixBase<Derived>::diagonal(Index index) {
  return Diagonal<Derived, DynamicIndex>(derived(), index);
 }

 /** This is the const version of diagonal(Index). */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr const Diagonal<const Derived, DynamicIndex> MatrixBase<Derived>::diagonal(
-    Index index) const {
+EIGEN_DEVICE_FUNC inline const Diagonal<const Derived, DynamicIndex> MatrixBase<Derived>::diagonal(Index index) const {
  return Diagonal<const Derived, DynamicIndex>(derived(), index);
 }

@@ -205,14 +203,14 @@ EIGEN_DEVICE_FUNC constexpr const Diagonal<const Derived, DynamicIndex> MatrixBa
 * \sa MatrixBase::diagonal(), class Diagonal */
 template <typename Derived>
 template <int Index_>
-EIGEN_DEVICE_FUNC constexpr Diagonal<Derived, Index_> MatrixBase<Derived>::diagonal() {
+EIGEN_DEVICE_FUNC inline Diagonal<Derived, Index_> MatrixBase<Derived>::diagonal() {
  return Diagonal<Derived, Index_>(derived());
 }

 /** This is the const version of diagonal<int>(). */
 template <typename Derived>
 template <int Index_>
-EIGEN_DEVICE_FUNC constexpr const Diagonal<const Derived, Index_> MatrixBase<Derived>::diagonal() const {
+EIGEN_DEVICE_FUNC inline const Diagonal<const Derived, Index_> MatrixBase<Derived>::diagonal() const {
  return Diagonal<const Derived, Index_>(derived());
 }

--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -61,7 +61,7 @@ class DiagonalBase : public EigenBase<Derived> {
  /**
   * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type,
   * not an expression.
-   * \returns A dense matrix, with its diagonal entries set from the derived object. */
+   * \returns A dense matrix, with its diagonal entries set from the derived object. */
  EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); }

  /** \returns a reference to the derived object's vector of diagonal coefficients. */
@@ -184,22 +184,21 @@ class DiagonalMatrix : public DiagonalBase<DiagonalMatrix<Scalar_, SizeAtCompile

 public:
  /** const version of diagonal(). */
-  EIGEN_DEVICE_FUNC constexpr inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
+  EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
  /** \returns a reference to the stored vector of diagonal coefficients. */
-  EIGEN_DEVICE_FUNC constexpr inline DiagonalVectorType& diagonal() { return m_diagonal; }
+  EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; }

  /** Default constructor without initialization */
-  EIGEN_DEVICE_FUNC constexpr inline DiagonalMatrix() {}
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix() {}

  /** Constructs a diagonal matrix with given dimension  */
-  EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}

  /** 2D constructor. */
-  EIGEN_DEVICE_FUNC constexpr inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x, y) {}
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x, y) {}

  /** 3D constructor. */
-  EIGEN_DEVICE_FUNC constexpr inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z)
-      : m_diagonal(x, y, z) {}
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x, y, z) {}

  /** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients.
   *
@@ -210,8 +209,8 @@ class DiagonalMatrix : public DiagonalBase<DiagonalMatrix<Scalar_, SizeAtCompile
   * \sa DiagonalMatrix(const Scalar&, const Scalar&, const Scalar&)
   */
  template <typename... ArgTypes>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,
-                                                                 const ArgTypes&... args)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,
+                                                       const ArgTypes&... args)
      : m_diagonal(a0, a1, a2, args...) {}

  /** \brief Constructs a DiagonalMatrix and initializes it by elements given by an initializer list of initializer
@@ -222,12 +221,11 @@ class DiagonalMatrix : public DiagonalBase<DiagonalMatrix<Scalar_, SizeAtCompile
      : m_diagonal(list) {}

  /** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */
-  EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {}
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {}

  /** Copy constructor. */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other)
-      : m_diagonal(other.diagonal()) {}
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}

 #ifndef EIGEN_PARSED_BY_DOXYGEN
  /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */
@@ -236,8 +234,7 @@ class DiagonalMatrix : public DiagonalBase<DiagonalMatrix<Scalar_, SizeAtCompile

  /** generic constructor from expression of the diagonal coefficients */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other)
-      : m_diagonal(other) {}
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other) {}

  /** Copy operator. */
  template <typename OtherDerived>
@@ -328,11 +325,10 @@ class DiagonalWrapper : public DiagonalBase<DiagonalWrapper<DiagonalVectorType_>
 #endif

  /** Constructor from expression of diagonal coefficients to wrap. */
-  EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal)
-      : m_diagonal(a_diagonal) {}
+  EIGEN_DEVICE_FUNC explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}

  /** \returns a const reference to the wrapped expression of diagonal coefficients. */
-  EIGEN_DEVICE_FUNC constexpr const DiagonalVectorType& diagonal() const { return m_diagonal; }
+  EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; }

 protected:
  typename DiagonalVectorType::Nested m_diagonal;
@@ -348,7 +344,7 @@ class DiagonalWrapper : public DiagonalBase<DiagonalWrapper<DiagonalVectorType_>
 * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
 **/
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const {
+EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const {
  return DiagonalWrapper<const Derived>(derived());
 }

@@ -376,55 +372,6 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const {
  return true;
 }

-/** \returns DiagonalWrapper.
- *
- * Example: \include MatrixBase_diagonalView.cpp
- * Output: \verbinclude MatrixBase_diagonalView.out
- *
- * \sa diagonalView()
- */
-
-/** This is the non-const version of diagonalView() with DiagIndex_ . */
-template <typename Derived>
-template <int DiagIndex_>
-EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<Derived, DiagIndex_>> MatrixBase<Derived>::diagonalView() {
-  typedef Diagonal<Derived, DiagIndex_> DiagType;
-  typedef DiagonalWrapper<DiagType> ReturnType;
-  DiagType diag(this->derived());
-  return ReturnType(diag);
-}
-
-/** This is the const version of diagonalView() with DiagIndex_ . */
-template <typename Derived>
-template <int DiagIndex_>
-EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<const Derived, DiagIndex_>> MatrixBase<Derived>::diagonalView()
-    const {
-  typedef Diagonal<const Derived, DiagIndex_> DiagType;
-  typedef DiagonalWrapper<DiagType> ReturnType;
-  DiagType diag(this->derived());
-  return ReturnType(diag);
-}
-
-/** This is the non-const version of diagonalView() with dynamic index. */
-template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<Derived, DynamicIndex>> MatrixBase<Derived>::diagonalView(
-    Index index) {
-  typedef Diagonal<Derived, DynamicIndex> DiagType;
-  typedef DiagonalWrapper<DiagType> ReturnType;
-  DiagType diag(this->derived(), index);
-  return ReturnType(diag);
-}
-
-/** This is the const version of diagonalView() with dynamic index. */
-template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<const Derived, DynamicIndex>> MatrixBase<Derived>::diagonalView(
-    Index index) const {
-  typedef Diagonal<const Derived, DynamicIndex> DiagType;
-  typedef DiagonalWrapper<DiagType> ReturnType;
-  DiagType diag(this->derived(), index);
-  return ReturnType(diag);
-}
-
 namespace internal {

 template <>
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -20,14 +20,15 @@ namespace internal {
 template <typename Derived, typename Scalar = typename traits<Derived>::Scalar>
 struct squared_norm_impl {
  using Real = typename NumTraits<Scalar>::Real;
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Real run(const Derived& a) {
-    return a.realView().cwiseAbs2().sum();
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) {
+    Scalar result = a.unaryExpr(squared_norm_functor<Scalar>()).sum();
+    return numext::real(result) + numext::imag(result);
  }
 };

 template <typename Derived>
 struct squared_norm_impl<Derived, bool> {
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); }
 };

 }  // end namespace internal
@@ -45,7 +46,7 @@ struct squared_norm_impl<Derived, bool> {
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
                                  typename internal::traits<OtherDerived>::Scalar>::ReturnType
    MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
@@ -56,19 +57,19 @@ EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE

 /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
 * In both cases, it consists in the sum of the square of all the matrix entries.
- * For vectors, this is also equal to the dot product of \c *this with itself.
+ * For vectors, this is also equal to the dot product of \c *this with itself.
 *
 * \sa dot(), norm(), lpNorm()
 */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::squaredNorm() const {
  return internal::squared_norm_impl<Derived>::run(derived());
 }

 /** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
 * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
- * For vectors, this is also equal to the square root of the dot product of \c *this with itself.
+ * For vectors, this is also equal to the square root of the dot product of \c *this with itself.
 *
 * \sa lpNorm(), dot(), squaredNorm()
 */
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -53,7 +53,7 @@ struct EigenBase {
  EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const {
    return *static_cast<Derived*>(const_cast<EigenBase*>(this));
  }
-  EIGEN_DEVICE_FUNC constexpr inline const Derived& const_derived() const { return *static_cast<const Derived*>(this); }
+  EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast<const Derived*>(this); }

  /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); }
@@ -65,13 +65,13 @@ struct EigenBase {

  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
  template <typename Dest>
-  EIGEN_DEVICE_FUNC constexpr inline void evalTo(Dest& dst) const {
+  EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const {
    derived().evalTo(dst);
  }

  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
  template <typename Dest>
-  EIGEN_DEVICE_FUNC constexpr inline void addTo(Dest& dst) const {
+  EIGEN_DEVICE_FUNC inline void addTo(Dest& dst) const {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
    typename Dest::PlainObject res(rows(), cols());
@@ -81,7 +81,7 @@ struct EigenBase {

  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
  template <typename Dest>
-  EIGEN_DEVICE_FUNC constexpr inline void subTo(Dest& dst) const {
+  EIGEN_DEVICE_FUNC inline void subTo(Dest& dst) const {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
    typename Dest::PlainObject res(rows(), cols());
@@ -91,7 +91,7 @@ struct EigenBase {

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
  template <typename Dest>
-  EIGEN_DEVICE_FUNC constexpr inline void applyThisOnTheRight(Dest& dst) const {
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
    dst = dst * this->derived();
@@ -99,7 +99,7 @@ struct EigenBase {

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
  template <typename Dest>
-  EIGEN_DEVICE_FUNC constexpr inline void applyThisOnTheLeft(Dest& dst) const {
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
    dst = this->derived() * dst;
@@ -125,21 +125,21 @@ struct EigenBase {
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived>& other) {
  call_assignment(derived(), other.derived());
  return derived();
 }

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived>& other) {
  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar, typename OtherDerived::Scalar>());
  return derived();
 }

 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived>& other) {
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived>& other) {
  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar, typename OtherDerived::Scalar>());
  return derived();
 }
--- a/Eigen/src/Core/Fill.h
+++ b/Eigen/src/Core/Fill.h
@@ -20,14 +20,11 @@ namespace internal {
 template <typename Xpr>
 struct eigen_fill_helper : std::false_type {};

-// Only enable std::fill_n for trivially copyable scalars.  GCC's libstdc++
-// fill_n pessimizes non-trivially-copyable types (extra moves per iteration),
-// causing measurable regressions for types like AutoDiffScalar (issue #2956).
 template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct eigen_fill_helper<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::is_trivially_copyable<Scalar> {};
+struct eigen_fill_helper<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {};

 template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
-struct eigen_fill_helper<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::is_trivially_copyable<Scalar> {};
+struct eigen_fill_helper<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {};

 template <typename Xpr, int BlockRows, int BlockCols>
 struct eigen_fill_helper<Block<Xpr, BlockRows, BlockCols, /*InnerPanel*/ true>> : eigen_fill_helper<Xpr> {};
@@ -63,12 +60,12 @@ struct eigen_fill_impl<Xpr, /*use_fill*/ false> {
  using Func = scalar_constant_op<Scalar>;
  using PlainObject = typename Xpr::PlainObject;
  using Constant = typename PlainObject::ConstantReturnType;
-  static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const Scalar& val) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) {
    const Constant src(dst.rows(), dst.cols(), val);
    run(dst, src);
  }
  template <typename SrcXpr>
-  static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const SrcXpr& src) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
    call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
  }
 };
@@ -96,10 +93,8 @@ struct eigen_fill_impl<Xpr, /*use_fill*/ true> {

 template <typename Xpr>
 struct eigen_memset_helper {
-  using Scalar = typename Xpr::Scalar;
-  static constexpr bool value = std::is_trivially_copyable<Scalar>::value &&
-                                !static_cast<bool>(NumTraits<Scalar>::RequireInitialization) &&
-                                eigen_fill_helper<Xpr>::value;
+  static constexpr bool value =
+      std::is_trivially_copyable<typename Xpr::Scalar>::value && eigen_fill_helper<Xpr>::value;
 };

 template <typename Xpr>
@@ -107,12 +102,12 @@ struct eigen_zero_impl<Xpr, /*use_memset*/ false> {
  using Scalar = typename Xpr::Scalar;
  using PlainObject = typename Xpr::PlainObject;
  using Zero = typename PlainObject::ZeroReturnType;
-  static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) {
    const Zero src(dst.rows(), dst.cols());
    run(dst, src);
  }
  template <typename SrcXpr>
-  static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const SrcXpr& src) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
    call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
  }
 };
--- a/Eigen/src/Core/FindCoeff.h
+++ b/Eigen/src/Core/FindCoeff.h
@@ -34,11 +34,11 @@ struct max_coeff_functor {

 template <typename Scalar>
 struct max_coeff_functor<Scalar, PropagateNaN, false> {
-  EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
+  EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) {
    return (candidate > incumbent) || ((candidate != candidate) && (incumbent == incumbent));
  }
  template <typename Packet>
-  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
+  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) {
    return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(incumbent));
  }
  template <typename Packet>
@@ -79,11 +79,11 @@ struct min_coeff_functor {

 template <typename Scalar>
 struct min_coeff_functor<Scalar, PropagateNaN, false> {
-  EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
+  EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) {
    return (candidate < incumbent) || ((candidate != candidate) && (incumbent == incumbent));
  }
  template <typename Packet>
-  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
+  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) {
    return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(incumbent));
  }
  template <typename Packet>
--- a/Eigen/src/Core/ForceAlignedAccess.h
+++ b/Eigen/src/Core/ForceAlignedAccess.h
@@ -39,7 +39,7 @@ class ForceAlignedAccess : public internal::dense_xpr_base<ForceAlignedAccess<Ex
  typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
  EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)

-  EIGEN_DEVICE_FUNC explicit constexpr ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+  EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}

  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); }
@@ -103,6 +103,25 @@ inline ForceAlignedAccess<Derived> MatrixBase<Derived>::forceAlignedAccess() {
  return ForceAlignedAccess<Derived>(derived());
 }

+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template <typename Derived>
+template <bool Enable>
+inline add_const_on_value_type_t<std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&>>
+MatrixBase<Derived>::forceAlignedAccessIf() const {
+  return derived();  // FIXME This should not work but apparently is never used
+}
+
+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template <typename Derived>
+template <bool Enable>
+inline std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&> MatrixBase<Derived>::forceAlignedAccessIf() {
+  return derived();  // FIXME This should not work but apparently is never used
+}
+
 }  // end namespace Eigen

 #endif  // EIGEN_FORCEALIGNEDACCESS_H
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -86,8 +86,8 @@ struct isMuchSmallerThan_scalar_selector<Derived, true> {
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr bool DenseBase<Derived>::isApprox(const DenseBase<OtherDerived>& other,
-                                                              const RealScalar& prec) const {
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox(const DenseBase<OtherDerived>& other,
+                                                    const RealScalar& prec) const {
  return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
 }

@@ -105,8 +105,8 @@ EIGEN_DEVICE_FUNC constexpr bool DenseBase<Derived>::isApprox(const DenseBase<Ot
 * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
 */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr bool DenseBase<Derived>::isMuchSmallerThan(const typename NumTraits<Scalar>::Real& other,
-                                                                       const RealScalar& prec) const {
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(const typename NumTraits<Scalar>::Real& other,
+                                                             const RealScalar& prec) const {
  return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
 }

@@ -122,8 +122,8 @@ EIGEN_DEVICE_FUNC constexpr bool DenseBase<Derived>::isMuchSmallerThan(const typ
 */
 template <typename Derived>
 template <typename OtherDerived>
-EIGEN_DEVICE_FUNC constexpr bool DenseBase<Derived>::isMuchSmallerThan(const DenseBase<OtherDerived>& other,
-                                                                       const RealScalar& prec) const {
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+                                                             const RealScalar& prec) const {
  return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
 }

--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -89,7 +89,7 @@ struct product_type {
 /* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
-// FIXME: the current compile-time product-type mapping may not be optimal.
+// FIXME I'm not sure the current mapping is the ideal one.
 template <int M, int N>
 struct product_type_selector<M, N, 1> {
  enum { ret = OuterProduct };
@@ -193,11 +193,12 @@ struct product_type_selector<Large, Large, Small> {
 *  Implementation of Inner Vector Vector Product
 ***********************************************************************/

-// FIXME: consider returning a Scalar instead of a 1x1 matrix for inner products.
-// Pro: more natural for the user.
-// Con: in a meta-unrolled algorithm a matrix-matrix product may reduce to a
-// row-vector times column-vector product. To handle this, we could specialize
-// Block<MatrixType,1,1> with operator=(Scalar x).
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
+// product ends up as a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);

 /***********************************************************************
 *  Implementation of Outer Vector Vector Product
@@ -207,7 +208,7 @@ struct product_type_selector<Large, Large, Small> {
 *  Implementation of General Matrix Vector Product
 ***********************************************************************/

-/*  According to the shape/flags of the matrix we have to distinguish 3 different cases:
+/*  According to the shape/flags of the matrix we have to distinguish 3 different cases:
 *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
 *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
 *   3 - all other cases are handled using a simple loop along the outer-storage direction.
@@ -228,7 +229,7 @@ struct gemv_static_vector_if;

 template <typename Scalar, int Size, int MaxSize>
 struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {
-  EIGEN_DEVICE_FUNC constexpr Scalar* data() {
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() {
    eigen_internal_assert(false && "should never be called");
    return 0;
  }
@@ -236,19 +237,19 @@ struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {

 template <typename Scalar, int Size>
 struct gemv_static_vector_if<Scalar, Size, Dynamic, true> {
-  EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; }
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; }
 };

 template <typename Scalar, int Size, int MaxSize>
 struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
 #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
-  constexpr Scalar* data() { return m_data.array; }
+  EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; }
 #else
  // Some architectures cannot align on the stack,
  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
-  constexpr Scalar* data() {
+  EIGEN_STRONG_INLINE constexpr Scalar* data() {
    return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
                                     EIGEN_MAX_ALIGN_BYTES);
  }
@@ -292,7 +293,7 @@ struct gemv_dense_selector<OnTheRight, ColMajor, true> {
    typedef std::conditional_t<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr> ActualDest;

    enum {
-      // FIXME: find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
@@ -375,7 +376,7 @@ struct gemv_dense_selector<OnTheRight, RowMajor, true> {
    ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);

    enum {
-      // FIXME: find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      DirectlyUseRhs =
          ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0
@@ -416,7 +417,7 @@ struct gemv_dense_selector<OnTheRight, ColMajor, false> {
  static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs, 1>::Evaluate),
                        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
-    // TODO: if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory,
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory,
    // otherwise use a temp
    typename nested_eval<Rhs, 1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -57,12 +57,13 @@ struct default_packet_traits {
    HasConj = 1,
    HasSetLinear = 1,
    HasSign = 1,
-    HasAbsDiff = 1,
    // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
    // types
    HasRound = 1,

    HasArg = 0,
+    HasAbsDiff = 0,
+    HasBlend = 0,
    // This flag is used to indicate whether packet comparison is supported.
    // pcmp_eq and pcmp_lt should be defined for it to be true.
    HasCmp = 0,
@@ -87,8 +88,6 @@ struct default_packet_traits {
    HasATanh = 0,
    HasSinh = 0,
    HasCosh = 0,
-    HasASinh = 0,
-    HasACosh = 0,
    HasTanh = 0,
    HasLGamma = 0,
    HasDiGamma = 0,
@@ -118,7 +117,6 @@ struct packet_traits : default_packet_traits {
  enum {
    HasAdd = 0,
    HasSub = 0,
-    HasAbsDiff = 0,
    HasMul = 0,
    HasNegate = 0,
    HasAbs = 0,
@@ -133,18 +131,17 @@ struct packet_traits : default_packet_traits {
 template <typename T>
 struct packet_traits<const T> : packet_traits<T> {};

-struct default_unpacket_traits {
-  enum { vectorizable = false, masked_load_available = false, masked_store_available = false };
-};
-
 template <typename T>
-struct unpacket_traits : default_unpacket_traits {
+struct unpacket_traits {
  typedef T type;
  typedef T half;
  typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
  enum {
    size = 1,
    alignment = alignof(T),
+    vectorizable = false,
+    masked_load_available = false,
+    masked_store_available = false
  };
 };

@@ -612,7 +609,7 @@ EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, con
  return cond ? a : b;
 }

-/** \internal \returns the min or max of \a a and \a b (coeff-wise)
+/** \internal \returns the min or max of \a a and \a b (coeff-wise)
    If either \a a or \a b are NaN, the result is implementation defined. */
 template <int NaNPropagation, bool IsInteger>
 struct pminmax_impl {
@@ -650,7 +647,7 @@ struct pminmax_impl<PropagateNumbers, false> {
 #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }

 /** \internal \returns the min of \a a and \a b  (coeff-wise).
-    If \a a or \a b is NaN, the return value is implementation defined. */
+    If \a a or \a b is NaN, the return value is implementation defined. */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
  return numext::mini(a, b);
@@ -665,7 +662,7 @@ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
 }

 /** \internal \returns the max of \a a and \a b  (coeff-wise)
-    If \a a or \a b is NaN, the return value is implementation defined. */
+    If \a a or \a b is NaN, the return value is implementation defined. */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
  return numext::maxi(a, b);
@@ -751,15 +748,9 @@ EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent)

 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
 template <typename Packet>
-EIGEN_DEVICE_FUNC inline std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsInteger, Packet>
-pabsdiff(const Packet& a, const Packet& b) {
+EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
  return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
 }
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsInteger, Packet>
-pabsdiff(const Packet& a, const Packet& b) {
-  return pabs(psub(a, b));
-}

 /** \internal \returns a packet version of \a *from, from must be properly aligned */
 template <typename Packet>
@@ -823,24 +814,10 @@ EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::ty
 template <typename Packet, typename BitsType>
 EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);

-template <typename Scalar, std::enable_if_t<std::is_trivially_copyable<Scalar>::value, int> = 0>
-EIGEN_DEVICE_FUNC inline Scalar pload1_scalar(const Scalar* a) {
-  Scalar scalar;
-  EIGEN_USING_STD(memcpy)
-  memcpy(&scalar, a, sizeof(Scalar));
-  return scalar;
-}
-
-template <typename Scalar, std::enable_if_t<!std::is_trivially_copyable<Scalar>::value, int> = 0>
-EIGEN_DEVICE_FUNC inline Scalar pload1_scalar(const Scalar* a) {
-  return Scalar(*a);
-}
-
 /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
-  using Scalar = typename unpacket_traits<Packet>::type;
-  return pset1<Packet>(pload1_scalar<Scalar>(a));
+  return pset1<Packet>(*a);
 }

 /** \internal \returns a packet with elements of \a *from duplicated.
@@ -850,7 +827,7 @@ EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::t
 */
 template <typename Packet>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
-  return pload1<Packet>(from);
+  return *from;
 }

 /** \internal \returns a packet with elements of \a *from quadrupled.
@@ -1026,26 +1003,12 @@ EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
  return a;
 }

-/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */
+/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
  return Packet(numext::imag(a), numext::real(a));
 }

-/** \internal \returns \a a with real part duplicated (for complex types only) */
-// TODO(rmlarsen): Define and use in all complex backends.
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pdupreal(const Packet& a) {
-  return Packet(numext::real(a), numext::real(a));
-}
-
-/** \internal \returns \a a with imaginary part duplicated (for complex types only) */
-// TODO(rmlarsen): Define and use in all complex backends.
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pdupimag(const Packet& a) {
-  return Packet(numext::imag(a), numext::imag(a));
-}
-
 /**************************
 * Special math functions
 ***************************/
@@ -1134,20 +1097,6 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet&
  return atanh(a);
 }

-/** \internal \returns the inverse hyperbolic sine of \a a (coeff-wise) */
-template <typename Packet>
-EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh(const Packet& a) {
-  EIGEN_USING_STD(asinh);
-  return asinh(a);
-}
-
-/** \internal \returns the inverse hyperbolic cosine of \a a (coeff-wise) */
-template <typename Packet>
-EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh(const Packet& a) {
-  EIGEN_USING_STD(acosh);
-  return acosh(a);
-}
-
 /** \internal \returns the exp of \a a (coeff-wise) */
 template <typename Packet>
 EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
@@ -1276,7 +1225,7 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Pac
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
                                            typename unpacket_traits<Packet>::half, Packet>
-predux_half(const Packet& a) {
+predux_half_dowto4(const Packet& a) {
  return a;
 }

@@ -1359,7 +1308,9 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const
 /** \internal \returns true if all coeffs of \a a means "true"
 * It is supposed to be called on values returned by pcmp_*.
 */
-// TODO: implement predux_all when needed.
+// not needed yet
+// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
+// { return bool(a); }

 /** \internal \returns true if any coeffs of \a a means "true"
 * It is supposed to be called on values returned by pcmp_*.
@@ -1392,7 +1343,7 @@ struct pmadd_impl {
    return psub(c, pmul(a, b));
  }
  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
-    return pnegate(padd(pmul(a, b), c));
+    return pnegate(pmadd(a, b, c));
  }
 };

@@ -1526,11 +1477,26 @@ struct PacketBlock {
  Packet packet[N];
 };

-template <typename Packet, int size = 1>
-EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, size>& /*kernel*/) {
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
 }

+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+ ***************************************************************************/
+template <size_t N>
+struct Selector {
+  bool select[N];
+};
+
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
+                                       const Packet& thenPacket, const Packet& elsePacket) {
+  return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+
 /** \internal \returns 1 / a (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -130,12 +130,12 @@ using GlobalUnaryPowReturnType = std::enable_if_t<
 */
 #ifdef EIGEN_PARSED_BY_DOXYGEN
 template <typename Derived, typename ScalarExponent>
-EIGEN_DEVICE_FUNC constexpr inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(
-    const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
+EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(const Eigen::ArrayBase<Derived>& x,
+                                                                                     const ScalarExponent& exponent);
 #else
 template <typename Derived, typename ScalarExponent>
-EIGEN_DEVICE_FUNC constexpr inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(
-    const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
+EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(const Eigen::ArrayBase<Derived>& x,
+                                                                                     const ScalarExponent& exponent) {
  return GlobalUnaryPowReturnType<Derived, ScalarExponent>(
      x.derived(), internal::scalar_unary_pow_op<typename Derived::Scalar, ScalarExponent>(exponent));
 }
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -65,7 +65,7 @@ struct IOFormat {
        fill(_fill),
        precision(_precision),
        flags(_flags) {
-    // TODO: check if rowPrefix, rowSuffix or rowSeparator contains a newline
+    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
    // don't add rowSpacer if columns are not to be aligned
    if ((flags & DontAlignCols)) return;
    int i = int(matPrefix.length()) - 1;
--- a/Eigen/src/Core/IndexedView.h
+++ b/Eigen/src/Core/IndexedView.h
@@ -59,7 +59,7 @@ struct traits<IndexedView<XprType, RowIndices, ColIndices>> : traits<XprType> {
    ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,
    ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock),

-    // FIXME: we deal with compile-time strides if and only if we have DirectAccessBit flag,
+    // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag,
    // but this is too strict regarding negative strides...
    DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0)
                           ? DirectAccessBit
@@ -259,27 +259,26 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
    Alignment = 0
  };

-  EIGEN_DEVICE_FUNC constexpr explicit unary_evaluator(const XprType& xpr)
-      : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) {
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
    return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    EIGEN_STATIC_ASSERT_LVALUE(XprType)
    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
@@ -288,7 +287,7 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const {
    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
@@ -296,7 +295,7 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
@@ -311,7 +310,9 @@ struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>

 // Catch assignments to an IndexedView.
 template <typename ArgType, typename RowIndices, typename ColIndices>
-struct evaluator_assume_aliasing<IndexedView<ArgType, RowIndices, ColIndices>> : std::true_type {};
+struct evaluator_assume_aliasing<IndexedView<ArgType, RowIndices, ColIndices>> {
+  static const bool value = true;
+};

 }  // end namespace internal

--- a/Eigen/src/Core/InnerProduct.h
+++ b/Eigen/src/Core/InnerProduct.h
@@ -142,36 +142,31 @@ struct inner_product_impl<Evaluator, true> {
    const UnsignedIndex numPackets = size / PacketSize;
    const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize;

-    Packet presult0 = eval.template packet<Packet>(0 * PacketSize);
-    if (numPackets >= 2) {
-      Packet presult1 = eval.template packet<Packet>(1 * PacketSize);
-      if (numPackets >= 3) {
-        Packet presult2 = eval.template packet<Packet>(2 * PacketSize);
-        if (numPackets >= 4) {
-          Packet presult3 = eval.template packet<Packet>(3 * PacketSize);
+    Packet presult0, presult1, presult2, presult3;

-          for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) {
-            presult0 = eval.packet(presult0, k + 0 * PacketSize);
-            presult1 = eval.packet(presult1, k + 1 * PacketSize);
-            presult2 = eval.packet(presult2, k + 2 * PacketSize);
-            presult3 = eval.packet(presult3, k + 3 * PacketSize);
-          }
+    presult0 = eval.template packet<Packet>(0 * PacketSize);
+    if (numPackets >= 2) presult1 = eval.template packet<Packet>(1 * PacketSize);
+    if (numPackets >= 3) presult2 = eval.template packet<Packet>(2 * PacketSize);
+    if (numPackets >= 4) {
+      presult3 = eval.template packet<Packet>(3 * PacketSize);

-          if (numRemPackets >= 1) {
-            presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize);
-            if (numRemPackets >= 2) {
-              presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize);
-              if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize);
-            }
-          }
-
-          presult2 = padd(presult2, presult3);
-        }
-        presult1 = padd(presult1, presult2);
+      for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) {
+        presult0 = eval.packet(presult0, k + 0 * PacketSize);
+        presult1 = eval.packet(presult1, k + 1 * PacketSize);
+        presult2 = eval.packet(presult2, k + 2 * PacketSize);
+        presult3 = eval.packet(presult3, k + 3 * PacketSize);
      }
-      presult0 = padd(presult0, presult1);
+
+      if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize);
+      if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize);
+      if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize);
+
+      presult2 = padd(presult2, presult3);
    }

+    if (numPackets >= 3) presult1 = padd(presult1, presult2);
+    if (numPackets >= 2) presult0 = padd(presult0, presult1);
+
    Scalar result = predux(presult0);
    for (UnsignedIndex k = packetEnd; k < size; k++) {
      result = eval.coeff(result, k);
@@ -221,8 +216,8 @@ struct scalar_inner_product_op {
 template <typename Scalar, bool Conj>
 struct scalar_inner_product_op<
    Scalar,
-    std::enable_if_t<internal::is_same<typename ScalarBinaryOpTraits<Scalar, Scalar>::ReturnType, Scalar>::value,
-                     Scalar>,
+    typename std::enable_if<internal::is_same<typename ScalarBinaryOpTraits<Scalar, Scalar>::ReturnType, Scalar>::value,
+                            Scalar>::type,
    Conj> {
  using result_type = Scalar;
  using conj_helper = conditional_conj<Scalar, Conj>;
--- a/Eigen/src/Core/Inverse.h
+++ b/Eigen/src/Core/Inverse.h
@@ -49,12 +49,12 @@ class Inverse : public InverseImpl<XprType, typename internal::traits<XprType>::
  typedef typename internal::ref_selector<Inverse>::type Nested;
  typedef internal::remove_all_t<XprType> NestedExpression;

-  explicit EIGEN_DEVICE_FUNC constexpr Inverse(const XprType& xpr) : m_xpr(xpr) {}
+  explicit EIGEN_DEVICE_FUNC Inverse(const XprType& xpr) : m_xpr(xpr) {}

  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.rows(); }

-  EIGEN_DEVICE_FUNC constexpr const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }

 protected:
  XprTypeNested m_xpr;
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -100,7 +100,7 @@ class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > {

  typedef typename Base::PointerType PointerType;
  typedef PointerType PointerArgType;
-  EIGEN_DEVICE_FUNC constexpr inline PointerType cast_to_pointer_type(PointerArgType ptr) const { return ptr; }
+  EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }

  EIGEN_DEVICE_FUNC constexpr Index innerStride() const {
    return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
@@ -120,7 +120,7 @@ class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > {
   * \param dataPtr pointer to the array to map
   * \param stride optional Stride object, passing the strides.
   */
-  EIGEN_DEVICE_FUNC constexpr explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+  EIGEN_DEVICE_FUNC explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) {}

  /** Constructor in the dynamic-size vector case.
@@ -129,7 +129,7 @@ class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > {
   * \param size the size of the vector expression
   * \param stride optional Stride object, passing the strides.
   */
-  EIGEN_DEVICE_FUNC constexpr inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
+  EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) {}

  /** Constructor in the dynamic-size matrix case.
@@ -139,8 +139,7 @@ class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > {
   * \param cols the number of columns of the matrix expression
   * \param stride optional Stride object, passing the strides.
   */
-  EIGEN_DEVICE_FUNC constexpr inline Map(PointerArgType dataPtr, Index rows, Index cols,
-                                         const StrideType& stride = StrideType())
+  EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) {}

  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -97,23 +97,23 @@ class MapBase<Derived, ReadOnlyAccessors> : public internal::dense_xpr_base<Deri
  EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; }

  /** \copydoc PlainObjectBase::coeff(Index,Index) const */
-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeff(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const {
    return m_data[colId * colStride() + rowId * rowStride()];
  }

  /** \copydoc PlainObjectBase::coeff(Index) const */
-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeff(Index index) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const {
    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
    return m_data[index * innerStride()];
  }

  /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeffRef(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const {
    return this->m_data[colId * colStride() + rowId * rowStride()];
  }

  /** \copydoc PlainObjectBase::coeffRef(Index) const */
-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeffRef(Index index) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const {
    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
    return this->m_data[index * innerStride()];
  }
@@ -132,14 +132,14 @@ class MapBase<Derived, ReadOnlyAccessors> : public internal::dense_xpr_base<Deri
  }

  /** \internal Constructor for fixed size matrices or vectors */
-  EIGEN_DEVICE_FUNC constexpr explicit inline MapBase(PointerType dataPtr)
+  EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr)
      : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) {
    EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
    checkSanity<Derived>();
  }

  /** \internal Constructor for dynamically sized vectors */
-  EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index vecSize)
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize)
      : m_data(dataPtr),
        m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
        m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) {
@@ -150,7 +150,7 @@ class MapBase<Derived, ReadOnlyAccessors> : public internal::dense_xpr_base<Deri
  }

  /** \internal Constructor for dynamically sized matrices */
-  EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index rows, Index cols)
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols)
      : m_data(dataPtr), m_rows(rows), m_cols(cols) {
    eigen_assert((dataPtr == 0) || (rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) &&
                                    cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
@@ -238,11 +238,11 @@ class MapBase<Derived, WriteAccessors> : public MapBase<Derived, ReadOnlyAccesso
    return this->m_data;
  }  // no const-cast here so non-const-correct code will give a compile error

-  EIGEN_DEVICE_FUNC constexpr inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) {
+  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) {
    return this->m_data[col * colStride() + row * rowStride()];
  }

-  EIGEN_DEVICE_FUNC constexpr inline ScalarWithConstIfNotLvalue& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) {
    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
    return this->m_data[index * innerStride()];
  }
@@ -258,9 +258,9 @@ class MapBase<Derived, WriteAccessors> : public MapBase<Derived, ReadOnlyAccesso
    internal::pstoret<Scalar, PacketScalar, StoreMode>(this->m_data + index * innerStride(), val);
  }

-  EIGEN_DEVICE_FUNC constexpr explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
-  EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
-  EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+  EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}

  EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) {
    ReadOnlyMapBase::Base::operator=(other);
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -11,7 +11,7 @@
 #ifndef EIGEN_MATHFUNCTIONS_H
 #define EIGEN_MATHFUNCTIONS_H

-// TODO: consider moving these constants to NumTraits.
+// TODO: these constants would be better placed in NumTraits.
 // Source: WolframAlpha
 #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
 #define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L
@@ -74,7 +74,7 @@ struct global_math_functions_filtering_base<
 template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct real_default_impl {
  typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC static constexpr RealScalar run(const Scalar& x) { return x; }
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; }
 };

 template <typename Scalar>
@@ -170,24 +170,18 @@ struct imag_ref_default_impl {

 template <typename Scalar>
 struct imag_ref_default_impl<Scalar, false> {
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  EIGEN_DEVICE_FUNC constexpr static inline RealScalar run(Scalar&) { return RealScalar(0); }
-  EIGEN_DEVICE_FUNC constexpr static inline RealScalar run(const Scalar&) { return RealScalar(0); }
+  EIGEN_DEVICE_FUNC constexpr static Scalar run(Scalar&) { return Scalar(0); }
+  EIGEN_DEVICE_FUNC constexpr static const Scalar run(const Scalar&) { return Scalar(0); }
 };

 template <typename Scalar>
 struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};

-template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+template <typename Scalar>
 struct imag_ref_retval {
  typedef typename NumTraits<Scalar>::Real& type;
 };

-template <typename Scalar>
-struct imag_ref_retval<Scalar, false> {
-  typedef typename NumTraits<Scalar>::Real type;
-};
-
 }  // namespace internal

 namespace numext {
@@ -228,7 +222,7 @@ namespace internal {

 template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct conj_default_impl {
-  EIGEN_DEVICE_FUNC static constexpr Scalar run(const Scalar& x) { return x; }
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; }
 };

 template <typename Scalar>
@@ -293,7 +287,7 @@ struct sqrt_impl {

 // Complex sqrt defined in MathFunctionsImpl.h.
 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_sqrt(const ComplexT& a_x);
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x);

 // Custom implementation is faster than `std::sqrt`, works on
 // GPU, and correctly handles special cases (unlike MSVC).
@@ -313,7 +307,7 @@ struct rsqrt_impl;

 // Complex rsqrt defined in MathFunctionsImpl.h.
 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_rsqrt(const ComplexT& a_x);
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x);

 template <typename T>
 struct rsqrt_impl<std::complex<T>> {
@@ -396,7 +390,7 @@ struct cast_impl<OldType, NewType,
  }
 };

-// Returns NewType directly to avoid unintended intermediate conversions.
+// Here, for once, we return NewType directly: cast must not go through unintended intermediate conversions.

 template <typename OldType, typename NewType>
 EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) {
@@ -510,7 +504,7 @@ struct expm1_retval {

 // Complex log defined in MathFunctionsImpl.h.
 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_log(const ComplexT& z);
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z);

 template <typename Scalar>
 struct log_impl {
@@ -838,8 +832,8 @@ EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits<T>::has_infinity && !Num

 template <typename T>
 EIGEN_DEVICE_FUNC
-    std::enable_if_t<!(std::numeric_limits<T>::has_quiet_NaN || std::numeric_limits<T>::has_signaling_NaN), bool>
-    isnan_impl(const T&) {
+std::enable_if_t<!(std::numeric_limits<T>::has_quiet_NaN || std::numeric_limits<T>::has_signaling_NaN), bool>
+isnan_impl(const T&) {
  return false;
 }

@@ -1029,13 +1023,13 @@ namespace numext {

 #if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
 template <typename T>
-EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) {
  EIGEN_USING_STD(min)
  return min EIGEN_NOT_A_MACRO(x, y);
 }

 template <typename T>
-EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) {
  EIGEN_USING_STD(max)
  return max EIGEN_NOT_A_MACRO(x, y);
 }
@@ -1363,12 +1357,6 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) {
  return ub * (ua / ub);
 }

-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log2(T x) {
-  EIGEN_USING_STD(log2);
-  return log2(x);
-}
-
 /** Log base 2 for 32 bits positive integers.
 * Conveniently returns 0 for x==0. */
 constexpr int log2(int x) {
@@ -1448,17 +1436,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log(const double& x) {
 #endif

 template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-    std::enable_if_t<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex, typename NumTraits<T>::Real>
-    abs(const T& x) {
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE std::enable_if_t<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex, typename NumTraits<T>::Real>
+abs(const T& x) {
  EIGEN_USING_STD(abs);
  return abs(x);
 }

 template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-    std::enable_if_t<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex), typename NumTraits<T>::Real>
-    abs(const T& x) {
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE std::enable_if_t<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex), typename NumTraits<T>::Real>
+abs(const T& x) {
  return x;
 }

@@ -1923,8 +1911,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double&

 template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) {
-  using UnsignedScalar = typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type;
-  return bit_cast<Scalar, UnsignedScalar>(bit_cast<UnsignedScalar, Scalar>(a) << n);
+  return a << n;
 }

 template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
@@ -2097,15 +2084,7 @@ struct expm1_impl<std::complex<RealScalar>> {

 template <typename T>
 struct rsqrt_impl {
-// C4804: unsafe use of type 'bool' in operation. Unavoidable when instantiated with T=bool.
-#if EIGEN_COMP_MSVC
-#pragma warning(push)
-#pragma warning(disable : 4804)
-#endif
  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE T run(const T& x) { return T(1) / numext::sqrt(x); }
-#if EIGEN_COMP_MSVC
-#pragma warning(pop)
-#endif
 };

 #if defined(EIGEN_GPU_COMPILE_PHASE)
@@ -2117,57 +2096,6 @@ struct conj_impl<std::complex<T>, true> {
 };
 #endif

-// Complex multiply and division operators.
-// Note that these do not handle the case if inf+NaNi, which is considered an infinity.
-// This is for consistency with our standard pmul, pdiv implementations.
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_multiply(const std::complex<T>& a,
-                                                                       const std::complex<T>& b) {
-  const T a_real = numext::real(a);
-  const T a_imag = numext::imag(a);
-  const T b_real = numext::real(b);
-  const T b_imag = numext::imag(b);
-  return std::complex<T>(a_real * b_real - a_imag * b_imag, a_imag * b_real + a_real * b_imag);
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_fast(const std::complex<T>& a,
-                                                                          const std::complex<T>& b) {
-  const T a_real = numext::real(a);
-  const T a_imag = numext::imag(a);
-  const T b_real = numext::real(b);
-  const T b_imag = numext::imag(b);
-  const T norm = (b_real * b_real + b_imag * b_imag);
-  return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm, (a_imag * b_real - a_real * b_imag) / norm);
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_smith(const std::complex<T>& a,
-                                                                           const std::complex<T>& b) {
-  const T a_real = numext::real(a);
-  const T a_imag = numext::imag(a);
-  const T b_real = numext::real(b);
-  const T b_imag = numext::imag(b);
-  // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf),
-  // guards against over/under-flow.
-  const bool scale_imag = numext::abs(b_imag) <= numext::abs(b_real);
-  const T rscale = scale_imag ? T(1) : b_real / b_imag;
-  const T iscale = scale_imag ? b_imag / b_real : T(1);
-  const T denominator = b_real * rscale + b_imag * iscale;
-  return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
-                         (a_imag * rscale - a_real * iscale) / denominator);
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide(const std::complex<T>& a,
-                                                                     const std::complex<T>& b) {
-#if EIGEN_FAST_MATH
-  return complex_divide_fast(a, b);
-#else
-  return complex_divide_smith(a, b);
-#endif
-}
-
 }  // end namespace internal

 }  // end namespace Eigen
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -37,16 +37,15 @@ struct generic_reciprocal_newton_step {
  static_assert(Steps > 0, "Steps must be at least 1.");
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) {
    using Scalar = typename unpacket_traits<Packet>::type;
-    const Packet one = pset1<Packet>(Scalar(1));
+    const Packet two = pset1<Packet>(Scalar(2));
    // Refine the approximation using one Newton-Raphson step:
    //   x_{i} = x_{i-1} * (2 - a * x_{i-1})
    const Packet x = generic_reciprocal_newton_step<Packet, Steps - 1>::run(a, approx_a_recip);
-    const Packet tmp = pnmadd(a, x, one);
+    const Packet tmp = pnmadd(a, x, two);
    // If tmp is NaN, it means that a is either +/-0 or +/-Inf.
    // In this case return the approximation directly.
    const Packet is_not_nan = pcmp_eq(tmp, tmp);
-    // Use two FMAs instead of FMA+FMUL to improve precision.
-    return pselect(is_not_nan, pmadd(x, tmp, x), x);
+    return pselect(is_not_nan, pmul(x, tmp), x);
  }
 };

@@ -148,16 +147,16 @@ struct generic_sqrt_newton_step {
 };

 template <typename RealScalar>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x,
-                                                                               const RealScalar& y) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) {
  // IEEE IEC 6059 special cases.
  if ((numext::isinf)(x) || (numext::isinf)(y)) return NumTraits<RealScalar>::infinity();
  if ((numext::isnan)(x) || (numext::isnan)(y)) return NumTraits<RealScalar>::quiet_NaN();

  EIGEN_USING_STD(sqrt);
-  RealScalar p = numext::maxi(x, y);
+  RealScalar p, qp;
+  p = numext::maxi(x, y);
  if (numext::is_exactly_zero(p)) return RealScalar(0);
-  RealScalar qp = numext::mini(y, x) / p;
+  qp = numext::mini(y, x) / p;
  return p * sqrt(RealScalar(1) + qp * qp);
 }

@@ -173,7 +172,7 @@ struct hypot_impl {
 // Generic complex sqrt implementation that correctly handles corner cases
 // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_sqrt(const ComplexT& z) {
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) {
  // Computes the principal sqrt of the input.
  //
  // For a complex square root of the number x + i*y. We want to find real
@@ -209,7 +208,7 @@ EIGEN_DEVICE_FUNC constexpr ComplexT complex_sqrt(const ComplexT& z) {

 // Generic complex rsqrt implementation.
 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_rsqrt(const ComplexT& z) {
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) {
  // Computes the principal reciprocal sqrt of the input.
  //
  // For a complex reciprocal square root of the number z = x + i*y. We want to
@@ -248,7 +247,7 @@ EIGEN_DEVICE_FUNC constexpr ComplexT complex_rsqrt(const ComplexT& z) {
 }

 template <typename ComplexT>
-EIGEN_DEVICE_FUNC constexpr ComplexT complex_log(const ComplexT& z) {
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) {
  // Computes complex log.
  using T = typename NumTraits<ComplexT>::Real;
  T a = numext::abs(z);
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -207,7 +207,7 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
   *
   * \callgraph
   */
-  EIGEN_DEVICE_FUNC constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); }

  /** \internal
   * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
@@ -249,16 +249,16 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
   * \sa resize(Index,Index)
   */
 #if defined(EIGEN_INITIALIZE_COEFFS)
-  EIGEN_DEVICE_FUNC constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
 #else
-  EIGEN_DEVICE_FUNC constexpr Matrix() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default;
 #endif
  /** \brief Move constructor */
-  EIGEN_DEVICE_FUNC constexpr Matrix(Matrix&&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default;
  /** \brief Moves the matrix into the other one.
   *
   */
-  EIGEN_DEVICE_FUNC constexpr Matrix& operator=(Matrix&& other) noexcept(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) noexcept(
      std::is_nothrow_move_assignable<Scalar>::value) {
    Base::operator=(std::move(other));
    return *this;
@@ -271,7 +271,7 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
   * This constructor is for 1D array or vectors with more than 4 coefficients.
   *
   * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
-   * constructor must match the fixed number of rows (resp. columns) of \c *this.
+   * constructor must match the fixed number of rows (resp. columns) of \c *this.
   *
   *
   * Example: \include Matrix_variadic_ctor_cxx11.cpp
@@ -316,12 +316,12 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma

  // This constructor is for both 1x1 matrices and dynamic vectors
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit Matrix(const T& x) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(const T& x) {
    Base::template _init1<T>(x);
  }

  template <typename T0, typename T1>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) {
    Base::template _init2<T0, T1>(x, y);
  }

@@ -367,7 +367,7 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
  /** \brief Constructs an initialized 3D vector with given coefficients
   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
    m_storage.data()[0] = x;
    m_storage.data()[1] = y;
@@ -376,8 +376,7 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
  /** \brief Constructs an initialized 4D vector with given coefficients
   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z,
-                                                         const Scalar& w) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
    m_storage.data()[0] = x;
    m_storage.data()[1] = y;
@@ -386,14 +385,13 @@ class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, Ma
  }

  /** \brief Copy constructor */
-  EIGEN_DEVICE_FUNC constexpr Matrix(const Matrix&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(const Matrix&) = default;

  /** \brief Copy constructor for generic expressions.
   * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
   */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived>& other)
-      : Base(other.derived()) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived>& other) : Base(other.derived()) {}

  EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; }
  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); }
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -99,7 +99,7 @@ class MatrixBase : public DenseBase<Derived> {

  /** \returns the size of the main diagonal, which is min(rows(),cols()).
   * \sa rows(), cols(), SizeAtCompileTime. */
-  EIGEN_DEVICE_FUNC constexpr Index diagonalSize() const { return (numext::mini)(rows(), cols()); }
+  EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (numext::mini)(rows(), cols()); }

  typedef typename Base::PlainObject PlainObject;

@@ -136,19 +136,19 @@ class MatrixBase : public DenseBase<Derived> {
  /** Special case of the template operator=, in order to prevent the compiler
   * from generating a default operator= (issue hit with g++ 4.1)
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other);

  // We cannot inherit here via Base::operator= since it is causing
  // trouble with MSVC.

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other);

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& operator=(const EigenBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase<OtherDerived>& other);

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& operator=(const ReturnByValue<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& other);

  template <typename OtherDerived>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const MatrixBase<OtherDerived>& other);
@@ -180,11 +180,11 @@ class MatrixBase : public DenseBase<Derived> {
      const SkewSymmetricBase<SkewDerived>& skew) const;

  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
-                                                            typename internal::traits<OtherDerived>::Scalar>::ReturnType
+  EIGEN_DEVICE_FUNC typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
+                                                  typename internal::traits<OtherDerived>::Scalar>::ReturnType
  dot(const MatrixBase<OtherDerived>& other) const;

-  EIGEN_DEVICE_FUNC constexpr RealScalar squaredNorm() const;
+  EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
  EIGEN_DEVICE_FUNC RealScalar norm() const;
  RealScalar stableNorm() const;
  RealScalar blueNorm() const;
@@ -194,23 +194,23 @@ class MatrixBase : public DenseBase<Derived> {
  EIGEN_DEVICE_FUNC void normalize();
  EIGEN_DEVICE_FUNC void stableNormalize();

-  EIGEN_DEVICE_FUNC constexpr const AdjointReturnType adjoint() const;
+  EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
  EIGEN_DEVICE_FUNC void adjointInPlace();

  typedef Diagonal<Derived> DiagonalReturnType;
-  EIGEN_DEVICE_FUNC constexpr DiagonalReturnType diagonal();
+  EIGEN_DEVICE_FUNC DiagonalReturnType diagonal();

  typedef Diagonal<const Derived> ConstDiagonalReturnType;
-  EIGEN_DEVICE_FUNC constexpr const ConstDiagonalReturnType diagonal() const;
+  EIGEN_DEVICE_FUNC const ConstDiagonalReturnType diagonal() const;

  template <int Index>
-  EIGEN_DEVICE_FUNC constexpr Diagonal<Derived, Index> diagonal();
+  EIGEN_DEVICE_FUNC Diagonal<Derived, Index> diagonal();

  template <int Index>
-  EIGEN_DEVICE_FUNC constexpr const Diagonal<const Derived, Index> diagonal() const;
+  EIGEN_DEVICE_FUNC const Diagonal<const Derived, Index> diagonal() const;

-  EIGEN_DEVICE_FUNC constexpr Diagonal<Derived, DynamicIndex> diagonal(Index index);
-  EIGEN_DEVICE_FUNC constexpr const Diagonal<const Derived, DynamicIndex> diagonal(Index index) const;
+  EIGEN_DEVICE_FUNC Diagonal<Derived, DynamicIndex> diagonal(Index index);
+  EIGEN_DEVICE_FUNC const Diagonal<const Derived, DynamicIndex> diagonal(Index index) const;

  template <unsigned int Mode>
  struct TriangularViewReturnType {
@@ -222,9 +222,9 @@ class MatrixBase : public DenseBase<Derived> {
  };

  template <unsigned int Mode>
-  EIGEN_DEVICE_FUNC constexpr typename TriangularViewReturnType<Mode>::Type triangularView();
+  EIGEN_DEVICE_FUNC typename TriangularViewReturnType<Mode>::Type triangularView();
  template <unsigned int Mode>
-  EIGEN_DEVICE_FUNC constexpr typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+  EIGEN_DEVICE_FUNC typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;

  template <unsigned int UpLo>
  struct SelfAdjointViewReturnType {
@@ -236,9 +236,9 @@ class MatrixBase : public DenseBase<Derived> {
  };

  template <unsigned int UpLo>
-  EIGEN_DEVICE_FUNC constexpr typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+  EIGEN_DEVICE_FUNC typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
  template <unsigned int UpLo>
-  EIGEN_DEVICE_FUNC constexpr typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+  EIGEN_DEVICE_FUNC typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;

  const SparseView<Derived> sparseView(
      const Scalar& m_reference = Scalar(0),
@@ -252,9 +252,9 @@ class MatrixBase : public DenseBase<Derived> {
  EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
  EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();

-  EIGEN_DEVICE_FUNC constexpr const DiagonalWrapper<const Derived> asDiagonal() const;
+  EIGEN_DEVICE_FUNC const DiagonalWrapper<const Derived> asDiagonal() const;
  const PermutationWrapper<const Derived> asPermutation() const;
-  EIGEN_DEVICE_FUNC constexpr const SkewSymmetricWrapper<const Derived> asSkewSymmetric() const;
+  EIGEN_DEVICE_FUNC const SkewSymmetricWrapper<const Derived> asSkewSymmetric() const;

  EIGEN_DEVICE_FUNC Derived& setIdentity();
  EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols);
@@ -274,17 +274,6 @@ class MatrixBase : public DenseBase<Derived> {
                    const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
  bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

-  /* diagonalView */
-  template <int DiagIndex_ = 0>
-  EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<Derived, DiagIndex_>> diagonalView();
-
-  template <int DiagIndex_ = 0>
-  EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<const Derived, DiagIndex_>> diagonalView() const;
-
-  EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<Derived, DynamicIndex>> diagonalView(Index index);
-
-  EIGEN_DEVICE_FUNC constexpr DiagonalWrapper<Diagonal<const Derived, DynamicIndex>> diagonalView(Index index) const;
-
  /** \returns true if each coefficients of \c *this and \a other are all exactly equal.
   * \warning When using floating point scalar values you probably should rather use a
   *          fuzzy comparison such as isApprox()
@@ -307,14 +296,14 @@ class MatrixBase : public DenseBase<Derived> {

  // TODO forceAlignedAccess is temporarily disabled
  // Need to find a nicer workaround.
-  constexpr const Derived& forceAlignedAccess() const { return derived(); }
-  constexpr Derived& forceAlignedAccess() { return derived(); }
+  inline const Derived& forceAlignedAccess() const { return derived(); }
+  inline Derived& forceAlignedAccess() { return derived(); }
  template <bool Enable>
-  constexpr const Derived& forceAlignedAccessIf() const {
+  inline const Derived& forceAlignedAccessIf() const {
    return derived();
  }
  template <bool Enable>
-  constexpr Derived& forceAlignedAccessIf() {
+  inline Derived& forceAlignedAccessIf() {
    return derived();
  }

@@ -323,31 +312,29 @@ class MatrixBase : public DenseBase<Derived> {
  template <int p>
  EIGEN_DEVICE_FUNC RealScalar lpNorm() const;

-  EIGEN_DEVICE_FUNC constexpr MatrixBase<Derived>& matrix() { return *this; }
-  EIGEN_DEVICE_FUNC constexpr const MatrixBase<Derived>& matrix() const { return *this; }
+  EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+  EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }

  /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
   * \sa ArrayBase::matrix() */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() {
-    return ArrayWrapper<Derived>(derived());
-  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
  /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
   * \sa ArrayBase::matrix() */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const {
    return ArrayWrapper<const Derived>(derived());
  }

  /////////// LU module ///////////

  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline FullPivLU<PlainObject, PermutationIndex> fullPivLu() const;
+  inline const FullPivLU<PlainObject, PermutationIndex> fullPivLu() const;
  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline PartialPivLU<PlainObject, PermutationIndex> partialPivLu() const;
+  inline const PartialPivLU<PlainObject, PermutationIndex> partialPivLu() const;

  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline PartialPivLU<PlainObject, PermutationIndex> lu() const;
+  inline const PartialPivLU<PlainObject, PermutationIndex> lu() const;

-  EIGEN_DEVICE_FUNC inline Inverse<Derived> inverse() const;
+  EIGEN_DEVICE_FUNC inline const Inverse<Derived> inverse() const;

  template <typename ResultType>
  inline void computeInverseAndDetWithCheck(
@@ -363,18 +350,18 @@ class MatrixBase : public DenseBase<Derived> {

  /////////// Cholesky module ///////////

-  inline LLT<PlainObject> llt() const;
-  inline LDLT<PlainObject> ldlt() const;
+  inline const LLT<PlainObject> llt() const;
+  inline const LDLT<PlainObject> ldlt() const;

  /////////// QR module ///////////

-  inline HouseholderQR<PlainObject> householderQr() const;
+  inline const HouseholderQR<PlainObject> householderQr() const;
  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline ColPivHouseholderQR<PlainObject, PermutationIndex> colPivHouseholderQr() const;
+  inline const ColPivHouseholderQR<PlainObject, PermutationIndex> colPivHouseholderQr() const;
  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline FullPivHouseholderQR<PlainObject, PermutationIndex> fullPivHouseholderQr() const;
+  inline const FullPivHouseholderQR<PlainObject, PermutationIndex> fullPivHouseholderQr() const;
  template <typename PermutationIndex = DefaultPermutationIndex>
-  inline CompleteOrthogonalDecomposition<PlainObject, PermutationIndex> completeOrthogonalDecomposition() const;
+  inline const CompleteOrthogonalDecomposition<PlainObject, PermutationIndex> completeOrthogonalDecomposition() const;

  /////////// Eigenvalues module ///////////

@@ -411,6 +398,7 @@ class MatrixBase : public DenseBase<Derived> {

  EIGEN_DEVICE_FUNC inline Matrix<Scalar, 3, 1> canonicalEulerAngles(Index a0, Index a1, Index a2) const;

+  // Kept as a separate enum value to work around a possible GCC 4.3 bug (?)
  enum {
    HomogeneousReturnTypeDirection =
        ColsAtCompileTime == 1 && RowsAtCompileTime == 1
--- a/Eigen/src/Core/NestByValue.h
+++ b/Eigen/src/Core/NestByValue.h
@@ -43,24 +43,24 @@ class NestByValue : public internal::dense_xpr_base<NestByValue<ExpressionType>

  EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)

-  EIGEN_DEVICE_FUNC constexpr explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+  EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}

  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); }

-  EIGEN_DEVICE_FUNC constexpr operator const ExpressionType&() const { return m_expression; }
+  EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }

-  EIGEN_DEVICE_FUNC constexpr const ExpressionType& nestedExpression() const { return m_expression; }
+  EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; }

-  EIGEN_DEVICE_FUNC constexpr std::enable_if_t<HasDirectAccess, const Scalar*> data() const {
+  EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, const Scalar*>::type data() const {
    return m_expression.data();
  }

-  EIGEN_DEVICE_FUNC constexpr std::enable_if_t<HasDirectAccess, Index> innerStride() const {
+  EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type innerStride() const {
    return m_expression.innerStride();
  }

-  EIGEN_DEVICE_FUNC constexpr std::enable_if_t<HasDirectAccess, Index> outerStride() const {
+  EIGEN_DEVICE_FUNC typename std::enable_if<HasDirectAccess, Index>::type outerStride() const {
    return m_expression.outerStride();
  }

@@ -71,7 +71,7 @@ class NestByValue : public internal::dense_xpr_base<NestByValue<ExpressionType>
 /** \returns an expression of the temporary version of *this.
 */
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr inline const NestByValue<Derived> DenseBase<Derived>::nestByValue() const {
+EIGEN_DEVICE_FUNC inline const NestByValue<Derived> DenseBase<Derived>::nestByValue() const {
  return NestByValue<Derived>(derived());
 }

@@ -82,7 +82,7 @@ template <typename ArgType>
 struct evaluator<NestByValue<ArgType> > : public evaluator<ArgType> {
  typedef evaluator<ArgType> Base;

-  EIGEN_DEVICE_FUNC constexpr explicit evaluator(const NestByValue<ArgType>& xpr) : Base(xpr.nestedExpression()) {}
+  EIGEN_DEVICE_FUNC explicit evaluator(const NestByValue<ArgType>& xpr) : Base(xpr.nestedExpression()) {}
 };
 }  // namespace internal

--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -35,7 +35,7 @@ class NoAlias {
 public:
  typedef typename ExpressionType::Scalar Scalar;

-  EIGEN_DEVICE_FUNC constexpr explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
+  EIGEN_DEVICE_FUNC explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}

  template <typename OtherDerived>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other) {
@@ -58,7 +58,7 @@ class NoAlias {
    return m_expression;
  }

-  EIGEN_DEVICE_FUNC constexpr ExpressionType& expression() const { return m_expression; }
+  EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; }

 protected:
  ExpressionType& m_expression;
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -99,12 +99,12 @@ namespace numext {
 /** \internal bit-wise cast without changing the underlying bit representation. */
 #if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
 template <typename Tgt, typename Src>
-EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) {
  return std::bit_cast<Tgt>(src);
 }
 #elif EIGEN_HAS_BUILTIN(__builtin_bit_cast)
 template <typename Tgt, typename Src>
-EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) {
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) {
  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Src>::value, THIS_TYPE_IS_NOT_SUPPORTED)
  EIGEN_STATIC_ASSERT(std::is_trivially_copyable<Tgt>::value, THIS_TYPE_IS_NOT_SUPPORTED)
  EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED)
--- a/Eigen/src/Core/PartialReduxEvaluator.h
+++ b/Eigen/src/Core/PartialReduxEvaluator.h
@@ -42,12 +42,12 @@ namespace internal {
 /* logic deciding a strategy for unrolling of vectorized paths */
 template <typename Func, typename Evaluator>
 struct packetwise_redux_traits {
-  static constexpr int OuterSize =
-      int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime;
-  static constexpr int Cost = OuterSize == Dynamic
-                                  ? HugeCost
-                                  : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits<Func>::Cost;
-  static constexpr int Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling;
+  enum {
+    OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
+    Cost = OuterSize == Dynamic ? HugeCost
+                                : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits<Func>::Cost,
+    Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
+  };
 };

 /* Value to be returned when size==0 , by default let's return 0 */
@@ -70,8 +70,8 @@ struct packetwise_redux_impl;
 /* Perform the actual reduction with unrolling */
 template <typename Func, typename Evaluator>
 struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling> {
-  using Base = redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime>;
-  using Scalar = typename Evaluator::Scalar;
+  typedef redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
+  typedef typename Evaluator::Scalar Scalar;

  template <typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) {
@@ -96,8 +96,8 @@ struct redux_vec_unroller<Func, Evaluator, Start, 0> {
 /* Perform the actual reduction for dynamic sizes */
 template <typename Func, typename Evaluator>
 struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> {
-  using Scalar = typename Evaluator::Scalar;
-  using PacketScalar = typename redux_traits<Func, Evaluator>::PacketType;
+  typedef typename Evaluator::Scalar Scalar;
+  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template <typename PacketType>
  EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) {
@@ -122,8 +122,8 @@ struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> {

 template <typename Func, typename Evaluator>
 struct packetwise_segment_redux_impl {
-  using Scalar = typename Evaluator::Scalar;
-  using PacketScalar = typename redux_traits<Func, Evaluator>::PacketType;
+  typedef typename Evaluator::Scalar Scalar;
+  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template <typename PacketType>
  EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size, Index begin,
@@ -140,16 +140,16 @@ struct packetwise_segment_redux_impl {
 template <typename ArgType, typename MemberOp, int Direction>
 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
    : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > {
-  using XprType = PartialReduxExpr<ArgType, MemberOp, Direction>;
-  using ArgTypeNested = typename internal::nested_eval<ArgType, 1>::type;
-  using ConstArgTypeNested = add_const_on_value_type_t<ArgTypeNested>;
-  using ArgTypeNestedCleaned = internal::remove_all_t<ArgTypeNested>;
-  using InputScalar = typename ArgType::Scalar;
-  using Scalar = typename XprType::Scalar;
+  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+  typedef typename internal::nested_eval<ArgType, 1>::type ArgTypeNested;
+  typedef add_const_on_value_type_t<ArgTypeNested> ConstArgTypeNested;
+  typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
+  typedef typename ArgType::Scalar InputScalar;
+  typedef typename XprType::Scalar Scalar;
  enum {
    TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
  };
-  using CostOpType = typename MemberOp::template Cost<int(TraversalSize)>;
+  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
  enum {
    CoeffReadCost = TraversalSize == Dynamic ? HugeCost
                    : TraversalSize == 0
@@ -168,13 +168,13 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
    Alignment = 0  // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };

-  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {
    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize == Dynamic ? HugeCost
                                                             : (TraversalSize == 0 ? 1 : int(CostOpType::value)));
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

-  using CoeffReturnType = typename XprType::CoeffReturnType;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const {
    return coeff(Direction == Vertical ? j : i);
@@ -203,7 +203,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
    // See bug 1612, currently if PacketSize==1 (i.e. complex<double> with 128bits registers) then the storage-order of
    // panel get reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just
    // by pass "vectorization" in this case:
-    EIGEN_IF_CONSTEXPR(PacketSize == 1) return internal::pset1<PacketType>(coeff(idx));
+    if (PacketSize == 1) return internal::pset1<PacketType>(coeff(idx));

    Index startRow = Direction == Vertical ? 0 : idx;
    Index startCol = Direction == Vertical ? idx : 0;
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -159,17 +159,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
                      INVALID_MATRIX_TEMPLATE_PARAMETERS)
  EIGEN_STATIC_ASSERT(((Options & (DontAlign | RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS)

-  EIGEN_DEVICE_FUNC constexpr Base& base() { return *static_cast<Base*>(this); }
-  EIGEN_DEVICE_FUNC constexpr const Base& base() const { return *static_cast<const Base*>(this); }
+  EIGEN_DEVICE_FUNC Base& base() { return *static_cast<Base*>(this); }
+  EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast<const Base*>(this); }

-  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_storage.rows(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_storage.cols(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_storage.rows(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_storage.cols(); }

  /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
   * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
   *
   * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
-  EIGEN_DEVICE_FUNC constexpr const Scalar& coeff(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const {
    if (Flags & RowMajorBit)
      return m_storage.data()[colId + rowId * m_storage.cols()];
    else  // column-major
@@ -180,13 +180,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
   * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
   *
   * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
-  EIGEN_DEVICE_FUNC constexpr const Scalar& coeff(Index index) const { return m_storage.data()[index]; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index index) const {
+    return m_storage.data()[index];
+  }

  /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const
   * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
   *
   * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
-  EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index rowId, Index colId) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) {
    if (Flags & RowMajorBit)
      return m_storage.data()[colId + rowId * m_storage.cols()];
    else  // column-major
@@ -197,11 +199,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
   * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
   *
   * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
-  EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; }

  /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
   * It is provided for convenience. */
-  EIGEN_DEVICE_FUNC constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
    if (Flags & RowMajorBit)
      return m_storage.data()[colId + rowId * m_storage.cols()];
    else  // column-major
@@ -210,7 +212,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {

  /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
   * It is provided for convenience. */
-  EIGEN_DEVICE_FUNC constexpr const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const {
+    return m_storage.data()[index];
+  }

  /** \internal */
  template <int LoadMode>
@@ -339,7 +343,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
   * remain row-vectors and vectors remain vectors.
   */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other) {
    const OtherDerived& other = _other.derived();
 #ifndef EIGEN_NO_DEBUG
    internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime>::run(
@@ -422,7 +426,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  /** This is a special case of the templated operator=. Its purpose is to
   * prevent a default operator= from hiding the templated operator=.
   */
-  EIGEN_DEVICE_FUNC constexpr Derived& operator=(const PlainObjectBase& other) { return _set(other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& operator=(const PlainObjectBase& other) {
+    return _set(other);
+  }

  /** \sa MatrixBase::lazyAssign() */
  template <typename OtherDerived>
@@ -440,9 +446,9 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  // Prevent user from trying to instantiate PlainObjectBase objects
  // by making all its constructor protected. See bug 1074.
 protected:
-  EIGEN_DEVICE_FUNC constexpr PlainObjectBase() = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() = default;
  /** \brief Move constructor */
-  EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&&) = default;
  /** \brief Move assignment operator */
  EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) noexcept {
    m_storage = std::move(other.m_storage);
@@ -450,7 +456,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  }

  /** Copy constructor */
-  EIGEN_DEVICE_FUNC constexpr PlainObjectBase(const PlainObjectBase&) = default;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase&) = default;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
      : m_storage(size, rows, cols) {}

@@ -461,7 +467,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
   * This constructor is for 1D array or vectors with more than 4 coefficients.
   *
   * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
-   * constructor must match the fixed number of rows (resp. columns) of \c *this.
+   * constructor must match the fixed number of rows (resp. columns) of \c *this.
   */
  template <typename... ArgTypes>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2,
@@ -518,14 +524,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {

  /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived>& other) : m_storage() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived>& other) : m_storage() {
    resizeLike(other);
    _set_noalias(other);
  }

  /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived>& other) : m_storage() {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived>& other) : m_storage() {
    resizeLike(other);
    *this = other.derived();
  }
@@ -685,7 +691,6 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
    eigen_assert((this->size() == 0 || (IsVectorAtCompileTime ? (this->size() == other.size())
                                                              : (rows() == other.rows() && cols() == other.cols()))) &&
                 "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
-    if (this->size() == 0) resizeLike(other);
    EIGEN_ONLY_USED_FOR_DEBUG(other);
 #else
    resizeLike(other);
@@ -709,7 +714,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  // aliasing is dealt once in internal::call_assignment
  // so at this stage we have to assume aliasing... and resising has to be done later.
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& _set(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase<OtherDerived>& other) {
    internal::call_assignment(this->derived(), other.derived());
    return this->derived();
  }
@@ -720,7 +725,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
   * \sa operator=(const MatrixBase<OtherDerived>&), _set()
   */
  template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase<OtherDerived>& other) {
    // I don't think we need this resize call since the lazyAssign will anyways resize
    // and lazyAssign will be called by the assign selector.
    //_resize_to_match(other);
@@ -732,23 +737,23 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  }

  template <typename T0, typename T1>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2(Index rows, Index cols,
-                                                              std::enable_if_t<Base::SizeAtCompileTime != 2, T0>* = 0) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(Index rows, Index cols,
+                                                    std::enable_if_t<Base::SizeAtCompileTime != 2, T0>* = 0) {
    EIGEN_STATIC_ASSERT(internal::is_valid_index_type<T0>::value && internal::is_valid_index_type<T1>::value,
                        T0 AND T1 MUST BE INTEGER TYPES)
    resize(rows, cols);
  }

  template <typename T0, typename T1>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1,
-                                                              std::enable_if_t<Base::SizeAtCompileTime == 2, T0>* = 0) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1,
+                                                    std::enable_if_t<Base::SizeAtCompileTime == 2, T0>* = 0) {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
    m_storage.data()[0] = Scalar(val0);
    m_storage.data()[1] = Scalar(val1);
  }

  template <typename T0, typename T1>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(
      const Index& val0, const Index& val1,
      std::enable_if_t<(!internal::is_same<Index, Scalar>::value) && (internal::is_same<T0, Index>::value) &&
                           (internal::is_same<T1, Index>::value) && Base::SizeAtCompileTime == 2,
@@ -761,7 +766,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array,
  // then the argument is meant to be the size of the object.
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(
      Index size,
      std::enable_if_t<(Base::SizeAtCompileTime != 1 || !internal::is_convertible<T, Scalar>::value) &&
                           ((!internal::is_same<typename internal::traits<Derived>::XprKind, ArrayXpr>::value ||
@@ -777,7 +782,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar
  // type can be implicitly converted)
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(
      const Scalar& val0,
      std::enable_if_t<Base::SizeAtCompileTime == 1 && internal::is_convertible<T, Scalar>::value, T>* = 0) {
    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
@@ -787,7 +792,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {
  // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar
  // type match the index type)
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(
      const Index& val0,
      std::enable_if_t<(!internal::is_same<Index, Scalar>::value) && (internal::is_same<Index, T>::value) &&
                           Base::SizeAtCompileTime == 1 && internal::is_convertible<T, Scalar>::value,
@@ -798,42 +803,42 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {

  // Initialize a fixed size matrix from a pointer to raw data
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const Scalar* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar* data) {
    this->_set_noalias(ConstMapType(data));
  }

  // Initialize an arbitrary matrix from a dense expression
  template <typename T, typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other) {
    this->_set_noalias(other);
  }

  // Initialize an arbitrary matrix from an object convertible to the Derived type.
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const Derived& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Derived& other) {
    this->_set_noalias(other);
  }

  // Initialize an arbitrary matrix from a generic Eigen expression
  template <typename T, typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other) {
    this->derived() = other;
  }

  template <typename T, typename OtherDerived>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other) {
    resize(other.rows(), other.cols());
    other.evalTo(this->derived());
  }

  template <typename T, typename OtherDerived, int ColsAtCompileTime>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived, ColsAtCompileTime>& r) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived, ColsAtCompileTime>& r) {
    this->derived() = r;
  }

  // For fixed-size Array<Scalar,...>
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(
      const Scalar& val0,
      std::enable_if_t<Base::SizeAtCompileTime != Dynamic && Base::SizeAtCompileTime != 1 &&
                           internal::is_convertible<T, Scalar>::value &&
@@ -844,7 +849,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type {

  // For fixed-size Array<Index,...>
  template <typename T>
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(
      const Index& val0,
      std::enable_if_t<(!internal::is_same<Index, Scalar>::value) && (internal::is_same<Index, T>::value) &&
                           Base::SizeAtCompileTime != Dynamic && Base::SizeAtCompileTime != 1 &&
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -219,16 +219,16 @@ class Product
  using TransposeReturnType = typename internal::product_transpose_helper<Lhs, Rhs, Option>::TransposeType;
  using AdjointReturnType = typename internal::product_transpose_helper<Lhs, Rhs, Option>::AdjointType;

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) {
    eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" &&
                 "if you wanted a coeff-wise or a dot product use the respective explicit functions");
  }

-  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_lhs.rows(); }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_rhs.cols(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_lhs.rows(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_rhs.cols(); }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; }
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeReturnType transpose() const {
    return internal::product_transpose_helper<Lhs, Rhs, Option>::run_transpose(*this);
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -15,13 +15,6 @@
 // IWYU pragma: private
 #include "./InternalHeaderCheck.h"

-// C4804: unsafe use of type 'bool' in operation. Unavoidable in generic code
-// instantiated with bool scalars (e.g. += and * on bool).
-#if EIGEN_COMP_MSVC
-#pragma warning(push)
-#pragma warning(disable : 4804)
-#endif
-
 namespace Eigen {

 namespace internal {
@@ -36,27 +29,30 @@ namespace internal {
 */
 template <typename Lhs, typename Rhs, int Options>
 struct evaluator<Product<Lhs, Rhs, Options>> : public product_evaluator<Product<Lhs, Rhs, Options>> {
-  using XprType = Product<Lhs, Rhs, Options>;
-  using Base = product_evaluator<XprType>;
+  typedef Product<Lhs, Rhs, Options> XprType;
+  typedef product_evaluator<XprType> Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
 };

 // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
-// TODO: we should apply that rule only if that's really helpful
+// TODO we should apply that rule only if that's really helpful
 template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
-                                               const Product<Lhs, Rhs, DefaultProduct>>> : std::true_type {};
+                                               const Product<Lhs, Rhs, DefaultProduct>>> {
+  static const bool value = true;
+};
 template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
 struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                               const Product<Lhs, Rhs, DefaultProduct>>>
    : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> {
-  using XprType = CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
-                                const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
-                                const Product<Lhs, Rhs, DefaultProduct>>;
-  using Base = evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>>;
+  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
+                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+                        const Product<Lhs, Rhs, DefaultProduct>>
+      XprType;
+  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
      : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {}
@@ -65,8 +61,8 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
 template <typename Lhs, typename Rhs, int DiagIndex>
 struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>>
    : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> {
-  using XprType = Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>;
-  using Base = evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>>;
+  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> Base;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
      : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
@@ -82,16 +78,18 @@ template <typename Lhs, typename Rhs, typename LhsShape = typename evaluator_tra
 struct generic_product_impl;

 template <typename Lhs, typename Rhs>
-struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> : std::true_type {};
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> {
+  static const bool value = true;
+};

 // This is the default evaluator implementation for products:
 // It creates a temporary and call generic_product_impl
 template <typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
 struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
    : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
-  using XprType = Product<Lhs, Rhs, Options>;
-  using PlainObject = typename XprType::PlainObject;
-  using Base = evaluator<PlainObject>;
+  typedef Product<Lhs, Rhs, Options> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
  enum { Flags = Base::Flags | EvalBeforeNestingBit };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
@@ -125,7 +123,7 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Scalar, Scalar>, Dense2Dense,
                  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
-  using SrcXprType = Product<Lhs, Rhs, Options>;
+  typedef Product<Lhs, Rhs, Options> SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const internal::assign_op<Scalar, Scalar>&) {
    Index dstRows = src.rows();
@@ -140,7 +138,7 @@ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Sc
 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_op<Scalar, Scalar>, Dense2Dense,
                  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
-  using SrcXprType = Product<Lhs, Rhs, Options>;
+  typedef Product<Lhs, Rhs, Options> SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const internal::add_assign_op<Scalar, Scalar>&) {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -153,7 +151,7 @@ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_o
 template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
 struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_op<Scalar, Scalar>, Dense2Dense,
                  std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
-  using SrcXprType = Product<Lhs, Rhs, Options>;
+  typedef Product<Lhs, Rhs, Options> SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const internal::sub_assign_op<Scalar, Scalar>&) {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -163,7 +161,7 @@ struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_o
 };

 // Dense ?= scalar * Product
-// TODO: we should apply that rule if that's really helpful
+// TODO we should apply that rule if that's really helpful
 // for instance, this is not good for inner products
 template <typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis,
          typename Plain>
@@ -172,9 +170,10 @@ struct Assignment<DstXprType,
                                const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
                                const Product<Lhs, Rhs, DefaultProduct>>,
                  AssignFunc, Dense2Dense> {
-  using SrcXprType = CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
-                                   const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
-                                   const Product<Lhs, Rhs, DefaultProduct>>;
+  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
+                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
+                        const Product<Lhs, Rhs, DefaultProduct>>
+      SrcXprType;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const AssignFunc& func) {
    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func);
@@ -183,21 +182,25 @@ struct Assignment<DstXprType,

 //----------------------------------------
 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
-// FIXME: consider enabling these rules for all product types, not only Dense and DefaultProduct.
+// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct

 template <typename OtherXpr, typename Lhs, typename Rhs>
 struct evaluator_assume_aliasing<
    CwiseBinaryOp<
        internal::scalar_sum_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
        const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
-    DenseShape> : std::true_type {};
+    DenseShape> {
+  static const bool value = true;
+};

 template <typename OtherXpr, typename Lhs, typename Rhs>
 struct evaluator_assume_aliasing<
    CwiseBinaryOp<
        internal::scalar_difference_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
        const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
-    DenseShape> : std::true_type {};
+    DenseShape> {
+  static const bool value = true;
+};

 template <typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
 struct assignment_from_xpr_op_product {
@@ -234,17 +237,17 @@ template <typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
  using impl = default_inner_product_impl<Lhs, Rhs, false>;
  template <typename Dst>
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
    dst.coeffRef(0, 0) = impl::run(lhs, rhs);
  }

  template <typename Dst>
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
    dst.coeffRef(0, 0) += impl::run(lhs, rhs);
  }

  template <typename Dst>
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
    dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
  }
 };
@@ -281,9 +284,9 @@ template <typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
  template <typename T>
  struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

-  // TODO: it would be nice to be able to exploit our *_assign_op functors for that purpose
+  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
  struct set {
    template <typename Dst, typename Src>
    EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
@@ -340,7 +343,7 @@ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
 // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
 template <typename Lhs, typename Rhs, typename Derived>
 struct generic_product_impl_base {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
@@ -368,11 +371,11 @@ struct generic_product_impl_base {
 template <typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>
    : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>> {
-  using LhsNested = typename nested_eval<Lhs, 1>::type;
-  using RhsNested = typename nested_eval<Rhs, 1>::type;
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename nested_eval<Lhs, 1>::type LhsNested;
+  typedef typename nested_eval<Rhs, 1>::type RhsNested;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;
  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-  using MatrixType = internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>>;
+  typedef internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>> MatrixType;

  template <typename Dest>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
@@ -393,7 +396,7 @@ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>

 template <typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dst>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
@@ -482,9 +485,9 @@ struct etor_product_packet_impl;
 template <typename Lhs, typename Rhs, int ProductTag>
 struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
    : evaluator_base<Product<Lhs, Rhs, LazyProduct>> {
-  using XprType = Product<Lhs, Rhs, LazyProduct>;
-  using Scalar = typename XprType::Scalar;
-  using CoeffReturnType = typename XprType::CoeffReturnType;
+  typedef Product<Lhs, Rhs, LazyProduct> XprType;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
      : m_lhs(xpr.lhs()),
@@ -497,18 +500,30 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+#if 0
+    std::cerr << "LhsOuterStrideBytes=  " << LhsOuterStrideBytes << "\n";
+    std::cerr << "RhsOuterStrideBytes=  " << RhsOuterStrideBytes << "\n";
+    std::cerr << "LhsAlignment=         " << LhsAlignment << "\n";
+    std::cerr << "RhsAlignment=         " << RhsAlignment << "\n";
+    std::cerr << "CanVectorizeLhs=      " << CanVectorizeLhs << "\n";
+    std::cerr << "CanVectorizeRhs=      " << CanVectorizeRhs << "\n";
+    std::cerr << "CanVectorizeInner=    " << CanVectorizeInner << "\n";
+    std::cerr << "EvalToRowMajor=       " << EvalToRowMajor << "\n";
+    std::cerr << "Alignment=            " << Alignment << "\n";
+    std::cerr << "Flags=                " << Flags << "\n";
+#endif
  }

  // Everything below here is taken from CoeffBasedProduct.h

-  using LhsNested = typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type;
-  using RhsNested = typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type;
+  typedef typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type LhsNested;
+  typedef typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested;

-  using LhsNestedCleaned = internal::remove_all_t<LhsNested>;
-  using RhsNestedCleaned = internal::remove_all_t<RhsNested>;
+  typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
+  typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;

-  using LhsEtorType = evaluator<LhsNestedCleaned>;
-  using RhsEtorType = evaluator<RhsNestedCleaned>;
+  typedef evaluator<LhsNestedCleaned> LhsEtorType;
+  typedef evaluator<RhsNestedCleaned> RhsEtorType;

  enum {
    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
@@ -518,77 +533,78 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
  };

-  using LhsVecPacketType = typename find_best_packet<Scalar, RowsAtCompileTime>::type;
-  using RhsVecPacketType = typename find_best_packet<Scalar, ColsAtCompileTime>::type;
+  typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
+  typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;

-  static constexpr int LhsCoeffReadCost = LhsEtorType::CoeffReadCost;
-  static constexpr int RhsCoeffReadCost = RhsEtorType::CoeffReadCost;
-  static constexpr int CoeffReadCost =
-      InnerSize == 0 ? NumTraits<Scalar>::ReadCost
-      : InnerSize == Dynamic
-          ? HugeCost
-          : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
-                (InnerSize - 1) * NumTraits<Scalar>::AddCost;
+  enum {

-  static constexpr bool Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
+    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+    CoeffReadCost = InnerSize == 0 ? NumTraits<Scalar>::ReadCost
+                    : InnerSize == Dynamic
+                        ? HugeCost
+                        : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
+                              (InnerSize - 1) * NumTraits<Scalar>::AddCost,

-  static constexpr int LhsFlags = LhsEtorType::Flags;
-  static constexpr int RhsFlags = RhsEtorType::Flags;
+    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,

-  static constexpr int LhsRowMajor = LhsFlags & RowMajorBit;
-  static constexpr int RhsRowMajor = RhsFlags & RowMajorBit;
+    LhsFlags = LhsEtorType::Flags,
+    RhsFlags = RhsEtorType::Flags,

-  static constexpr int LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size;
-  static constexpr int RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size;
+    LhsRowMajor = LhsFlags & RowMajorBit,
+    RhsRowMajor = RhsFlags & RowMajorBit,

-  // Here, we don't care about alignment larger than the usable packet size.
-  static constexpr int LhsAlignment =
-      plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar)));
-  static constexpr int RhsAlignment =
-      plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar)));
+    LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
+    RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,

-  static constexpr bool SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value;
+    // Here, we don't care about alignment larger than the usable packet size.
+    LhsAlignment =
+        plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))),
+    RhsAlignment =
+        plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))),

-  static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1);
-  static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1);
+    SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value,

-  static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
-                                        : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
-                                            ? 0
-                                            : (bool(RhsRowMajor) && !CanVectorizeLhs);
+    CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1),
+    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1),

-  static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
-                               (EvalToRowMajor ? RowMajorBit : 0)
-                               // TODO: enable vectorization for mixed types
-                               | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
-                               (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0);
+    EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
+                     : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
+                         ? 0
+                         : (bool(RhsRowMajor) && !CanVectorizeLhs),

-  static constexpr int LhsOuterStrideBytes =
-      int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar));
-  static constexpr int RhsOuterStrideBytes =
-      int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar));
+    Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
+            (EvalToRowMajor ? RowMajorBit : 0)
+            // TODO enable vectorization for mixed types
+            | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
+            (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),

-  static constexpr int Alignment =
-      bool(CanVectorizeLhs)
-          ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
-                 ? 0
-                 : LhsAlignment)
-      : bool(CanVectorizeRhs)
-          ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
-                 ? 0
-                 : RhsAlignment)
-          : 0;
+    LhsOuterStrideBytes =
+        int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
+    RhsOuterStrideBytes =
+        int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

-  /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
-   * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
-   * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
-   * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
-   */
-  static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
-                                            (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
-                                            (int(InnerSize) % packet_traits<Scalar>::size == 0);
+    Alignment = bool(CanVectorizeLhs)
+                    ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
+                           ? 0
+                           : LhsAlignment)
+                : bool(CanVectorizeRhs)
+                    ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
+                           ? 0
+                           : RhsAlignment)
+                    : 0,

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
+    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+     */
+    CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
+                        (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
+                        (int(InnerSize) % packet_traits<Scalar>::size == 0)
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
    return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
  }

@@ -596,7 +612,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
   * which is why we don't set the LinearAccessBit.
   * TODO: this seems possible when the result is a vector
   */
-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
    return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
@@ -605,9 +621,9 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  template <int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const {
    PacketType res;
-    using PacketImpl =
-        etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
-                                 Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>;
+    typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
+                                     Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
+        PacketImpl;
    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
    return res;
  }
@@ -623,9 +639,9 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
                                                                       Index count) const {
    PacketType res;
-    using PacketImpl =
-        etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
-                                 Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>;
+    typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
+                                     Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
+        PacketImpl;
    PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
    return res;
  }
@@ -644,15 +660,16 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  LhsEtorType m_lhsImpl;
  RhsEtorType m_rhsImpl;

-  variable_if_dynamic<Index, InnerSize> m_innerDim;
+  // TODO: Get rid of m_innerDim if known at compile time
+  Index m_innerDim;
 };

 template <typename Lhs, typename Rhs>
 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
    : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> {
-  using XprType = Product<Lhs, Rhs, DefaultProduct>;
-  using BaseProduct = Product<Lhs, Rhs, LazyProduct>;
-  using Base = product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape>;
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
  enum { Flags = Base::Flags | EvalBeforeNestingBit };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
      : Base(BaseProduct(xpr.lhs(), xpr.rhs())) {}
@@ -700,8 +717,8 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load

 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
-                                                                  Index /*innerDim*/, Packet& res) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
+                                                        Index /*innerDim*/, Packet& res) {
    res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
  }
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
@@ -714,8 +731,8 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {

 template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
 struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
-  static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
-                                                                  Index /*innerDim*/, Packet& res) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
+                                                        Index /*innerDim*/, Packet& res) {
    res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
  }
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
@@ -795,7 +812,7 @@ struct triangular_product_impl;
 template <typename Lhs, typename Rhs, int ProductTag>
 struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
    : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>> {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
@@ -807,7 +824,7 @@ struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
 template <typename Lhs, typename Rhs, int ProductTag>
 struct generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>
    : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>> {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
@@ -825,7 +842,7 @@ struct selfadjoint_product_impl;
 template <typename Lhs, typename Rhs, int ProductTag>
 struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
    : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>> {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dest>
  static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
@@ -837,7 +854,7 @@ struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
 template <typename Lhs, typename Rhs, int ProductTag>
 struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
    : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>> {
-  using Scalar = typename Product<Lhs, Rhs>::Scalar;
+  typedef typename Product<Lhs, Rhs>::Scalar Scalar;

  template <typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
@@ -852,7 +869,7 @@ struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>

 template <typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
 struct diagonal_product_evaluator_base : evaluator_base<Derived> {
-  using Scalar = typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType;
+  typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;

 public:
  enum {
@@ -879,8 +896,8 @@ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
                    (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))),
    LinearAccessMask_ =
        (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
-    Flags = ((HereditaryBits | LinearAccessMask_) & static_cast<unsigned int>(MatrixFlags)) |
-            (Vectorizable_ ? PacketAccessBit : 0),
+    Flags =
+        ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
    Alignment = evaluator<MatrixType>::Alignment,

    AsScalarProduct =
@@ -896,7 +913,7 @@ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
    if (AsScalarProduct)
      return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
    else
@@ -915,9 +932,8 @@ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
    enum {
      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
      DiagonalPacketLoadMode = plain_enum_min(
-          LoadMode, ((InnerSize * int(sizeof(Scalar))) % int(unpacket_traits<PacketType>::alignment) == 0)
-                        ? int(unpacket_traits<PacketType>::alignment)
-                        : int(evaluator<DiagonalType>::Alignment))
+          LoadMode,
+          ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment))  // FIXME hardcoded 16!!
    };
    return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
                          m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
@@ -936,9 +952,8 @@ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
    enum {
      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
      DiagonalPacketLoadMode = plain_enum_min(
-          LoadMode, ((InnerSize * int(sizeof(Scalar))) % int(unpacket_traits<PacketType>::alignment) == 0)
-                        ? int(unpacket_traits<PacketType>::alignment)
-                        : int(evaluator<DiagonalType>::Alignment))
+          LoadMode,
+          ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment))  // FIXME hardcoded 16!!
    };
    return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
                          m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
@@ -953,23 +968,24 @@ template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
    : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
                                      OnTheLeft> {
-  using Base =
-      diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>;
+  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
+                                          OnTheLeft>
+      Base;
  using Base::coeff;
  using Base::m_diagImpl;
  using Base::m_matImpl;
-  using Scalar = typename Base::Scalar;
+  typedef typename Base::Scalar Scalar;

-  using XprType = Product<Lhs, Rhs, ProductKind>;
-  using PlainObject = typename XprType::PlainObject;
-  using DiagonalType = typename Lhs::DiagonalVectorType;
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename Lhs::DiagonalVectorType DiagonalType;

  static constexpr int StorageOrder = Base::StorageOrder_;
  using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;

-  EIGEN_DEVICE_FUNC constexpr explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
  }

@@ -1007,22 +1023,23 @@ template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
    : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
                                      OnTheRight> {
-  using Base = diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
-                                               OnTheRight>;
+  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
+                                          OnTheRight>
+      Base;
  using Base::coeff;
  using Base::m_diagImpl;
  using Base::m_matImpl;
-  using Scalar = typename Base::Scalar;
+  typedef typename Base::Scalar Scalar;

-  using XprType = Product<Lhs, Rhs, ProductKind>;
-  using PlainObject = typename XprType::PlainObject;
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;

  static constexpr int StorageOrder = Base::StorageOrder_;
  using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;

-  EIGEN_DEVICE_FUNC constexpr explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
  }

@@ -1064,8 +1081,8 @@ struct permutation_matrix_product;

 template <typename ExpressionType, int Side, bool Transposed>
 struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> {
-  using MatrixType = typename nested_eval<ExpressionType, 1>::type;
-  using MatrixTypeCleaned = remove_all_t<MatrixType>;
+  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+  typedef remove_all_t<MatrixType> MatrixTypeCleaned;

  template <typename Dest, typename PermutationType>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm,
@@ -1153,7 +1170,7 @@ struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, Pr
 * Products with transpositions matrices
 ***************************************************************************/

-// FIXME: consider unifying Transpositions and Permutation into a single shape.
+// FIXME could we unify Transpositions and Permutation into a single "shape"??

 /** \internal
 * \class transposition_matrix_product
@@ -1161,14 +1178,14 @@ struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, Pr
 */
 template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
 struct transposition_matrix_product {
-  using MatrixType = typename nested_eval<ExpressionType, 1>::type;
-  using MatrixTypeCleaned = remove_all_t<MatrixType>;
+  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+  typedef remove_all_t<MatrixType> MatrixTypeCleaned;

  template <typename Dest, typename TranspositionType>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr,
                                                        const ExpressionType& xpr) {
    MatrixType mat(xpr);
-    using StorageIndex = typename TranspositionType::StorageIndex;
+    typedef typename TranspositionType::StorageIndex StorageIndex;
    const Index size = tr.size();
    StorageIndex j = 0;

@@ -1267,8 +1284,4 @@ struct generic_product_impl<Lhs, Rhs, HomogeneousShape, PermutationShape, Produc

 }  // end namespace Eigen

-#if EIGEN_COMP_MSVC
-#pragma warning(pop)
-#endif
-
 #endif  // EIGEN_PRODUCT_EVALUATORS_H
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -19,7 +19,7 @@ namespace internal {

 template <typename Scalar>
 struct scalar_random_op {
-  inline Scalar operator()() const { return random<Scalar>(); }
+  inline const Scalar operator()() const { return random<Scalar>(); }
 };

 template <typename Scalar>
--- a/Eigen/src/Core/RandomImpl.h
+++ b/Eigen/src/Core/RandomImpl.h
@@ -56,21 +56,19 @@ struct random_bits_impl {
  EIGEN_STATIC_ASSERT(std::is_unsigned<Scalar>::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER)
  using RandomDevice = eigen_random_device;
  using RandomReturnType = typename RandomDevice::ReturnType;
+  static constexpr int kEntropy = RandomDevice::Entropy;
  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
-  static constexpr int kEntropy = plain_enum_min(kTotalBits, RandomDevice::Entropy);
  // return a Scalar filled with numRandomBits beginning from the least significant bit
  static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) {
    eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits));
+    const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1));
    Scalar randomBits = 0;
-    for (int filledBits = 0; filledBits < numRandomBits; filledBits += kEntropy) {
-      Scalar r = static_cast<Scalar>(RandomDevice::run());
-      int remainingBits = numRandomBits - filledBits;
-      if (remainingBits < kEntropy) {
-        // clear the excess bits to avoid UB and rounding bias
-        r >>= kEntropy - remainingBits;
-      }
-      randomBits |= r << filledBits;
+    for (int shift = 0; shift < numRandomBits; shift += kEntropy) {
+      RandomReturnType r = RandomDevice::run();
+      randomBits |= static_cast<Scalar>(r) << shift;
    }
+    // clear the excess bits
+    randomBits &= mask;
    return randomBits;
  }
 };
@@ -206,8 +204,7 @@ struct random_int_impl<Scalar, false, true> {
 template <typename Scalar>
 struct random_int_impl<Scalar, true, true> {
  static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT;
-  // avoid implicit integral promotion to `int`
-  using BitsType = std::conditional_t<(sizeof(Scalar) < sizeof(int)), unsigned int, std::make_unsigned_t<Scalar> >;
+  using BitsType = typename make_unsigned<Scalar>::type;
  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) {
    if (y <= x) return x;
    // Avoid overflow by representing `range` as an unsigned type
--- a/Eigen/src/Core/RealView.h
+++ b/Eigen/src/Core/RealView.h
@@ -17,16 +17,20 @@ namespace Eigen {

 namespace internal {

-// Write access and vectorization requires array-oriented access to the real and imaginary components.
+// Vectorized assignment to RealView requires array-oriented access to the real and imaginary components.
 // From https://en.cppreference.com/w/cpp/numeric/complex.html:
 // For any pointer to an element of an array of std::complex<T> named p and any valid array index i,
 // reinterpret_cast<T*>(p)[2 * i] is the real part of the complex number p[i], and
 // reinterpret_cast<T*>(p)[2 * i + 1] is the imaginary part of the complex number p[i].

-template <typename T>
+template <typename ComplexScalar>
 struct complex_array_access : std::false_type {};
-template <typename T>
-struct complex_array_access<std::complex<T>> : std::true_type {};
+template <>
+struct complex_array_access<std::complex<float>> : std::true_type {};
+template <>
+struct complex_array_access<std::complex<double>> : std::true_type {};
+template <>
+struct complex_array_access<std::complex<long double>> : std::true_type {};

 template <typename Xpr>
 struct traits<RealView<Xpr>> : public traits<Xpr> {
@@ -36,17 +40,13 @@ struct traits<RealView<Xpr>> : public traits<Xpr> {
    if (size_as_int == Dynamic) return Dynamic;
    return times_two ? (2 * size_as_int) : size_as_int;
  }
-
  using Base = traits<Xpr>;
  using ComplexScalar = typename Base::Scalar;
  using Scalar = typename NumTraits<ComplexScalar>::Real;
-
-  static constexpr bool ArrayAccess = complex_array_access<ComplexScalar>::value;
-  static constexpr int ActualDirectAccessBit = ArrayAccess ? DirectAccessBit : 0;
-  static constexpr int ActualLvaluebit = !std::is_const<Xpr>::value && ArrayAccess ? LvalueBit : 0;
+  static constexpr int ActualDirectAccessBit = complex_array_access<ComplexScalar>::value ? DirectAccessBit : 0;
  static constexpr int ActualPacketAccessBit = packet_traits<Scalar>::Vectorizable ? PacketAccessBit : 0;
  static constexpr int FlagMask =
-      ActualDirectAccessBit | ActualLvaluebit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit;
+      ActualDirectAccessBit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit | LvalueBit;
  static constexpr int BaseFlags = int(evaluator<Xpr>::Flags) | int(Base::Flags);
  static constexpr int Flags = BaseFlags & FlagMask;
  static constexpr bool IsRowMajor = Flags & RowMajorBit;
@@ -66,63 +66,55 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
  using XprType = RealView<Xpr>;
  using ExpressionTraits = traits<XprType>;
  using ComplexScalar = typename ExpressionTraits::ComplexScalar;
+  using ComplexCoeffReturnType = typename BaseEvaluator::CoeffReturnType;
  using Scalar = typename ExpressionTraits::Scalar;

+  static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor;
  static constexpr int Flags = ExpressionTraits::Flags;
  static constexpr int CoeffReadCost = BaseEvaluator::CoeffReadCost;
  static constexpr int Alignment = BaseEvaluator::Alignment;
-  static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor;
-  static constexpr bool DirectAccess = (Flags & DirectAccessBit) != 0;
-
-  using ComplexCoeffReturnType = std::conditional_t<DirectAccess, const ComplexScalar&, ComplexScalar>;
-  using CoeffReturnType = std::conditional_t<DirectAccess, const Scalar&, Scalar>;

  EIGEN_DEVICE_FUNC explicit evaluator(XprType realView) : BaseEvaluator(realView.m_xpr) {}

-  template <bool Enable = DirectAccess, std::enable_if_t<!Enable, bool> = true>
+  template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<!Enable>>
  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const {
-    Index r = IsRowMajor ? row : row / 2;
-    Index c = IsRowMajor ? col / 2 : col;
-    bool p = (IsRowMajor ? col : row) & 1;
-    ComplexScalar ccoeff = BaseEvaluator::coeff(r, c);
-    return p ? numext::imag(ccoeff) : numext::real(ccoeff);
-  }
-  template <bool Enable = DirectAccess, std::enable_if_t<Enable, bool> = true>
-  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
-    Index r = IsRowMajor ? row : row / 2;
-    Index c = IsRowMajor ? col / 2 : col;
+    ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
    Index p = (IsRowMajor ? col : row) & 1;
-    ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(r, c);
-    return reinterpret_cast<const Scalar(&)[2]>(ccoeff)[p];
-  }
-  template <bool Enable = DirectAccess, std::enable_if_t<!Enable, bool> = true>
-  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {
-    ComplexScalar ccoeff = BaseEvaluator::coeff(index / 2);
-    bool p = index & 1;
-    return p ? numext::imag(ccoeff) : numext::real(ccoeff);
-  }
-  template <bool Enable = DirectAccess, std::enable_if_t<Enable, bool> = true>
-  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
-    ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(index / 2);
-    Index p = index & 1;
-    return reinterpret_cast<const Scalar(&)[2]>(ccoeff)[p];
-  }
-  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
-    Index r = IsRowMajor ? row : row / 2;
-    Index c = IsRowMajor ? col / 2 : col;
-    Index p = (IsRowMajor ? col : row) & 1;
-    ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(r, c);
-    return reinterpret_cast<Scalar(&)[2]>(ccoeffRef)[p];
-  }
-  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
-    ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(index / 2);
-    Index p = index & 1;
-    return reinterpret_cast<Scalar(&)[2]>(ccoeffRef)[p];
+    return p ? numext::imag(cscalar) : numext::real(cscalar);  // p == 1 (odd inner index) selects the imaginary part
  }

-  // If the first index is odd (imaginary), discard the first scalar
-  // in 'result' and assign the missing scalar.
-  // This operation is safe as the real component of the first scalar must exist.
+  template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<Enable>>
+  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index row, Index col) const {
+    ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
+    Index p = (IsRowMajor ? col : row) & 1;
+    return reinterpret_cast<const Scalar(&)[2]>(cscalar)[p];
+  }
+
+  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
+    ComplexScalar& cscalar = BaseEvaluator::coeffRef(IsRowMajor ? row : row / 2, IsRowMajor ? col / 2 : col);
+    Index p = (IsRowMajor ? col : row) & 1;
+    return reinterpret_cast<Scalar(&)[2]>(cscalar)[p];
+  }
+
+  template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<!Enable>>
+  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {
+    ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2);  // complex coefficient holding both parts
+    Index p = index & 1;  // parity of the real-view index: 0 -> real part, 1 -> imaginary part
+    return p ? numext::imag(cscalar) : numext::real(cscalar);
+  }
+
+  template <bool Enable = std::is_reference<ComplexCoeffReturnType>::value, typename = std::enable_if_t<Enable>>
+  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const {
+    ComplexCoeffReturnType cscalar = BaseEvaluator::coeff(index / 2);
+    Index p = index & 1;
+    return reinterpret_cast<const Scalar(&)[2]>(cscalar)[p];
+  }
+
+  constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
+    ComplexScalar& cscalar = BaseEvaluator::coeffRef(index / 2);
+    Index p = index & 1;
+    return reinterpret_cast<Scalar(&)[2]>(cscalar)[p];
+  }

  template <int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
@@ -130,20 +122,12 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
    using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
    EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
                        MISSING COMPATIBLE COMPLEX PACKET TYPE)
-    Index r = IsRowMajor ? row : row / 2;
-    Index c = IsRowMajor ? col / 2 : col;
-    bool p = (IsRowMajor ? col : row) & 1;
-    ComplexPacket cresult = BaseEvaluator::template packet<LoadMode, ComplexPacket>(r, c);
-    PacketType result = preinterpret<PacketType>(cresult);
-    if (p) {
-      Scalar aux[RealPacketSize + 1];
-      pstoreu(aux, result);
-      Index lastr = IsRowMajor ? row : row + RealPacketSize - 1;
-      Index lastc = IsRowMajor ? col + RealPacketSize - 1 : col;
-      aux[RealPacketSize] = coeff(lastr, lastc);
-      result = ploadu<PacketType>(aux + 1);
-    }
-    return result;
+    eigen_assert(((IsRowMajor ? col : row) % 2 == 0) && "the inner index must be even");
+
+    Index crow = IsRowMajor ? row : row / 2;
+    Index ccol = IsRowMajor ? col / 2 : col;
+    ComplexPacket cpacket = BaseEvaluator::template packet<LoadMode, ComplexPacket>(crow, ccol);
+    return preinterpret<PacketType, ComplexPacket>(cpacket);
  }

  template <int LoadMode, typename PacketType>
@@ -152,23 +136,12 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
    using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
    EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
                        MISSING COMPATIBLE COMPLEX PACKET TYPE)
-    ComplexPacket cresult = BaseEvaluator::template packet<LoadMode, ComplexPacket>(index / 2);
-    PacketType result = preinterpret<PacketType>(cresult);
-    bool p = index & 1;
-    if (p) {
-      Scalar aux[RealPacketSize + 1];
-      pstoreu(aux, result);
-      aux[RealPacketSize] = coeff(index + RealPacketSize - 1);
-      result = ploadu<PacketType>(aux + 1);
-    }
-    return result;
-  }
+    eigen_assert((index % 2 == 0) && "the index must be even");

-  // The requested real packet segment forms the half-open interval [begin, end), where 'end' = 'begin' + 'count'.
-  // In order to access the underlying complex array, even indices must be aligned with the real components
-  // of the complex scalars. 'begin' and 'count' must be modified as follows:
-  // a) 'begin' must be rounded down to the nearest even number; and
-  // b) 'end' must be rounded up to the nearest even number.
+    Index cindex = index / 2;
+    ComplexPacket cpacket = BaseEvaluator::template packet<LoadMode, ComplexPacket>(cindex);
+    return preinterpret<PacketType, ComplexPacket>(cpacket);
+  }

  template <int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
@@ -176,24 +149,15 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
    using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
    EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
                        MISSING COMPATIBLE COMPLEX PACKET TYPE)
-    Index actualBegin = numext::round_down(begin, 2);
-    Index actualEnd = numext::round_down(begin + count + 1, 2);
-    Index actualCount = actualEnd - actualBegin;
-    Index r = IsRowMajor ? row : row / 2;
-    Index c = IsRowMajor ? col / 2 : col;
-    ComplexPacket cresult =
-        BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(r, c, actualBegin / 2, actualCount / 2);
-    PacketType result = preinterpret<PacketType>(cresult);
-    bool p = (IsRowMajor ? col : row) & 1;
-    if (p) {
-      Scalar aux[RealPacketSize + 1] = {};
-      pstoreu(aux, result);
-      Index lastr = IsRowMajor ? row : row + actualEnd - 1;
-      Index lastc = IsRowMajor ? col + actualEnd - 1 : col;
-      aux[actualEnd] = coeff(lastr, lastc);
-      result = ploadu<PacketType>(aux + 1);
-    }
-    return result;
+    eigen_assert(((IsRowMajor ? col : row) % 2 == 0) && "the inner index must be even");
+    eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even");
+
+    Index crow = IsRowMajor ? row : row / 2;
+    Index ccol = IsRowMajor ? col / 2 : col;
+    Index cbegin = begin / 2;
+    Index ccount = count / 2;
+    ComplexPacket cpacket = BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(crow, ccol, cbegin, ccount);
+    return preinterpret<PacketType, ComplexPacket>(cpacket);
  }

  template <int LoadMode, typename PacketType>
@@ -202,20 +166,14 @@ struct evaluator<RealView<Xpr>> : private evaluator<Xpr> {
    using ComplexPacket = typename find_packet_by_size<ComplexScalar, RealPacketSize / 2>::type;
    EIGEN_STATIC_ASSERT((find_packet_by_size<ComplexScalar, RealPacketSize / 2>::value),
                        MISSING COMPATIBLE COMPLEX PACKET TYPE)
-    Index actualBegin = numext::round_down(begin, 2);
-    Index actualEnd = numext::round_down(begin + count + 1, 2);
-    Index actualCount = actualEnd - actualBegin;
-    ComplexPacket cresult =
-        BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(index / 2, actualBegin / 2, actualCount / 2);
-    PacketType result = preinterpret<PacketType>(cresult);
-    bool p = index & 1;
-    if (p) {
-      Scalar aux[RealPacketSize + 1] = {};
-      pstoreu(aux, result);
-      aux[actualEnd] = coeff(index + actualEnd - 1);
-      result = ploadu<PacketType>(aux + 1);
-    }
-    return result;
+    eigen_assert((index % 2 == 0) && "the index must be even");
+    eigen_assert((begin % 2 == 0) && (count % 2 == 0) && "begin and count must be even");
+
+    Index cindex = index / 2;
+    Index cbegin = begin / 2;
+    Index ccount = count / 2;
+    ComplexPacket cpacket = BaseEvaluator::template packetSegment<LoadMode, ComplexPacket>(cindex, cbegin, ccount);
+    return preinterpret<PacketType, ComplexPacket>(cpacket);
  }
 };

@@ -253,7 +211,7 @@ class RealView : public internal::dense_xpr_base<RealView<Xpr>>::type {
  EIGEN_DEVICE_FUNC RealView& operator=(const DenseBase<OtherDerived>& other);

 protected:
-  friend struct internal::evaluator<RealView>;
+  friend struct internal::evaluator<RealView<Xpr>>;
  Xpr& m_xpr;
 };

--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -101,7 +101,7 @@ struct redux_novec_unroller {

  typedef typename Evaluator::Scalar Scalar;

-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
    return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval, func),
                redux_novec_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::run(eval, func));
  }
@@ -114,7 +114,7 @@ struct redux_novec_unroller<Func, Evaluator, Start, 1> {

  typedef typename Evaluator::Scalar Scalar;

-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
    return eval.coeffByOuterInner(outer, inner);
  }
 };
@@ -125,7 +125,7 @@ struct redux_novec_unroller<Func, Evaluator, Start, 1> {
 template <typename Func, typename Evaluator, Index Start>
 struct redux_novec_unroller<Func, Evaluator, Start, 0> {
  typedef typename Evaluator::Scalar Scalar;
-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
 };

 template <typename Func, typename Evaluator, Index Start, Index Length>
@@ -134,7 +134,7 @@ struct redux_novec_linear_unroller {

  typedef typename Evaluator::Scalar Scalar;

-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) {
    return func(redux_novec_linear_unroller<Func, Evaluator, Start, HalfLength>::run(eval, func),
                redux_novec_linear_unroller<Func, Evaluator, Start + HalfLength, Length - HalfLength>::run(eval, func));
  }
@@ -144,7 +144,7 @@ template <typename Func, typename Evaluator, Index Start>
 struct redux_novec_linear_unroller<Func, Evaluator, Start, 1> {
  typedef typename Evaluator::Scalar Scalar;

-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) {
    return eval.coeff(Start);
  }
 };
@@ -155,7 +155,7 @@ struct redux_novec_linear_unroller<Func, Evaluator, Start, 1> {
 template <typename Func, typename Evaluator, Index Start>
 struct redux_novec_linear_unroller<Func, Evaluator, Start, 0> {
  typedef typename Evaluator::Scalar Scalar;
-  EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
 };

 /*** vectorization ***/
@@ -367,7 +367,7 @@ struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>

  template <typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) {
-    EIGEN_ONLY_USED_FOR_DEBUG(xpr);
+    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(
@@ -398,8 +398,8 @@ class redux_evaluator : public internal::evaluator<XprType_> {
  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
-    // TODO: we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at
-    // runtime from the evaluator
+    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime
+    // from the evaluator
    Flags = Base::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -43,7 +43,7 @@ struct traits<Ref<PlainObjectType_, Options_, StrideType_> >
      OuterStrideMatch = IsVectorAtCompileTime || int(OuterStrideAtCompileTime) == int(Dynamic) ||
                         int(OuterStrideAtCompileTime) == int(Derived::OuterStrideAtCompileTime),
      // NOTE, this indirection of evaluator<Derived>::Alignment is needed
-      // to work around an MSVC bug related to the instantiation
+      // to work around a very strange bug in MSVC related to the instantiation
      // of has_*ary_operator in evaluator<CwiseNullaryOp>.
      // This line is surprisingly very sensitive. For instance, simply adding parenthesis
      // as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
@@ -265,7 +265,7 @@ class Ref : public RefBase<Ref<PlainObjectType, Options, StrideType> > {
 private:
  typedef internal::traits<Ref> Traits;
  template <typename Derived>
-  EIGEN_DEVICE_FUNC constexpr inline Ref(
+  EIGEN_DEVICE_FUNC inline Ref(
      const PlainObjectBase<Derived>& expr,
      std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime), Derived>* = 0);

@@ -275,17 +275,17 @@ class Ref : public RefBase<Ref<PlainObjectType, Options, StrideType> > {

 #ifndef EIGEN_PARSED_BY_DOXYGEN
  template <typename Derived>
-  EIGEN_DEVICE_FUNC constexpr inline Ref(
+  EIGEN_DEVICE_FUNC inline Ref(
      PlainObjectBase<Derived>& expr,
      std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime), Derived>* = 0) {
    EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
    // Construction must pass since we will not create temporary storage in the non-const case.
    const bool success = Base::construct(expr.derived());
-    EIGEN_UNUSED_VARIABLE(success);
+    EIGEN_UNUSED_VARIABLE(success)
    eigen_assert(success);
  }
  template <typename Derived>
-  EIGEN_DEVICE_FUNC constexpr inline Ref(
+  EIGEN_DEVICE_FUNC inline Ref(
      const DenseBase<Derived>& expr,
      std::enable_if_t<bool(Traits::template match<Derived>::MatchAtCompileTime), Derived>* = 0)
 #else
@@ -299,7 +299,7 @@ class Ref : public RefBase<Ref<PlainObjectType, Options, StrideType> > {
    EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
    // Construction must pass since we will not create temporary storage in the non-const case.
    const bool success = Base::construct(expr.const_cast_derived());
-    EIGEN_UNUSED_VARIABLE(success);
+    EIGEN_UNUSED_VARIABLE(success)
    eigen_assert(success);
  }

@@ -327,9 +327,8 @@ class Ref<const TPlainObjectType, Options, StrideType>
  EIGEN_DENSE_PUBLIC_INTERFACE(Ref)

  template <typename Derived>
-  EIGEN_DEVICE_FUNC constexpr inline Ref(
-      const DenseBase<Derived>& expr,
-      std::enable_if_t<bool(Traits::template match<Derived>::ScalarTypeMatch), Derived>* = 0) {
+  EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                               std::enable_if_t<bool(Traits::template match<Derived>::ScalarTypeMatch), Derived>* = 0) {
    //      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << ","
    //      << match_helper<Derived>::InnerStrideMatch << "\n"; std::cout << int(StrideType::OuterStrideAtCompileTime)
    //      << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; std::cout <<
@@ -339,11 +338,11 @@ class Ref<const TPlainObjectType, Options, StrideType>
    construct(expr.derived(), typename Traits::template match<Derived>::type());
  }

-  EIGEN_DEVICE_FUNC constexpr inline Ref(const Ref& other) : Base(other) {
+  EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
    // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
  }

-  EIGEN_DEVICE_FUNC constexpr inline Ref(Ref&& other) {
+  EIGEN_DEVICE_FUNC inline Ref(Ref&& other) {
    if (other.data() == other.m_object.data()) {
      m_object = std::move(other.m_object);
      Base::construct(m_object);
@@ -352,7 +351,7 @@ class Ref<const TPlainObjectType, Options, StrideType>
  }

  template <typename OtherRef>
-  EIGEN_DEVICE_FUNC constexpr inline Ref(const RefBase<OtherRef>& other) {
+  EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
    EIGEN_STATIC_ASSERT(Traits::template match<OtherRef>::type::value || may_map_m_object_successfully,
                        STORAGE_LAYOUT_DOES_NOT_MATCH);
    construct(other.derived(), typename Traits::template match<OtherRef>::type());
@@ -371,7 +370,7 @@ class Ref<const TPlainObjectType, Options, StrideType>
  EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) {
    internal::call_assignment_no_alias(m_object, expr, internal::assign_op<Scalar, Scalar>());
    const bool success = Base::construct(m_object);
-    EIGEN_ONLY_USED_FOR_DEBUG(success);
+    EIGEN_ONLY_USED_FOR_DEBUG(success)
    eigen_assert(success);
  }

--- a/Eigen/src/Core/Replicate.h
+++ b/Eigen/src/Core/Replicate.h
@@ -30,7 +30,7 @@ struct traits<Replicate<MatrixType, RowFactor, ColFactor> > : traits<MatrixType>
    ColsAtCompileTime = ColFactor == Dynamic || int(MatrixType::ColsAtCompileTime) == Dynamic
                            ? Dynamic
                            : ColFactor * MatrixType::ColsAtCompileTime,
-    // FIXME: propagate MaxRowsAtCompileTime and MaxColsAtCompileTime.
+    // FIXME we don't propagate the max sizes !!!
    MaxRowsAtCompileTime = RowsAtCompileTime,
    MaxColsAtCompileTime = ColsAtCompileTime,
    IsRowMajor = MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1   ? 1
@@ -38,7 +38,7 @@ struct traits<Replicate<MatrixType, RowFactor, ColFactor> > : traits<MatrixType>
                 : (MatrixType::Flags & RowMajorBit)                      ? 1
                                                                          : 0,

-    // FIXME: consider enabling DirectAccess with negative strides.
+    // FIXME enable DirectAccess with negative strides?
    Flags = IsRowMajor ? RowMajorBit : 0
  };
 };
@@ -71,7 +71,7 @@ class Replicate : public internal::dense_xpr_base<Replicate<MatrixType, RowFacto
  typedef internal::remove_all_t<MatrixType> NestedExpression;

  template <typename OriginalMatrixType>
-  EIGEN_DEVICE_FUNC constexpr inline explicit Replicate(const OriginalMatrixType& matrix)
+  EIGEN_DEVICE_FUNC inline explicit Replicate(const OriginalMatrixType& matrix)
      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) {
    EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
                        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
@@ -79,7 +79,7 @@ class Replicate : public internal::dense_xpr_base<Replicate<MatrixType, RowFacto
  }

  template <typename OriginalMatrixType>
-  EIGEN_DEVICE_FUNC constexpr inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
+  EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) {
    EIGEN_STATIC_ASSERT((internal::is_same<std::remove_const_t<MatrixType>, OriginalMatrixType>::value),
                        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
@@ -88,7 +88,7 @@ class Replicate : public internal::dense_xpr_base<Replicate<MatrixType, RowFacto
  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_matrix.cols() * m_colFactor.value(); }

-  EIGEN_DEVICE_FUNC constexpr const MatrixTypeNested_& nestedExpression() const { return m_matrix; }
+  EIGEN_DEVICE_FUNC const MatrixTypeNested_& nestedExpression() const { return m_matrix; }

 protected:
  MatrixTypeNested m_matrix;
--- a/Eigen/src/Core/Reshaped.h
+++ b/Eigen/src/Core/Reshaped.h
@@ -107,7 +107,7 @@ class Reshaped : public ReshapedImpl<XprType, Rows, Cols, Order, typename intern

  /** Fixed-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline Reshaped(XprType& xpr) : Impl(xpr) {
+  EIGEN_DEVICE_FUNC inline Reshaped(XprType& xpr) : Impl(xpr) {
    EIGEN_STATIC_ASSERT(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic,
                        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
    eigen_assert(Rows * Cols == xpr.rows() * xpr.cols());
@@ -115,7 +115,7 @@ class Reshaped : public ReshapedImpl<XprType, Rows, Cols, Order, typename intern

  /** Dynamic-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline Reshaped(XprType& xpr, Index reshapeRows, Index reshapeCols)
+  EIGEN_DEVICE_FUNC inline Reshaped(XprType& xpr, Index reshapeRows, Index reshapeCols)
      : Impl(xpr, reshapeRows, reshapeCols) {
    eigen_assert((RowsAtCompileTime == Dynamic || RowsAtCompileTime == reshapeRows) &&
                 (ColsAtCompileTime == Dynamic || ColsAtCompileTime == reshapeCols));
@@ -136,8 +136,8 @@ class ReshapedImpl<XprType, Rows, Cols, Order, Dense>
 public:
  typedef Impl Base;
  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl)
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl(XprType& xpr) : Impl(xpr) {}
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols)
+  EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr) : Impl(xpr) {}
+  EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols)
      : Impl(xpr, reshapeRows, reshapeCols) {}
 };

@@ -161,15 +161,15 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, false>

  /** Fixed-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl_dense(XprType& xpr) : m_xpr(xpr), m_rows(Rows), m_cols(Cols) {}
+  EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr) : m_xpr(xpr), m_rows(Rows), m_cols(Cols) {}

  /** Dynamic-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)
+  EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)
      : m_xpr(xpr), m_rows(nRows), m_cols(nCols) {}

-  EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; }
-  EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; }
+  EIGEN_DEVICE_FUNC Index rows() const { return m_rows; }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_cols; }

 #ifdef EIGEN_PARSED_BY_DOXYGEN
  /** \sa MapBase::data() */
@@ -179,10 +179,10 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, false>
 #endif

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<XprType>& nestedExpression() const { return m_xpr; }
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<XprType>& nestedExpression() const { return m_xpr; }

  /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC constexpr std::remove_reference_t<XprType>& nestedExpression() { return m_xpr; }
+  EIGEN_DEVICE_FUNC std::remove_reference_t<XprType>& nestedExpression() { return m_xpr; }

 protected:
  MatrixTypeNested m_xpr;
@@ -203,16 +203,16 @@ class ReshapedImpl_dense<XprType, Rows, Cols, Order, true> : public MapBase<Resh

  /** Fixed-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl_dense(XprType& xpr) : Base(xpr.data()), m_xpr(xpr) {}
+  EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr) : Base(xpr.data()), m_xpr(xpr) {}

  /** Dynamic-size constructor
   */
-  EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)
+  EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols)
      : Base(xpr.data(), nRows, nCols), m_xpr(xpr) {}

-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<XprTypeNested>& nestedExpression() const { return m_xpr; }
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<XprTypeNested>& nestedExpression() const { return m_xpr; }

-  EIGEN_DEVICE_FUNC constexpr XprType& nestedExpression() { return m_xpr; }
+  EIGEN_DEVICE_FUNC XprType& nestedExpression() { return m_xpr; }

  /** \sa MapBase::innerStride() */
  EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return m_xpr.innerStride(); }
@@ -265,7 +265,7 @@ struct evaluator<Reshaped<ArgType, Rows, Cols, Order> >
    Alignment = evaluator<ArgType>::Alignment
  };
  typedef reshaped_evaluator<ArgType, Rows, Cols, Order, HasDirectAccess> reshaped_evaluator_type;
-  EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr) {
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr) {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
 };
@@ -283,8 +283,7 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ fals
    Alignment = 0
  };

-  EIGEN_DEVICE_FUNC constexpr explicit reshaped_evaluator(const XprType& xpr)
-      : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) {
+  EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) {
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

@@ -293,7 +292,7 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ fals

  typedef std::pair<Index, Index> RowCol;

-  EIGEN_DEVICE_FUNC constexpr inline RowCol index_remap(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC inline RowCol index_remap(Index rowId, Index colId) const {
    if (Order == ColMajor) {
      const Index nth_elem_idx = colId * m_xpr.rows() + rowId;
      return RowCol(nth_elem_idx % m_xpr.nestedExpression().rows(), nth_elem_idx / m_xpr.nestedExpression().rows());
@@ -303,38 +302,74 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ fals
    }
  }

-  EIGEN_DEVICE_FUNC constexpr inline Scalar& coeffRef(Index rowId, Index colId) {
+  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) {
    EIGEN_STATIC_ASSERT_LVALUE(XprType)
    const RowCol row_col = index_remap(rowId, colId);
    return m_argImpl.coeffRef(row_col.first, row_col.second);
  }

-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeffRef(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const {
    const RowCol row_col = index_remap(rowId, colId);
    return m_argImpl.coeffRef(row_col.first, row_col.second);
  }

-  EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const {
    const RowCol row_col = index_remap(rowId, colId);
    return m_argImpl.coeff(row_col.first, row_col.second);
  }

-  EIGEN_DEVICE_FUNC constexpr inline Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) {
    EIGEN_STATIC_ASSERT_LVALUE(XprType)
    const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0);
    return m_argImpl.coeffRef(row_col.first, row_col.second);
  }

-  EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeffRef(Index index) const {
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const {
    const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0);
    return m_argImpl.coeffRef(row_col.first, row_col.second);
  }

-  EIGEN_DEVICE_FUNC constexpr inline const CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const {
    const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0);
    return m_argImpl.coeff(row_col.first, row_col.second);
  }
+#if 0
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC
+  inline PacketScalar packet(Index rowId, Index colId) const
+  {
+    const RowCol row_col = index_remap(rowId, colId);  // reshaped (row, col) -> nested-expression (row, col)
+    return m_argImpl.template packet<Unaligned>(row_col.first, row_col.second);  // LoadMode is ignored here

+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC
+  inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+  {
+    const RowCol row_col = index_remap(rowId, colId);
+    m_argImpl.const_cast_derived().template writePacket<Unaligned>
+            (row_col.first, row_col.second, val);
+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC
+  inline PacketScalar packet(Index index) const
+  {
+    const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index,
+                                        RowsAtCompileTime == 1 ? index : 0);
+    return m_argImpl.template packet<Unaligned>(row_col.first, row_col.second);
+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC
+  inline void writePacket(Index index, const PacketScalar& val)
+  {
+    const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index,  // linear index -> (row, col):
+                                        RowsAtCompileTime == 1 ? index : 0);  // row vector walks columns, else rows
+    m_argImpl.template writePacket<Unaligned>(row_col.first, row_col.second, val);  // store, not a packet<> load
+  }
+#endif
 protected:
  evaluator<ArgType> m_argImpl;
  const XprType& m_xpr;
@@ -347,7 +382,7 @@ struct reshaped_evaluator<ArgType, Rows, Cols, Order, /* HasDirectAccess */ true
  typedef Reshaped<ArgType, Rows, Cols, Order> XprType;
  typedef typename XprType::Scalar Scalar;

-  EIGEN_DEVICE_FUNC constexpr explicit reshaped_evaluator(const XprType& xpr)
+  EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr)
      : mapbase_evaluator<XprType, typename XprType::PlainObject>(xpr) {
    // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta
    // lifetime
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -23,7 +23,7 @@ struct traits<ReturnByValue<Derived> > : public traits<typename traits<Derived>:
  enum {
    // We're disabling the DirectAccess because e.g. the constructor of
    // the Block-with-DirectAccess expression requires to have a coeffRef method.
-    // Also, this avoids having to implement stride support.
+    // Also, we don't want to have to implement the stride stuff.
    Flags = (traits<typename traits<Derived>::ReturnType>::Flags | EvalBeforeNestingBit) & ~DirectAccessBit
  };
 };
@@ -32,7 +32,7 @@ struct traits<ReturnByValue<Derived> > : public traits<typename traits<Derived>:
 * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
 * So internal::nested always gives the plain return matrix type.
 *
- * FIXME: this specialization may be redundant with EvalBeforeNestingBit.
+ * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
 * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
 */
 template <typename Derived, int n, typename PlainObject>
--- a/Eigen/src/Core/Reverse.h
+++ b/Eigen/src/Core/Reverse.h
@@ -83,7 +83,7 @@ class Reverse : public internal::dense_xpr_base<Reverse<MatrixType, Direction> >
  typedef internal::reverse_packet_cond<PacketScalar, ReversePacket> reverse_packet;

 public:
-  EIGEN_DEVICE_FUNC constexpr explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) {}
+  EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) {}

  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)

@@ -92,7 +92,7 @@ class Reverse : public internal::dense_xpr_base<Reverse<MatrixType, Direction> >

  EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); }

-  EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t<typename MatrixType::Nested>& nestedExpression() const {
+  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename MatrixType::Nested>& nestedExpression() const {
    return m_matrix;
  }

--- a/Eigen/src/Core/Select.h
+++ b/Eigen/src/Core/Select.h
@@ -45,7 +45,7 @@ using Select = CwiseTernaryOp<internal::scalar_boolean_select_op<typename DenseB
 */
 template <typename Derived>
 template <typename ThenDerived, typename ElseDerived>
-inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
+inline EIGEN_DEVICE_FUNC CwiseTernaryOp<
    internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar, typename DenseBase<ElseDerived>::Scalar,
                                       typename DenseBase<Derived>::Scalar>,
    ThenDerived, ElseDerived, Derived>
@@ -59,7 +59,7 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix, const Dense
 */
 template <typename Derived>
 template <typename ThenDerived>
-inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
+inline EIGEN_DEVICE_FUNC CwiseTernaryOp<
    internal::scalar_boolean_select_op<typename DenseBase<ThenDerived>::Scalar, typename DenseBase<ThenDerived>::Scalar,
                                       typename DenseBase<Derived>::Scalar>,
    ThenDerived, typename DenseBase<ThenDerived>::ConstantReturnType, Derived>
@@ -76,7 +76,7 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
 */
 template <typename Derived>
 template <typename ElseDerived>
-inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp<
+inline EIGEN_DEVICE_FUNC CwiseTernaryOp<
    internal::scalar_boolean_select_op<typename DenseBase<ElseDerived>::Scalar, typename DenseBase<ElseDerived>::Scalar,
                                       typename DenseBase<Derived>::Scalar>,
    typename DenseBase<ElseDerived>::ConstantReturnType, ElseDerived, Derived>
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -219,8 +219,8 @@ class SelfAdjointView : public TriangularBase<SelfAdjointView<MatrixType_, UpLo>

  /////////// Cholesky module ///////////

-  LLT<PlainObject, UpLo> llt() const;
-  LDLT<PlainObject, UpLo> ldlt() const;
+  const LLT<PlainObject, UpLo> llt() const;
+  const LDLT<PlainObject, UpLo> ldlt() const;

  /////////// Eigenvalue module ///////////

@@ -236,6 +236,14 @@ class SelfAdjointView : public TriangularBase<SelfAdjointView<MatrixType_, UpLo>
  MatrixTypeNested m_matrix;
 };

+// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
+// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
+// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
+// {
+//   return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo>
+//   >(lhs.derived(),rhs);
+// }
+
 // selfadjoint to dense matrix

 namespace internal {
@@ -280,14 +288,6 @@ class triangular_dense_assignment_kernel<UpLo, SelfAdjoint, SetOpposite, DstEval
    m_functor.assignCoeff(m_dst.coeffRef(col, row), numext::conj(tmp));
  }

-  // Override to ensure the SelfAdjoint assignCoeff (which mirrors conjugates) is called,
-  // not the base class version (which is a plain copy).
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) {
-    Index row = Base::rowIndexByOuterInner(outer, inner);
-    Index col = Base::colIndexByOuterInner(outer, inner);
-    assignCoeff(row, col);
-  }
-
  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { Base::assignCoeff(id, id); }

  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index) { eigen_internal_assert(false && "should never be called"); }
@@ -302,7 +302,7 @@ class triangular_dense_assignment_kernel<UpLo, SelfAdjoint, SetOpposite, DstEval
 /** This is the const version of MatrixBase::selfadjointView() */
 template <typename Derived>
 template <unsigned int UpLo>
-EIGEN_DEVICE_FUNC constexpr typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
+EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView() const {
  return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
 }
@@ -319,7 +319,7 @@ MatrixBase<Derived>::selfadjointView() const {
 */
 template <typename Derived>
 template <unsigned int UpLo>
-EIGEN_DEVICE_FUNC constexpr typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
+EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
 MatrixBase<Derived>::selfadjointView() {
  return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
 }
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -16,7 +16,7 @@
 namespace Eigen {

 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) {
  using ConstantExpr = typename internal::plain_constant_type<Derived, Scalar>::type;
  using Op = internal::mul_assign_op<Scalar>;
  internal::call_assignment(derived(), ConstantExpr(rows(), cols(), other), Op());
@@ -25,13 +25,13 @@ EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::ope

 template <typename Derived>
 template <bool Enable, typename>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const RealScalar& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const RealScalar& other) {
  realView() *= other;
  return derived();
 }

 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) {
  using ConstantExpr = typename internal::plain_constant_type<Derived, Scalar>::type;
  using Op = internal::div_assign_op<Scalar>;
  internal::call_assignment(derived(), ConstantExpr(rows(), cols(), other), Op());
@@ -40,7 +40,7 @@ EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::ope

 template <typename Derived>
 template <bool Enable, typename>
-EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const RealScalar& other) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const RealScalar& other) {
  realView() /= other;
  return derived();
 }
--- a/Eigen/src/Core/SkewSymmetricMatrix3.h
+++ b/Eigen/src/Core/SkewSymmetricMatrix3.h
@@ -62,7 +62,7 @@ class SkewSymmetricBase : public EigenBase<Derived> {
  /**
   * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type,
   * not an expression.
-   * \returns A dense matrix, with its entries set from the derived object. */
+   * \returns A dense matrix, with its entries set from the derived object. */
  EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); }

  /** Determinant vanishes */
@@ -308,7 +308,7 @@ class SkewSymmetricWrapper : public SkewSymmetricBase<SkewSymmetricWrapper<SkewS
 * \sa class SkewSymmetricWrapper, class SkewSymmetricMatrix3, vector(), isSkewSymmetric()
 **/
 template <typename Derived>
-EIGEN_DEVICE_FUNC constexpr const SkewSymmetricWrapper<const Derived> MatrixBase<Derived>::asSkewSymmetric() const {
+EIGEN_DEVICE_FUNC inline const SkewSymmetricWrapper<const Derived> MatrixBase<Derived>::asSkewSymmetric() const {
  return SkewSymmetricWrapper<const Derived>(derived());
 }

--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -69,8 +69,8 @@ class Solve : public SolveImpl<Decomposition, RhsType, typename internal::traits
  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dec.cols(); }
  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_rhs.cols(); }

-  EIGEN_DEVICE_FUNC constexpr const Decomposition &dec() const { return m_dec; }
-  EIGEN_DEVICE_FUNC constexpr const RhsType &rhs() const { return m_rhs; }
+  EIGEN_DEVICE_FUNC const Decomposition &dec() const { return m_dec; }
+  EIGEN_DEVICE_FUNC const RhsType &rhs() const { return m_rhs; }

 protected:
  const Decomposition &m_dec;
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -53,11 +53,10 @@ struct triangular_solver_selector<Lhs, Rhs, Side, Mode, NoUnrolling, 1> {
  typedef typename Lhs::Scalar LhsScalar;
  typedef typename Rhs::Scalar RhsScalar;
  typedef blas_traits<Lhs> LhsProductTraits;
-  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
-  typedef remove_all_t<ActualLhsType> ActualLhsTypeCleaned;
+  typedef typename LhsProductTraits::ExtractType ActualLhsType;
  typedef Map<Matrix<RhsScalar, Dynamic, 1>, Aligned> MappedRhs;
  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) {
-    add_const_on_value_type_t<ActualLhsType> actualLhs = LhsProductTraits::extract(lhs);
+    ActualLhsType actualLhs = LhsProductTraits::extract(lhs);

    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1

@@ -68,11 +67,10 @@ struct triangular_solver_selector<Lhs, Rhs, Side, Mode, NoUnrolling, 1> {
    if (!useRhsDirectly) MappedRhs(actualRhs, rhs.size()) = rhs;

    triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
-                            (int(ActualLhsTypeCleaned::Flags) & RowMajorBit) ? RowMajor
-                                                                             : ColMajor>::run(actualLhs.cols(),
-                                                                                              actualLhs.data(),
-                                                                                              actualLhs.outerStride(),
-                                                                                              actualRhs);
+                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>::run(actualLhs.cols(),
+                                                                                        actualLhs.data(),
+                                                                                        actualLhs.outerStride(),
+                                                                                        actualRhs);

    if (!useRhsDirectly) rhs = MappedRhs(actualRhs, rhs.size());
  }
@@ -183,15 +181,11 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::solveInPlace
  if (derived().cols() == 0) return;

  enum {
-    OtherFlags = internal::traits<OtherDerived>::Flags,
-    IsRowMajorVector =
-        (OtherFlags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime != 1,
-    copy = IsRowMajorVector || ((OtherFlags & DirectAccessBit) == 0)
+    copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime &&
+           OtherDerived::SizeAtCompileTime != 1
  };
-  typedef std::conditional_t<IsRowMajorVector, typename internal::plain_matrix_type_column_major<OtherDerived>::type,
-                             typename internal::plain_matrix_type<OtherDerived>::type>
-      OtherPlainObject;
-  typedef std::conditional_t<copy, OtherPlainObject, OtherDerived&> OtherCopy;
+  typedef std::conditional_t<copy, typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>
+      OtherCopy;
  OtherCopy otherCopy(other);

  internal::triangular_solver_selector<MatrixType, std::remove_reference_t<OtherCopy>, Side, Mode>::run(
--- a/Eigen/src/Core/SolverBase.h
+++ b/Eigen/src/Core/SolverBase.h
@@ -111,7 +111,7 @@ class SolverBase : public EigenBase<Derived> {
  /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
   */
  template <typename Rhs>
-  inline Solve<Derived, Rhs> solve(const MatrixBase<Rhs>& b) const {
+  inline const Solve<Derived, Rhs> solve(const MatrixBase<Rhs>& b) const {
    internal::solve_assertion<internal::remove_all_t<Derived>>::template run<false>(derived(), b);
    return Solve<Derived, Rhs>(derived(), b.derived());
  }
--- a/Show More
+++ b/Show More