Replace blas/f2c with clean C++ implementations

libeigen/eigen!2402 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
2026-04-10 11:34:33 +08:00 · 2026-04-05 16:04:41 -07:00
parent fe6ada10be
commit 4ad90a60f1
29 changed files with 1170 additions and 6371 deletions
--- a/benchmarks/BLAS/CMakeLists.txt
+++ b/benchmarks/BLAS/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Benchmarks for Eigen's built-in BLAS implementation.
+# Compiles the Eigen BLAS sources directly into the benchmark executable
+# so there is no external BLAS dependency.
+
+set(EIGEN_BLAS_SRCS
+  ${EIGEN_SOURCE_DIR}/blas/single.cpp
+  ${EIGEN_SOURCE_DIR}/blas/double.cpp
+  ${EIGEN_SOURCE_DIR}/blas/complex_single.cpp
+  ${EIGEN_SOURCE_DIR}/blas/complex_double.cpp
+  ${EIGEN_SOURCE_DIR}/blas/xerbla.cpp
+  ${EIGEN_SOURCE_DIR}/blas/lsame.cpp
+  ${EIGEN_SOURCE_DIR}/blas/complexdots.cpp
+)
+
+eigen_add_benchmark(bench_blas bench_blas.cpp)
+target_sources(bench_blas PRIVATE ${EIGEN_BLAS_SRCS})
+target_include_directories(bench_blas PRIVATE ${EIGEN_SOURCE_DIR}/blas)
--- a/benchmarks/BLAS/bench_blas.cpp
+++ b/benchmarks/BLAS/bench_blas.cpp
@@ -0,0 +1,488 @@
+// Benchmark for Eigen's BLAS implementation.
+//
+// Calls the Eigen BLAS C interface directly (the extern "C" functions defined
+// in blas/{single,double,complex_single,complex_double}.cpp).
+//
+// Covers Level 1, 2, and 3 routines — with emphasis on the routines that
+// were recently rewritten from f2c to C++: rotm, rotmg, spmv, sbmv, hbmv,
+// hpmv, tbmv, lsame, and complex dot products.
+
+#include <benchmark/benchmark.h>
+
+#include <Eigen/Core>
+#include <complex>
+#include <vector>
+
+#include "blas/blas.h"
+
+using Eigen::Index;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// Flop-rate counter (units = individual flops per call).
+static benchmark::Counter GflopsCounter(double flops) {
+  return benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000);
+}
+
+// Fill a vector with random values in [-1, 1].
+template <typename T>
+static void fillRand(T* data, Index n) {
+  Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>>(data, n).setRandom();
+}
+
+// Fill a symmetric band matrix A in BLAS band storage (column-major).
+// Upper triangle: A[i,j] stored at a[(k+i-j) + j*lda], 0 <= j-i <= k.
+template <typename T>
+static void fillSymBandUpper(T* a, int n, int k, int lda) {
+  std::fill(a, a + lda * n, T(0));
+  for (int j = 0; j < n; ++j)
+    for (int i = std::max(0, j - k); i <= j; ++i) a[(k + i - j) + j * lda] = T(std::rand()) / T(RAND_MAX) - T(0.5);
+}
+
+// Fill a packed symmetric matrix (upper triangle, column-major).
+template <typename T>
+static void fillSymPacked(T* ap, int n) {
+  int sz = n * (n + 1) / 2;
+  for (int i = 0; i < sz; ++i) ap[i] = T(std::rand()) / T(RAND_MAX) - T(0.5);
+}
+
+// Fill a triangular band matrix in BLAS band storage (upper, column-major).
+template <typename T>
+static void fillTriBandUpper(T* a, int n, int k, int lda) {
+  std::fill(a, a + lda * n, T(0));
+  for (int j = 0; j < n; ++j)
+    for (int i = std::max(0, j - k); i <= j; ++i) {
+      T val = T(std::rand()) / T(RAND_MAX) - T(0.5);
+      if (i == j) val += T(n);  // diagonal dominance
+      a[(k + i - j) + j * lda] = val;
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Type-dispatched BLAS wrappers
+// ---------------------------------------------------------------------------
+
+inline float blas_dot(int* n, float* x, int* incx, float* y, int* incy) { return sdot_(n, x, incx, y, incy); }
+inline double blas_dot(int* n, double* x, int* incx, double* y, int* incy) { return ddot_(n, x, incx, y, incy); }
+
+inline void blas_axpy(int* n, float* a, float* x, int* incx, float* y, int* incy) { saxpy_(n, a, x, incx, y, incy); }
+inline void blas_axpy(int* n, double* a, double* x, int* incx, double* y, int* incy) { daxpy_(n, a, x, incx, y, incy); }
+
+inline float blas_nrm2(int* n, float* x, int* incx) { return snrm2_(n, x, incx); }
+inline double blas_nrm2(int* n, double* x, int* incx) { return dnrm2_(n, x, incx); }
+
+inline void blas_rotm(int* n, float* x, int* incx, float* y, int* incy, float* p) { srotm_(n, x, incx, y, incy, p); }
+inline void blas_rotm(int* n, double* x, int* incx, double* y, int* incy, double* p) { drotm_(n, x, incx, y, incy, p); }
+
+inline void blas_rotmg(float* d1, float* d2, float* x1, float* y1, float* p) { srotmg_(d1, d2, x1, y1, p); }
+inline void blas_rotmg(double* d1, double* d2, double* x1, double* y1, double* p) { drotmg_(d1, d2, x1, y1, p); }
+
+inline void blas_dotcw(int* n, float* cx, int* incx, float* cy, int* incy, float* res) {
+  cdotcw_(n, cx, incx, cy, incy, res);
+}
+inline void blas_dotcw(int* n, double* cx, int* incx, double* cy, int* incy, double* res) {
+  zdotcw_(n, cx, incx, cy, incy, res);
+}
+
+inline void blas_gemv(char* t, int* m, int* n, float* a, float* A, int* lda, float* x, int* incx, float* b, float* y,
+                      int* incy) {
+  sgemv_(t, m, n, a, A, lda, x, incx, b, y, incy);
+}
+inline void blas_gemv(char* t, int* m, int* n, double* a, double* A, int* lda, double* x, int* incx, double* b,
+                      double* y, int* incy) {
+  dgemv_(t, m, n, a, A, lda, x, incx, b, y, incy);
+}
+
+inline void blas_spmv(char* uplo, int* n, float* alpha, float* ap, float* x, int* incx, float* beta, float* y,
+                      int* incy) {
+  sspmv_(uplo, n, alpha, ap, x, incx, beta, y, incy);
+}
+inline void blas_spmv(char* uplo, int* n, double* alpha, double* ap, double* x, int* incx, double* beta, double* y,
+                      int* incy) {
+  dspmv_(uplo, n, alpha, ap, x, incx, beta, y, incy);
+}
+
+inline void blas_sbmv(char* uplo, int* n, int* k, float* alpha, float* a, int* lda, float* x, int* incx, float* beta,
+                      float* y, int* incy) {
+  ssbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+}
+inline void blas_sbmv(char* uplo, int* n, int* k, double* alpha, double* a, int* lda, double* x, int* incx,
+                      double* beta, double* y, int* incy) {
+  dsbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+}
+
+inline void blas_tbmv(char* uplo, char* trans, char* diag, int* n, int* k, float* a, int* lda, float* x, int* incx) {
+  stbmv_(uplo, trans, diag, n, k, a, lda, x, incx);
+}
+inline void blas_tbmv(char* uplo, char* trans, char* diag, int* n, int* k, double* a, int* lda, double* x, int* incx) {
+  dtbmv_(uplo, trans, diag, n, k, a, lda, x, incx);
+}
+
+inline void blas_hbmv(char* uplo, int* n, int* k, float* alpha, float* a, int* lda, float* x, int* incx, float* beta,
+                      float* y, int* incy) {
+  chbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+}
+inline void blas_hbmv(char* uplo, int* n, int* k, double* alpha, double* a, int* lda, double* x, int* incx,
+                      double* beta, double* y, int* incy) {
+  zhbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy);
+}
+
+inline void blas_hpmv(char* uplo, int* n, float* alpha, float* ap, float* x, int* incx, float* beta, float* y,
+                      int* incy) {
+  chpmv_(uplo, n, alpha, ap, x, incx, beta, y, incy);
+}
+inline void blas_hpmv(char* uplo, int* n, double* alpha, double* ap, double* x, int* incx, double* beta, double* y,
+                      int* incy) {
+  zhpmv_(uplo, n, alpha, ap, x, incx, beta, y, incy);
+}
+
+inline void blas_gemm(char* ta, char* tb, int* m, int* n, int* k, float* alpha, float* a, int* lda, float* b, int* ldb,
+                      float* beta, float* c, int* ldc) {
+  sgemm_(ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+}
+inline void blas_gemm(char* ta, char* tb, int* m, int* n, int* k, double* alpha, double* a, int* lda, double* b,
+                      int* ldb, double* beta, double* c, int* ldc) {
+  dgemm_(ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
+}
+
+// =========================================================================
+//  Level 1 — Real
+// =========================================================================
+
+// ----- SDOT / DDOT -----
+template <typename T>
+static void BM_dot(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  std::vector<T> x(n), y(n);
+  fillRand(x.data(), n);
+  fillRand(y.data(), n);
+  for (auto _ : state) {
+    T r = blas_dot(&n, x.data(), &one, y.data(), &one);
+    benchmark::DoNotOptimize(r);
+  }
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n);
+}
+
+// ----- SAXPY / DAXPY -----
+template <typename T>
+static void BM_axpy(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  T alpha = T(2.5);
+  std::vector<T> x(n), y(n);
+  fillRand(x.data(), n);
+  fillRand(y.data(), n);
+  for (auto _ : state) {
+    blas_axpy(&n, &alpha, x.data(), &one, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n);
+}
+
+// ----- SNRM2 / DNRM2 -----
+template <typename T>
+static void BM_nrm2(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  std::vector<T> x(n);
+  fillRand(x.data(), n);
+  for (auto _ : state) {
+    T r = blas_nrm2(&n, x.data(), &one);
+    benchmark::DoNotOptimize(r);
+  }
+  // Nominal flops; Eigen's stableNorm() does more work internally.
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n - 1);
+}
+
+// ----- SROTM / DROTM -----
+template <typename T>
+static void BM_rotm(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  std::vector<T> x(n), y(n);
+  T param[5] = {T(-1), T(0.6), T(-0.8), T(0.8), T(0.6)};  // full rotation
+  fillRand(x.data(), n);
+  fillRand(y.data(), n);
+  for (auto _ : state) {
+    blas_rotm(&n, x.data(), &one, y.data(), &one, param);
+    benchmark::DoNotOptimize(x.data());
+    benchmark::DoNotOptimize(y.data());
+  }
+  // 4 muls + 2 adds per element pair.
+  state.counters["GFLOPS"] = GflopsCounter(6.0 * n);
+}
+
+// ----- SROTMG / DROTMG -----
+template <typename T>
+static void BM_rotmg(benchmark::State& state) {
+  T d1 = T(2), d2 = T(3), x1 = T(1), y1 = T(0.5);
+  T param[5];
+  for (auto _ : state) {
+    T td1 = d1, td2 = d2, tx1 = x1;
+    blas_rotmg(&td1, &td2, &tx1, &y1, param);
+    benchmark::DoNotOptimize(param);
+  }
+}
+
+// =========================================================================
+//  Level 1 — Complex
+// =========================================================================
+
+// Complex conjugate dot product via the worker functions (cdotcw_ / zdotcw_)
+// which use an output pointer, avoiding the ABI ambiguity of the struct-returning
+// cdotc_ / zdotc_ wrappers.
+template <typename T>
+static void BM_dotc(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  std::vector<T> x(2 * n), y(2 * n);  // interleaved real/imag
+  fillRand(x.data(), 2 * n);
+  fillRand(y.data(), 2 * n);
+  T res[2];
+  for (auto _ : state) {
+    blas_dotcw(&n, x.data(), &one, y.data(), &one, res);
+    benchmark::DoNotOptimize(res);
+  }
+  // Conjugate dot: 6 mul + 2 add per element = 8n flops.
+  state.counters["GFLOPS"] = GflopsCounter(8.0 * n);
+}
+
+// =========================================================================
+//  Level 2 — General Matrix-Vector (SGEMV / DGEMV)
+// =========================================================================
+
+template <typename T>
+static void BM_gemv(benchmark::State& state) {
+  int m = static_cast<int>(state.range(0));
+  int n = static_cast<int>(state.range(1));
+  int one = 1;
+  T alpha = T(1), beta = T(0);
+  char trans = 'N';
+  std::vector<T> a(m * n), x(n), y(m);
+  fillRand(a.data(), m * n);
+  fillRand(x.data(), n);
+  fillRand(y.data(), m);
+  for (auto _ : state) {
+    blas_gemv(&trans, &m, &n, &alpha, a.data(), &m, x.data(), &one, &beta, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * m * n);
+}
+
+// =========================================================================
+//  Level 2 — Symmetric Packed (SSPMV / DSPMV)
+// =========================================================================
+
+template <typename T>
+static void BM_spmv(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  T alpha = T(1), beta = T(0);
+  char uplo = 'U';
+  int sz = n * (n + 1) / 2;
+  std::vector<T> ap(sz), x(n), y(n);
+  fillSymPacked(ap.data(), n);
+  fillRand(x.data(), n);
+  fillRand(y.data(), n);
+  for (auto _ : state) {
+    blas_spmv(&uplo, &n, &alpha, ap.data(), x.data(), &one, &beta, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  // Symmetric: each off-diag element contributes to two y entries.
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n * n);
+}
+
+// =========================================================================
+//  Level 2 — Symmetric Band (SSBMV / DSBMV)
+// =========================================================================
+
+template <typename T>
+static void BM_sbmv(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int k = static_cast<int>(state.range(1));
+  int lda = k + 1;
+  int one = 1;
+  T alpha = T(1), beta = T(0);
+  char uplo = 'U';
+  std::vector<T> a(lda * n), x(n), y(n);
+  fillSymBandUpper(a.data(), n, k, lda);
+  fillRand(x.data(), n);
+  fillRand(y.data(), n);
+  for (auto _ : state) {
+    blas_sbmv(&uplo, &n, &k, &alpha, a.data(), &lda, x.data(), &one, &beta, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n * (2 * k + 1));
+}
+
+// =========================================================================
+//  Level 2 — Triangular Band (STBMV / DTBMV)
+// =========================================================================
+
+template <typename T>
+static void BM_tbmv(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int k = static_cast<int>(state.range(1));
+  int lda = k + 1;
+  int one = 1;
+  char uplo = 'U', trans = 'N', diag = 'N';
+  std::vector<T> a(lda * n), x(n), x_orig(n);
+  fillTriBandUpper(a.data(), n, k, lda);
+  fillRand(x_orig.data(), n);
+  for (auto _ : state) {
+    state.PauseTiming();
+    std::copy(x_orig.begin(), x_orig.end(), x.begin());
+    state.ResumeTiming();
+    blas_tbmv(&uplo, &trans, &diag, &n, &k, a.data(), &lda, x.data(), &one);
+    benchmark::DoNotOptimize(x.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(1.0 * n * (k + 1));
+}
+
+// =========================================================================
+//  Level 2 — Hermitian Band (CHBMV / ZHBMV)
+// =========================================================================
+
+template <typename T>
+static void BM_hbmv(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int k = static_cast<int>(state.range(1));
+  int lda = k + 1;
+  int one = 1;
+  char uplo = 'U';
+  // Complex: each element is 2 reals.
+  std::vector<T> a(2 * lda * n), x(2 * n), y(2 * n);
+  T alpha[2] = {T(1), T(0)};
+  T beta[2] = {T(0), T(0)};
+  fillRand(a.data(), 2 * lda * n);
+  // Make diagonal real (imag part = 0).
+  for (int j = 0; j < n; ++j) a[2 * (k + j * lda) + 1] = T(0);
+  fillRand(x.data(), 2 * n);
+  fillRand(y.data(), 2 * n);
+  for (auto _ : state) {
+    blas_hbmv(&uplo, &n, &k, alpha, a.data(), &lda, x.data(), &one, beta, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  // Complex hermitian band: 8*n*(2k+1) flops approximately.
+  state.counters["GFLOPS"] = GflopsCounter(8.0 * n * (2 * k + 1));
+}
+
+// =========================================================================
+//  Level 2 — Hermitian Packed (CHPMV / ZHPMV)
+// =========================================================================
+
+template <typename T>
+static void BM_hpmv(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  int one = 1;
+  char uplo = 'U';
+  int sz = n * (n + 1) / 2;
+  std::vector<T> ap(2 * sz), x(2 * n), y(2 * n);
+  T alpha[2] = {T(1), T(0)};
+  T beta[2] = {T(0), T(0)};
+  fillRand(ap.data(), 2 * sz);
+  // Make diagonal real.
+  int kk = 0;
+  for (int j = 0; j < n; ++j) {
+    ap[2 * (kk + j) + 1] = T(0);
+    kk += j + 1;
+  }
+  fillRand(x.data(), 2 * n);
+  fillRand(y.data(), 2 * n);
+  for (auto _ : state) {
+    blas_hpmv(&uplo, &n, alpha, ap.data(), x.data(), &one, beta, y.data(), &one);
+    benchmark::DoNotOptimize(y.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(8.0 * n * n);
+}
+
+// =========================================================================
+//  Level 3 — General Matrix Multiply (SGEMM / DGEMM)
+// =========================================================================
+
+template <typename T>
+static void BM_gemm(benchmark::State& state) {
+  int n = static_cast<int>(state.range(0));
+  T alpha = T(1), beta = T(0);
+  char trans = 'N';
+  std::vector<T> a(n * n), b(n * n), c(n * n);
+  fillRand(a.data(), n * n);
+  fillRand(b.data(), n * n);
+  fillRand(c.data(), n * n);
+  for (auto _ : state) {
+    blas_gemm(&trans, &trans, &n, &n, &n, &alpha, a.data(), &n, b.data(), &n, &beta, c.data(), &n);
+    benchmark::DoNotOptimize(c.data());
+  }
+  state.counters["GFLOPS"] = GflopsCounter(2.0 * n * n * n);
+}
+
+// =========================================================================
+//  Register benchmarks
+// =========================================================================
+
+// clang-format off
+
+// --- Vector sizes for Level 1 ---
+#define L1_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096)->Arg(16384)->Arg(65536)
+
+BENCHMARK(BM_dot<float>)    L1_SIZES ->Name("sdot");
+BENCHMARK(BM_dot<double>)   L1_SIZES ->Name("ddot");
+BENCHMARK(BM_axpy<float>)   L1_SIZES ->Name("saxpy");
+BENCHMARK(BM_axpy<double>)  L1_SIZES ->Name("daxpy");
+BENCHMARK(BM_nrm2<float>)   L1_SIZES ->Name("snrm2");
+BENCHMARK(BM_nrm2<double>)  L1_SIZES ->Name("dnrm2");
+BENCHMARK(BM_rotm<float>)   L1_SIZES ->Name("srotm");
+BENCHMARK(BM_rotm<double>)  L1_SIZES ->Name("drotm");
+BENCHMARK(BM_rotmg<float>)  ->Name("srotmg");
+BENCHMARK(BM_rotmg<double>) ->Name("drotmg");
+BENCHMARK(BM_dotc<float>)   L1_SIZES ->Name("cdotc");
+BENCHMARK(BM_dotc<double>)  L1_SIZES ->Name("zdotc");
+
+#undef L1_SIZES
+
+// --- Matrix sizes for Level 2 ---
+// GEMV: {m, n}
+#define GEMV_SIZES \
+    ->Args({64, 64})->Args({256, 256})->Args({1024, 1024})->Args({4096, 4096}) \
+    ->Args({4096, 64})->Args({64, 4096})
+
+BENCHMARK(BM_gemv<float>)   GEMV_SIZES ->Name("sgemv");
+BENCHMARK(BM_gemv<double>)  GEMV_SIZES ->Name("dgemv");
+#undef GEMV_SIZES
+
+// Symmetric packed: {n}
+#define SPM_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096)
+
+BENCHMARK(BM_spmv<float>)  SPM_SIZES ->Name("sspmv");
+BENCHMARK(BM_spmv<double>) SPM_SIZES ->Name("dspmv");
+BENCHMARK(BM_hpmv<float>)  SPM_SIZES ->Name("chpmv");
+BENCHMARK(BM_hpmv<double>) SPM_SIZES ->Name("zhpmv");
+
+#undef SPM_SIZES
+
+// Band: {n, k}
+#define BAND_SIZES \
+    ->Args({256, 4})->Args({256, 32})->Args({1024, 4})->Args({1024, 32}) \
+    ->Args({4096, 4})->Args({4096, 32})->Args({4096, 128})
+
+BENCHMARK(BM_sbmv<float>)  BAND_SIZES ->Name("ssbmv");
+BENCHMARK(BM_sbmv<double>) BAND_SIZES ->Name("dsbmv");
+BENCHMARK(BM_tbmv<float>)  BAND_SIZES ->Name("stbmv");
+BENCHMARK(BM_tbmv<double>) BAND_SIZES ->Name("dtbmv");
+BENCHMARK(BM_hbmv<float>)  BAND_SIZES ->Name("chbmv");
+BENCHMARK(BM_hbmv<double>) BAND_SIZES ->Name("zhbmv");
+
+#undef BAND_SIZES
+
+// --- Square sizes for Level 3 ---
+#define GEMM_SIZES ->Arg(32)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024)
+
+BENCHMARK(BM_gemm<float>)  GEMM_SIZES ->Name("sgemm");
+BENCHMARK(BM_gemm<double>) GEMM_SIZES ->Name("dgemm");
+
+#undef GEMM_SIZES
+
+// clang-format on
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -20,7 +20,11 @@ function(eigen_add_benchmark name source)
  if(BENCH_LIBRARIES)
    target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
  endif()
-  target_compile_options(${name} PRIVATE -O3 -DNDEBUG)
+  target_compile_options(${name} PRIVATE
+    $<$<CXX_COMPILER_ID:MSVC>:/O2>
+    $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-O3>
+  )
+  target_compile_definitions(${name} PRIVATE NDEBUG)
  if(BENCH_DEFINITIONS)
    target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
  endif()
@@ -38,3 +42,4 @@ add_subdirectory(FFT)
 add_subdirectory(Householder)
 add_subdirectory(Solvers)
 add_subdirectory(Tuning)
+add_subdirectory(BLAS)
--- a/blas/CMakeLists.txt
+++ b/blas/CMakeLists.txt
@@ -6,11 +6,7 @@ if(EIGEN_BUILD_BLAS)
 add_custom_target(blas)

 set(EigenBlas_SRCS  single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp
-                    f2c/srotm.c   f2c/srotmg.c  f2c/drotm.c f2c/drotmg.c
-                    f2c/lsame.c   f2c/dspmv.c   f2c/ssbmv.c f2c/chbmv.c
-                    f2c/sspmv.c   f2c/zhbmv.c   f2c/chpmv.c f2c/dsbmv.c
-                    f2c/zhpmv.c   f2c/dtbmv.c   f2c/stbmv.c f2c/ctbmv.c
-                    f2c/ztbmv.c   f2c/complexdots.c
+                    lsame.cpp complexdots.cpp
   )

 set(EIGEN_BLAS_TARGETS "")
--- a/blas/complexdots.cpp
+++ b/blas/complexdots.cpp
@@ -0,0 +1,72 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// C++ replacements for the f2c complex dot product wrappers.
+// These are thin wrappers around the worker functions (cdotcw_, etc.)
+// defined in level1_cplx_impl.h.
+//
+// Note: blas.h declares these as void, but gfortran expects complex functions
+// to return by value. We define the correct signatures here and do not include
+// blas.h to avoid the conflicting declarations.
+
+#if defined(_WIN32)
+#if defined(EIGEN_BLAS_BUILD_DLL)
+#define EIGEN_BLAS_CDOT_API __declspec(dllexport)
+#else
+#define EIGEN_BLAS_CDOT_API
+#endif
+#elif ((defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__)) && defined(EIGEN_BLAS_BUILD_DLL)
+#define EIGEN_BLAS_CDOT_API __attribute__((visibility("default")))
+#else
+#define EIGEN_BLAS_CDOT_API
+#endif
+
+extern "C" {
+
+// Worker function declarations (defined in level1_cplx_impl.h via complex_single.cpp / complex_double.cpp).
+void cdotcw_(int *n, float *cx, int *incx, float *cy, int *incy, float *res);
+void cdotuw_(int *n, float *cx, int *incx, float *cy, int *incy, float *res);
+void zdotcw_(int *n, double *cx, int *incx, double *cy, int *incy, double *res);
+void zdotuw_(int *n, double *cx, int *incx, double *cy, int *incy, double *res);
+
+// POD complex types for C-compatible return values (matches Fortran complex layout).
+struct eigen_blas_complex_float {
+  float r, i;
+};
+struct eigen_blas_complex_double {
+  double r, i;
+};
+
+// CDOTC computes the conjugated dot product of two single-precision complex vectors.
+EIGEN_BLAS_CDOT_API eigen_blas_complex_float cdotc_(int *n, float *cx, int *incx, float *cy, int *incy) {
+  eigen_blas_complex_float res = {0.0f, 0.0f};
+  cdotcw_(n, cx, incx, cy, incy, &res.r);
+  return res;
+}
+
+// CDOTU computes the unconjugated dot product of two single-precision complex vectors.
+EIGEN_BLAS_CDOT_API eigen_blas_complex_float cdotu_(int *n, float *cx, int *incx, float *cy, int *incy) {
+  eigen_blas_complex_float res = {0.0f, 0.0f};
+  cdotuw_(n, cx, incx, cy, incy, &res.r);
+  return res;
+}
+
+// ZDOTC computes the conjugated dot product of two double-precision complex vectors.
+EIGEN_BLAS_CDOT_API eigen_blas_complex_double zdotc_(int *n, double *cx, int *incx, double *cy, int *incy) {
+  eigen_blas_complex_double res = {0.0, 0.0};
+  zdotcw_(n, cx, incx, cy, incy, &res.r);
+  return res;
+}
+
+// ZDOTU computes the unconjugated dot product of two double-precision complex vectors.
+EIGEN_BLAS_CDOT_API eigen_blas_complex_double zdotu_(int *n, double *cx, int *incx, double *cy, int *incy) {
+  eigen_blas_complex_double res = {0.0, 0.0};
+  zdotuw_(n, cx, incx, cy, incy, &res.r);
+  return res;
+}
+
+}  // extern "C"
--- a/blas/f2c/chbmv.c
+++ b/blas/f2c/chbmv.c
@@ -1,456 +0,0 @@
-/* chbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void r_cnjg(complex *r, complex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void chbmv_(char *uplo, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *x,
-                             integer *incx, complex *beta, complex *y, integer *incy) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-  real r__1;
-  complex q__1, q__2, q__3, q__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-  complex temp1, temp2;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  CHBMV  performs the matrix-vector  operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n hermitian band matrix, with k super-diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the band matrix A is being supplied as */
-  /*           follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  being supplied. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  being supplied. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry, K specifies the number of super-diagonals of the */
-  /*           matrix A. K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - COMPLEX         . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the hermitian matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer the upper */
-  /*           triangular part of a hermitian band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the hermitian matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer the lower */
-  /*           triangular part of a hermitian band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that the imaginary parts of the diagonal elements need */
-  /*           not be set and are assumed to be zero. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX          array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the */
-  /*           vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - COMPLEX         . */
-  /*           On entry, BETA specifies the scalar beta. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - COMPLEX          array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the */
-  /*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-  --y;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*k < 0) {
-    info = 3;
-  } else if (*lda < *k + 1) {
-    info = 6;
-  } else if (*incx == 0) {
-    info = 8;
-  } else if (*incy == 0) {
-    info = 11;
-  }
-  if (info != 0) {
-    xerbla_("CHBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array A */
-  /*     are accessed sequentially with one pass through A. */
-
-  /*     First form  y := beta*y. */
-
-  if (beta->r != 1.f || beta->i != 0.f) {
-    if (*incy == 1) {
-      if (beta->r == 0.f && beta->i == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          y[i__2].r = 0.f, y[i__2].i = 0.f;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (beta->r == 0.f && beta->i == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          y[i__2].r = 0.f, y[i__2].i = 0.f;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          i__3 = iy;
-          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (alpha->r == 0.f && alpha->i == 0.f) {
-    return;
-  }
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when upper triangle of A is stored. */
-
-    kplus1 = *k + 1;
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__2 = 1, i__3 = j - *k;
-        i__4 = j - 1;
-        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          i__5 = l + i__ + j * a_dim1;
-          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-          i__2 = i__;
-          q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          /* L50: */
-        }
-        i__4 = j;
-        i__2 = j;
-        i__3 = kplus1 + j * a_dim1;
-        r__1 = a[i__3].r;
-        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-        q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
-        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-        y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__4 = jx;
-        q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        ix = kx;
-        iy = ky;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__4 = 1, i__2 = j - *k;
-        i__3 = j - 1;
-        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-          i__4 = iy;
-          i__2 = iy;
-          i__5 = l + i__ + j * a_dim1;
-          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-          i__4 = ix;
-          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        i__3 = jy;
-        i__4 = jy;
-        i__2 = kplus1 + j * a_dim1;
-        r__1 = a[i__2].r;
-        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-        q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
-        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-        jx += *incx;
-        jy += *incy;
-        if (j > *k) {
-          kx += *incx;
-          ky += *incy;
-        }
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when lower triangle of A is stored. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__3 = j;
-        q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        i__3 = j;
-        i__4 = j;
-        i__2 = j * a_dim1 + 1;
-        r__1 = a[i__2].r;
-        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-        l = 1 - j;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          i__4 = i__;
-          i__2 = i__;
-          i__5 = l + i__ + j * a_dim1;
-          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-          i__4 = i__;
-          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          /* L90: */
-        }
-        i__3 = j;
-        i__4 = j;
-        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__3 = jx;
-        q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        i__3 = jy;
-        i__4 = jy;
-        i__2 = j * a_dim1 + 1;
-        r__1 = a[i__2].r;
-        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-        l = 1 - j;
-        ix = jx;
-        iy = jy;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          ix += *incx;
-          iy += *incy;
-          i__4 = iy;
-          i__2 = iy;
-          i__5 = l + i__ + j * a_dim1;
-          q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-          y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-          r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-          i__4 = ix;
-          q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          /* L110: */
-        }
-        i__3 = jy;
-        i__4 = jy;
-        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-        y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-        jx += *incx;
-        jy += *incy;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of CHBMV . */
-
-} /* chbmv_ */
--- a/blas/f2c/chpmv.c
+++ b/blas/f2c/chpmv.c
@@ -1,407 +0,0 @@
-/* chpmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void r_cnjg(complex *r, complex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void chpmv_(char *uplo, integer *n, complex *alpha, complex *ap, complex *x, integer *incx,
-                             complex *beta, complex *y, integer *incy) {
-  /* System generated locals */
-  integer i__1, i__2, i__3, i__4, i__5;
-  real r__1;
-  complex q__1, q__2, q__3, q__4;
-
-  /* Local variables */
-  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-  complex temp1, temp2;
-  extern logical lsame_(char *, char *);
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  CHPMV  performs the matrix-vector operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n hermitian matrix, supplied in packed form. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the matrix A is supplied in the packed */
-  /*           array AP as follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - COMPLEX         . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  AP     - COMPLEX          array of DIMENSION at least */
-  /*           ( ( n*( n + 1 ) )/2 ). */
-  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
-  /*           contain the upper triangular part of the hermitian matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-  /*           and a( 2, 2 ) respectively, and so on. */
-  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
-  /*           contain the lower triangular part of the hermitian matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-  /*           and a( 3, 1 ) respectively, and so on. */
-  /*           Note that the imaginary parts of the diagonal elements need */
-  /*           not be set and are assumed to be zero. */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX          array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - COMPLEX         . */
-  /*           On entry, BETA specifies the scalar beta. When BETA is */
-  /*           supplied as zero then Y need not be set on input. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - COMPLEX          array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the n */
-  /*           element vector y. On exit, Y is overwritten by the updated */
-  /*           vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  --y;
-  --x;
-  --ap;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*incx == 0) {
-    info = 6;
-  } else if (*incy == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("CHPMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array AP */
-  /*     are accessed sequentially with one pass through AP. */
-
-  /*     First form  y := beta*y. */
-
-  if (beta->r != 1.f || beta->i != 0.f) {
-    if (*incy == 1) {
-      if (beta->r == 0.f && beta->i == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          y[i__2].r = 0.f, y[i__2].i = 0.f;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (beta->r == 0.f && beta->i == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          y[i__2].r = 0.f, y[i__2].i = 0.f;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          i__3 = iy;
-          q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (alpha->r == 0.f && alpha->i == 0.f) {
-    return;
-  }
-  kk = 1;
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when AP contains the upper triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        k = kk;
-        i__2 = j - 1;
-        for (i__ = 1; i__ <= i__2; ++i__) {
-          i__3 = i__;
-          i__4 = i__;
-          i__5 = k;
-          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-          r_cnjg(&q__3, &ap[k]);
-          i__3 = i__;
-          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          ++k;
-          /* L50: */
-        }
-        i__2 = j;
-        i__3 = j;
-        i__4 = kk + j - 1;
-        r__1 = ap[i__4].r;
-        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-        q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        kk += j;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = jx;
-        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        ix = kx;
-        iy = ky;
-        i__2 = kk + j - 2;
-        for (k = kk; k <= i__2; ++k) {
-          i__3 = iy;
-          i__4 = iy;
-          i__5 = k;
-          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-          r_cnjg(&q__3, &ap[k]);
-          i__3 = ix;
-          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        i__2 = jy;
-        i__3 = jy;
-        i__4 = kk + j - 1;
-        r__1 = ap[i__4].r;
-        q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-        q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-        q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        jx += *incx;
-        jy += *incy;
-        kk += j;
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when AP contains the lower triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        i__2 = j;
-        i__3 = j;
-        i__4 = kk;
-        r__1 = ap[i__4].r;
-        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        k = kk + 1;
-        i__2 = *n;
-        for (i__ = j + 1; i__ <= i__2; ++i__) {
-          i__3 = i__;
-          i__4 = i__;
-          i__5 = k;
-          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-          r_cnjg(&q__3, &ap[k]);
-          i__3 = i__;
-          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          ++k;
-          /* L90: */
-        }
-        i__2 = j;
-        i__3 = j;
-        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        kk += *n - j + 1;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = jx;
-        q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = q__1.r, temp1.i = q__1.i;
-        temp2.r = 0.f, temp2.i = 0.f;
-        i__2 = jy;
-        i__3 = jy;
-        i__4 = kk;
-        r__1 = ap[i__4].r;
-        q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        ix = jx;
-        iy = jy;
-        i__2 = kk + *n - j;
-        for (k = kk + 1; k <= i__2; ++k) {
-          ix += *incx;
-          iy += *incy;
-          i__3 = iy;
-          i__4 = iy;
-          i__5 = k;
-          q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-          y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-          r_cnjg(&q__3, &ap[k]);
-          i__3 = ix;
-          q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-          q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-          temp2.r = q__1.r, temp2.i = q__1.i;
-          /* L110: */
-        }
-        i__2 = jy;
-        i__3 = jy;
-        q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-        y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-        jx += *incx;
-        jy += *incy;
-        kk += *n - j + 1;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of CHPMV . */
-
-} /* chpmv_ */
--- a/blas/f2c/complexdots.c
+++ b/blas/f2c/complexdots.c
@@ -1,73 +0,0 @@
-/* This file has been modified to use the standard gfortran calling
-   convention, rather than the f2c calling convention.
-
-   It does not require -ff2c when compiled with gfortran.
-*/
-
-/* complexdots.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-complex cdotc_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) {
-  complex res;
-  extern /* Subroutine */ void cdotcw_(integer *, complex *, integer *, complex *, integer *, complex *);
-
-  /* Parameter adjustments */
-  --cy;
-  --cx;
-
-  /* Function Body */
-  cdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
-  return res;
-} /* cdotc_ */
-
-complex cdotu_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) {
-  complex res;
-  extern /* Subroutine */ void cdotuw_(integer *, complex *, integer *, complex *, integer *, complex *);
-
-  /* Parameter adjustments */
-  --cy;
-  --cx;
-
-  /* Function Body */
-  cdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
-  return res;
-} /* cdotu_ */
-
-doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) {
-  doublecomplex res;
-  extern /* Subroutine */ void zdotcw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *,
-                                       doublecomplex *);
-
-  /* Parameter adjustments */
-  --cy;
-  --cx;
-
-  /* Function Body */
-  zdotcw_(n, &cx[1], incx, &cy[1], incy, &res);
-  return res;
-} /* zdotc_ */
-
-doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) {
-  doublecomplex res;
-  extern /* Subroutine */ void zdotuw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *,
-                                       doublecomplex *);
-
-  /* Parameter adjustments */
-  --cy;
-  --cx;
-
-  /* Function Body */
-  zdotuw_(n, &cx[1], incx, &cy[1], incy, &res);
-  return res;
-} /* zdotu_ */
--- a/blas/f2c/ctbmv.c
+++ b/blas/f2c/ctbmv.c
@@ -1,586 +0,0 @@
-/* ctbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void r_cnjg(complex *r, complex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda,
-                             complex *x, integer *incx) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-  complex q__1, q__2, q__3;
-
-  /* Local variables */
-  integer i__, j, l, ix, jx, kx, info;
-  complex temp;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-  logical noconj, nounit;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  CTBMV  performs one of the matrix-vector operations */
-
-  /*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the matrix is an upper or */
-  /*           lower triangular matrix as follows: */
-
-  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-  /*           Unchanged on exit. */
-
-  /*  TRANS  - CHARACTER*1. */
-  /*           On entry, TRANS specifies the operation to be performed as */
-  /*           follows: */
-
-  /*              TRANS = 'N' or 'n'   x := A*x. */
-
-  /*              TRANS = 'T' or 't'   x := A'*x. */
-
-  /*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-  /*           Unchanged on exit. */
-
-  /*  DIAG   - CHARACTER*1. */
-  /*           On entry, DIAG specifies whether or not A is unit */
-  /*           triangular as follows: */
-
-  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-  /*                                  triangular. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-  /*           super-diagonals of the matrix A. */
-  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-  /*           sub-diagonals of the matrix A. */
-  /*           K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer an upper */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer a lower */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-  /*           corresponding to the diagonal elements of the matrix are not */
-  /*           referenced, but are assumed to be unity. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX          array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. On exit, X is overwritten with the */
-  /*           transformed vector x. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
-    info = 2;
-  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
-    info = 3;
-  } else if (*n < 0) {
-    info = 4;
-  } else if (*k < 0) {
-    info = 5;
-  } else if (*lda < *k + 1) {
-    info = 7;
-  } else if (*incx == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("CTBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0) {
-    return;
-  }
-
-  noconj = lsame_(trans, "T");
-  nounit = lsame_(diag, "N");
-
-  /*     Set up the start point in X if the increment is not unity. This */
-  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-  if (*incx <= 0) {
-    kx = 1 - (*n - 1) * *incx;
-  } else if (*incx != 1) {
-    kx = 1;
-  }
-
-  /*     Start the operations. In this version the elements of A are */
-  /*     accessed sequentially with one pass through A. */
-
-  if (lsame_(trans, "N")) {
-    /*         Form  x := A*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          i__2 = j;
-          if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-            i__2 = j;
-            temp.r = x[i__2].r, temp.i = x[i__2].i;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__2 = 1, i__3 = j - *k;
-            i__4 = j - 1;
-            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-              i__2 = i__;
-              i__3 = i__;
-              i__5 = l + i__ + j * a_dim1;
-              q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
-              q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i;
-              x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-              /* L10: */
-            }
-            if (nounit) {
-              i__4 = j;
-              i__2 = j;
-              i__3 = kplus1 + j * a_dim1;
-              q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i,
-              q__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r;
-              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-            }
-          }
-          /* L20: */
-        }
-      } else {
-        jx = kx;
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          i__4 = jx;
-          if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-            i__4 = jx;
-            temp.r = x[i__4].r, temp.i = x[i__4].i;
-            ix = kx;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__4 = 1, i__2 = j - *k;
-            i__3 = j - 1;
-            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-              i__4 = ix;
-              i__2 = ix;
-              i__5 = l + i__ + j * a_dim1;
-              q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
-              q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + q__2.i;
-              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-              ix += *incx;
-              /* L30: */
-            }
-            if (nounit) {
-              i__3 = jx;
-              i__4 = jx;
-              i__2 = kplus1 + j * a_dim1;
-              q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i,
-              q__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r;
-              x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-            }
-          }
-          jx += *incx;
-          if (j > *k) {
-            kx += *incx;
-          }
-          /* L40: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          i__1 = j;
-          if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-            i__1 = j;
-            temp.r = x[i__1].r, temp.i = x[i__1].i;
-            l = 1 - j;
-            /* Computing MIN */
-            i__1 = *n, i__3 = j + *k;
-            i__4 = j + 1;
-            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
-              i__1 = i__;
-              i__3 = i__;
-              i__2 = l + i__ + j * a_dim1;
-              q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
-              q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i;
-              x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-              /* L50: */
-            }
-            if (nounit) {
-              i__4 = j;
-              i__1 = j;
-              i__3 = j * a_dim1 + 1;
-              q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i,
-              q__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r;
-              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-            }
-          }
-          /* L60: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          i__4 = jx;
-          if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-            i__4 = jx;
-            temp.r = x[i__4].r, temp.i = x[i__4].i;
-            ix = kx;
-            l = 1 - j;
-            /* Computing MIN */
-            i__4 = *n, i__1 = j + *k;
-            i__3 = j + 1;
-            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
-              i__4 = ix;
-              i__1 = ix;
-              i__2 = l + i__ + j * a_dim1;
-              q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
-              q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + q__2.i;
-              x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-              ix -= *incx;
-              /* L70: */
-            }
-            if (nounit) {
-              i__3 = jx;
-              i__4 = jx;
-              i__1 = j * a_dim1 + 1;
-              q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i,
-              q__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r;
-              x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-            }
-          }
-          jx -= *incx;
-          if (*n - j >= *k) {
-            kx -= *incx;
-          }
-          /* L80: */
-        }
-      }
-    }
-  } else {
-    /*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          i__3 = j;
-          temp.r = x[i__3].r, temp.i = x[i__3].i;
-          l = kplus1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__3 = kplus1 + j * a_dim1;
-              q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              i__4 = l + i__ + j * a_dim1;
-              i__1 = i__;
-              q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
-              q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              /* L90: */
-            }
-          } else {
-            if (nounit) {
-              r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-              i__4 = i__;
-              q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              /* L100: */
-            }
-          }
-          i__3 = j;
-          x[i__3].r = temp.r, x[i__3].i = temp.i;
-          /* L110: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          i__3 = jx;
-          temp.r = x[i__3].r, temp.i = x[i__3].i;
-          kx -= *incx;
-          ix = kx;
-          l = kplus1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__3 = kplus1 + j * a_dim1;
-              q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              i__4 = l + i__ + j * a_dim1;
-              i__1 = ix;
-              q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
-              q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              ix -= *incx;
-              /* L120: */
-            }
-          } else {
-            if (nounit) {
-              r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-              i__4 = ix;
-              q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              ix -= *incx;
-              /* L130: */
-            }
-          }
-          i__3 = jx;
-          x[i__3].r = temp.r, x[i__3].i = temp.i;
-          jx -= *incx;
-          /* L140: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          i__4 = j;
-          temp.r = x[i__4].r, temp.i = x[i__4].i;
-          l = 1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__4 = j * a_dim1 + 1;
-              q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              i__1 = l + i__ + j * a_dim1;
-              i__2 = i__;
-              q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
-              q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              /* L150: */
-            }
-          } else {
-            if (nounit) {
-              r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-              i__1 = i__;
-              q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              /* L160: */
-            }
-          }
-          i__4 = j;
-          x[i__4].r = temp.r, x[i__4].i = temp.i;
-          /* L170: */
-        }
-      } else {
-        jx = kx;
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          i__4 = jx;
-          temp.r = x[i__4].r, temp.i = x[i__4].i;
-          kx += *incx;
-          ix = kx;
-          l = 1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__4 = j * a_dim1 + 1;
-              q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              i__1 = l + i__ + j * a_dim1;
-              i__2 = ix;
-              q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
-              q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              ix += *incx;
-              /* L180: */
-            }
-          } else {
-            if (nounit) {
-              r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-              q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r;
-              temp.r = q__1.r, temp.i = q__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-              i__1 = ix;
-              q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r;
-              q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-              temp.r = q__1.r, temp.i = q__1.i;
-              ix += *incx;
-              /* L190: */
-            }
-          }
-          i__4 = jx;
-          x[i__4].r = temp.r, x[i__4].i = temp.i;
-          jx += *incx;
-          /* L200: */
-        }
-      }
-    }
-  }
-
-  /*     End of CTBMV . */
-
-} /* ctbmv_ */
--- a/blas/f2c/datatypes.h
+++ b/blas/f2c/datatypes.h
@@ -1,27 +0,0 @@
-/* This contains a limited subset of the typedefs exposed by f2c
-   for use by the Eigen BLAS C-only implementation.
-*/
-
-#ifndef __EIGEN_DATATYPES_H__
-#define __EIGEN_DATATYPES_H__
-
-typedef int integer;
-typedef unsigned int uinteger;
-typedef float real;
-typedef double doublereal;
-typedef struct {
-  real r, i;
-} complex;
-typedef struct {
-  doublereal r, i;
-} doublecomplex;
-typedef int logical;
-
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (doublereal) abs(x)
-#define min(a, b) ((a) <= (b) ? (a) : (b))
-#define max(a, b) ((a) >= (b) ? (a) : (b))
-#define dmin(a, b) (doublereal) min(a, b)
-#define dmax(a, b) (doublereal) max(a, b)
-
-#endif
--- a/blas/f2c/drotm.c
+++ b/blas/f2c/drotm.c
@@ -1,213 +0,0 @@
-/* drotm.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy,
-                             doublereal *dparam) {
-  /* Initialized data */
-
-  static doublereal zero = 0.;
-  static doublereal two = 2.;
-
-  /* System generated locals */
-  integer i__1, i__2;
-
-  /* Local variables */
-  integer i__;
-  doublereal w, z__;
-  integer kx, ky;
-  doublereal dh11, dh12, dh21, dh22, dflag;
-  integer nsteps;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
-
-  /*     (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */
-  /*     (DY**T) */
-
-  /*     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-  /*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */
-  /*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-  /*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-
-  /*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-  /*     H=(          )    (          )    (          )    (          ) */
-  /*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-  /*     SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */
-
-  /*  Arguments */
-  /*  ========= */
-
-  /*  N      (input) INTEGER */
-  /*         number of elements in input vector(s) */
-
-  /*  DX     (input/output) DOUBLE PRECISION array, dimension N */
-  /*         double precision vector with N elements */
-
-  /*  INCX   (input) INTEGER */
-  /*         storage spacing between elements of DX */
-
-  /*  DY     (input/output) DOUBLE PRECISION array, dimension N */
-  /*         double precision vector with N elements */
-
-  /*  INCY   (input) INTEGER */
-  /*         storage spacing between elements of DY */
-
-  /*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
-  /*     DPARAM(1)=DFLAG */
-  /*     DPARAM(2)=DH11 */
-  /*     DPARAM(3)=DH21 */
-  /*     DPARAM(4)=DH12 */
-  /*     DPARAM(5)=DH22 */
-
-  /*  ===================================================================== */
-
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. Data statements .. */
-  /* Parameter adjustments */
-  --dparam;
-  --dy;
-  --dx;
-
-  /* Function Body */
-  /*     .. */
-
-  dflag = dparam[1];
-  if (*n <= 0 || dflag + two == zero) {
-    goto L140;
-  }
-  if (!(*incx == *incy && *incx > 0)) {
-    goto L70;
-  }
-
-  nsteps = *n * *incx;
-  if (dflag < 0.) {
-    goto L50;
-  } else if (dflag == 0) {
-    goto L10;
-  } else {
-    goto L30;
-  }
-L10:
-  dh12 = dparam[4];
-  dh21 = dparam[3];
-  i__1 = nsteps;
-  i__2 = *incx;
-  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-    w = dx[i__];
-    z__ = dy[i__];
-    dx[i__] = w + z__ * dh12;
-    dy[i__] = w * dh21 + z__;
-    /* L20: */
-  }
-  goto L140;
-L30:
-  dh11 = dparam[2];
-  dh22 = dparam[5];
-  i__2 = nsteps;
-  i__1 = *incx;
-  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-    w = dx[i__];
-    z__ = dy[i__];
-    dx[i__] = w * dh11 + z__;
-    dy[i__] = -w + dh22 * z__;
-    /* L40: */
-  }
-  goto L140;
-L50:
-  dh11 = dparam[2];
-  dh12 = dparam[4];
-  dh21 = dparam[3];
-  dh22 = dparam[5];
-  i__1 = nsteps;
-  i__2 = *incx;
-  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-    w = dx[i__];
-    z__ = dy[i__];
-    dx[i__] = w * dh11 + z__ * dh12;
-    dy[i__] = w * dh21 + z__ * dh22;
-    /* L60: */
-  }
-  goto L140;
-L70:
-  kx = 1;
-  ky = 1;
-  if (*incx < 0) {
-    kx = (1 - *n) * *incx + 1;
-  }
-  if (*incy < 0) {
-    ky = (1 - *n) * *incy + 1;
-  }
-
-  if (dflag < 0.) {
-    goto L120;
-  } else if (dflag == 0) {
-    goto L80;
-  } else {
-    goto L100;
-  }
-L80:
-  dh12 = dparam[4];
-  dh21 = dparam[3];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = dx[kx];
-    z__ = dy[ky];
-    dx[kx] = w + z__ * dh12;
-    dy[ky] = w * dh21 + z__;
-    kx += *incx;
-    ky += *incy;
-    /* L90: */
-  }
-  goto L140;
-L100:
-  dh11 = dparam[2];
-  dh22 = dparam[5];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = dx[kx];
-    z__ = dy[ky];
-    dx[kx] = w * dh11 + z__;
-    dy[ky] = -w + dh22 * z__;
-    kx += *incx;
-    ky += *incy;
-    /* L110: */
-  }
-  goto L140;
-L120:
-  dh11 = dparam[2];
-  dh12 = dparam[4];
-  dh21 = dparam[3];
-  dh22 = dparam[5];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = dx[kx];
-    z__ = dy[ky];
-    dx[kx] = w * dh11 + z__ * dh12;
-    dy[ky] = w * dh21 + z__ * dh22;
-    kx += *incx;
-    ky += *incy;
-    /* L130: */
-  }
-L140:
-  return;
-} /* drotm_ */
--- a/blas/f2c/drotmg.c
+++ b/blas/f2c/drotmg.c
@@ -1,293 +0,0 @@
-/* drotmg.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void drotmg_(doublereal *dd1, doublereal *dd2, doublereal *dx1, doublereal *dy1, doublereal *dparam) {
-  /* Initialized data */
-
-  static doublereal zero = 0.;
-  static doublereal one = 1.;
-  static doublereal two = 2.;
-  static doublereal gam = 4096.;
-  static doublereal gamsq = 16777216.;
-  static doublereal rgamsq = 5.9604645e-8;
-
-  /* Format strings */
-  static char fmt_120[] = "";
-  static char fmt_150[] = "";
-  static char fmt_180[] = "";
-  static char fmt_210[] = "";
-
-  /* System generated locals */
-  doublereal d__1;
-
-  /* Local variables */
-  doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22;
-  integer igo;
-  doublereal dflag, dtemp;
-
-  /* Assigned format variables */
-  static char *igo_fmt;
-  (void)igo_fmt;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-  /*     THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)* */
-  /*     DY2)**T. */
-  /*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-  /*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-
-  /*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-  /*     H=(          )    (          )    (          )    (          ) */
-  /*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-  /*     LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */
-  /*     RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */
-  /*     VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */
-
-  /*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-  /*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-  /*     OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
-
-  /*  Arguments */
-  /*  ========= */
-
-  /*  DD1    (input/output) DOUBLE PRECISION */
-
-  /*  DD2    (input/output) DOUBLE PRECISION */
-
-  /*  DX1    (input/output) DOUBLE PRECISION */
-
-  /*  DY1    (input) DOUBLE PRECISION */
-
-  /*  DPARAM (input/output)  DOUBLE PRECISION array, dimension 5 */
-  /*     DPARAM(1)=DFLAG */
-  /*     DPARAM(2)=DH11 */
-  /*     DPARAM(3)=DH21 */
-  /*     DPARAM(4)=DH12 */
-  /*     DPARAM(5)=DH22 */
-
-  /*  ===================================================================== */
-
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-  /*     .. Data statements .. */
-
-  /* Parameter adjustments */
-  --dparam;
-
-  /* Function Body */
-  /*     .. */
-  if (!(*dd1 < zero)) {
-    goto L10;
-  }
-  /*       GO ZERO-H-D-AND-DX1.. */
-  goto L60;
-L10:
-  /*     CASE-DD1-NONNEGATIVE */
-  dp2 = *dd2 * *dy1;
-  if (!(dp2 == zero)) {
-    goto L20;
-  }
-  dflag = -two;
-  goto L260;
-/*     REGULAR-CASE.. */
-L20:
-  dp1 = *dd1 * *dx1;
-  dq2 = dp2 * *dy1;
-  dq1 = dp1 * *dx1;
-
-  if (!(abs(dq1) > abs(dq2))) {
-    goto L40;
-  }
-  dh21 = -(*dy1) / *dx1;
-  dh12 = dp2 / dp1;
-
-  du = one - dh12 * dh21;
-
-  if (!(du <= zero)) {
-    goto L30;
-  }
-  /*         GO ZERO-H-D-AND-DX1.. */
-  goto L60;
-L30:
-  dflag = zero;
-  *dd1 /= du;
-  *dd2 /= du;
-  *dx1 *= du;
-  /*         GO SCALE-CHECK.. */
-  goto L100;
-L40:
-  if (!(dq2 < zero)) {
-    goto L50;
-  }
-  /*         GO ZERO-H-D-AND-DX1.. */
-  goto L60;
-L50:
-  dflag = one;
-  dh11 = dp1 / dp2;
-  dh22 = *dx1 / *dy1;
-  du = one + dh11 * dh22;
-  dtemp = *dd2 / du;
-  *dd2 = *dd1 / du;
-  *dd1 = dtemp;
-  *dx1 = *dy1 * du;
-  /*         GO SCALE-CHECK */
-  goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-DX1.. */
-L60:
-  dflag = -one;
-  dh11 = zero;
-  dh12 = zero;
-  dh21 = zero;
-  dh22 = zero;
-
-  *dd1 = zero;
-  *dd2 = zero;
-  *dx1 = zero;
-  /*         RETURN.. */
-  goto L220;
-/*     PROCEDURE..FIX-H.. */
-L70:
-  if (!(dflag >= zero)) {
-    goto L90;
-  }
-
-  if (!(dflag == zero)) {
-    goto L80;
-  }
-  dh11 = one;
-  dh22 = one;
-  dflag = -one;
-  goto L90;
-L80:
-  dh21 = -one;
-  dh12 = one;
-  dflag = -one;
-L90:
-  switch (igo) {
-    case 0:
-      goto L120;
-    case 1:
-      goto L150;
-    case 2:
-      goto L180;
-    case 3:
-      goto L210;
-  }
-/*     PROCEDURE..SCALE-CHECK */
-L100:
-L110:
-  if (!(*dd1 <= rgamsq)) {
-    goto L130;
-  }
-  if (*dd1 == zero) {
-    goto L160;
-  }
-  igo = 0;
-  igo_fmt = fmt_120;
-  /*              FIX-H.. */
-  goto L70;
-L120:
-  /* Computing 2nd power */
-  d__1 = gam;
-  *dd1 *= d__1 * d__1;
-  *dx1 /= gam;
-  dh11 /= gam;
-  dh12 /= gam;
-  goto L110;
-L130:
-L140:
-  if (!(*dd1 >= gamsq)) {
-    goto L160;
-  }
-  igo = 1;
-  igo_fmt = fmt_150;
-  /*              FIX-H.. */
-  goto L70;
-L150:
-  /* Computing 2nd power */
-  d__1 = gam;
-  *dd1 /= d__1 * d__1;
-  *dx1 *= gam;
-  dh11 *= gam;
-  dh12 *= gam;
-  goto L140;
-L160:
-L170:
-  if (!(abs(*dd2) <= rgamsq)) {
-    goto L190;
-  }
-  if (*dd2 == zero) {
-    goto L220;
-  }
-  igo = 2;
-  igo_fmt = fmt_180;
-  /*              FIX-H.. */
-  goto L70;
-L180:
-  /* Computing 2nd power */
-  d__1 = gam;
-  *dd2 *= d__1 * d__1;
-  dh21 /= gam;
-  dh22 /= gam;
-  goto L170;
-L190:
-L200:
-  if (!(abs(*dd2) >= gamsq)) {
-    goto L220;
-  }
-  igo = 3;
-  igo_fmt = fmt_210;
-  /*              FIX-H.. */
-  goto L70;
-L210:
-  /* Computing 2nd power */
-  d__1 = gam;
-  *dd2 /= d__1 * d__1;
-  dh21 *= gam;
-  dh22 *= gam;
-  goto L200;
-L220:
-  if (dflag < 0.) {
-    goto L250;
-  } else if (dflag == 0) {
-    goto L230;
-  } else {
-    goto L240;
-  }
-L230:
-  dparam[3] = dh21;
-  dparam[4] = dh12;
-  goto L260;
-L240:
-  dparam[2] = dh11;
-  dparam[5] = dh22;
-  goto L260;
-L250:
-  dparam[2] = dh11;
-  dparam[3] = dh21;
-  dparam[4] = dh12;
-  dparam[5] = dh22;
-L260:
-  dparam[1] = dflag;
-} /* drotmg_ */
--- a/blas/f2c/dsbmv.c
+++ b/blas/f2c/dsbmv.c
@@ -1,356 +0,0 @@
-/* dsbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void dsbmv_(char *uplo, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda,
-                             doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-  doublereal temp1, temp2;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  DSBMV  performs the matrix-vector  operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n symmetric band matrix, with k super-diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the band matrix A is being supplied as */
-  /*           follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  being supplied. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  being supplied. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry, K specifies the number of super-diagonals of the */
-  /*           matrix A. K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - DOUBLE PRECISION. */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the symmetric matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer the upper */
-  /*           triangular part of a symmetric band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the symmetric matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer the lower */
-  /*           triangular part of a symmetric band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - DOUBLE PRECISION array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the */
-  /*           vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - DOUBLE PRECISION. */
-  /*           On entry, BETA specifies the scalar beta. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the */
-  /*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-  --y;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*k < 0) {
-    info = 3;
-  } else if (*lda < *k + 1) {
-    info = 6;
-  } else if (*incx == 0) {
-    info = 8;
-  } else if (*incy == 0) {
-    info = 11;
-  }
-  if (info != 0) {
-    xerbla_("DSBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array A */
-  /*     are accessed sequentially with one pass through A. */
-
-  /*     First form  y := beta*y. */
-
-  if (*beta != 1.) {
-    if (*incy == 1) {
-      if (*beta == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = 0.;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = *beta * y[i__];
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (*beta == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = 0.;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = *beta * y[iy];
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (*alpha == 0.) {
-    return;
-  }
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when upper triangle of A is stored. */
-
-    kplus1 = *k + 1;
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__2 = 1, i__3 = j - *k;
-        i__4 = j - 1;
-        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-          y[i__] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[i__];
-          /* L50: */
-        }
-        y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.;
-        ix = kx;
-        iy = ky;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__4 = 1, i__2 = j - *k;
-        i__3 = j - 1;
-        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-          y[iy] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[ix];
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        if (j > *k) {
-          kx += *incx;
-          ky += *incy;
-        }
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when lower triangle of A is stored. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.;
-        y[j] += temp1 * a[j * a_dim1 + 1];
-        l = 1 - j;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          y[i__] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[i__];
-          /* L90: */
-        }
-        y[j] += *alpha * temp2;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.;
-        y[jy] += temp1 * a[j * a_dim1 + 1];
-        l = 1 - j;
-        ix = jx;
-        iy = jy;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          ix += *incx;
-          iy += *incy;
-          y[iy] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[ix];
-          /* L110: */
-        }
-        y[jy] += *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of DSBMV . */
-
-} /* dsbmv_ */
--- a/blas/f2c/dspmv.c
+++ b/blas/f2c/dspmv.c
@@ -1,308 +0,0 @@
-/* dspmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx,
-                             doublereal *beta, doublereal *y, integer *incy) {
-  /* System generated locals */
-  integer i__1, i__2;
-
-  /* Local variables */
-  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-  doublereal temp1, temp2;
-  extern logical lsame_(char *, char *);
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  DSPMV  performs the matrix-vector operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n symmetric matrix, supplied in packed form. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the matrix A is supplied in the packed */
-  /*           array AP as follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - DOUBLE PRECISION. */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-  /*           ( ( n*( n + 1 ) )/2 ). */
-  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
-  /*           contain the upper triangular part of the symmetric matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-  /*           and a( 2, 2 ) respectively, and so on. */
-  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
-  /*           contain the lower triangular part of the symmetric matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-  /*           and a( 3, 1 ) respectively, and so on. */
-  /*           Unchanged on exit. */
-
-  /*  X      - DOUBLE PRECISION array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - DOUBLE PRECISION. */
-  /*           On entry, BETA specifies the scalar beta. When BETA is */
-  /*           supplied as zero then Y need not be set on input. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - DOUBLE PRECISION array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the n */
-  /*           element vector y. On exit, Y is overwritten by the updated */
-  /*           vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  --y;
-  --x;
-  --ap;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*incx == 0) {
-    info = 6;
-  } else if (*incy == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("DSPMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array AP */
-  /*     are accessed sequentially with one pass through AP. */
-
-  /*     First form  y := beta*y. */
-
-  if (*beta != 1.) {
-    if (*incy == 1) {
-      if (*beta == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = 0.;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = *beta * y[i__];
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (*beta == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = 0.;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = *beta * y[iy];
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (*alpha == 0.) {
-    return;
-  }
-  kk = 1;
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when AP contains the upper triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.;
-        k = kk;
-        i__2 = j - 1;
-        for (i__ = 1; i__ <= i__2; ++i__) {
-          y[i__] += temp1 * ap[k];
-          temp2 += ap[k] * x[i__];
-          ++k;
-          /* L50: */
-        }
-        y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-        kk += j;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.;
-        ix = kx;
-        iy = ky;
-        i__2 = kk + j - 2;
-        for (k = kk; k <= i__2; ++k) {
-          y[iy] += temp1 * ap[k];
-          temp2 += ap[k] * x[ix];
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        kk += j;
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when AP contains the lower triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.;
-        y[j] += temp1 * ap[kk];
-        k = kk + 1;
-        i__2 = *n;
-        for (i__ = j + 1; i__ <= i__2; ++i__) {
-          y[i__] += temp1 * ap[k];
-          temp2 += ap[k] * x[i__];
-          ++k;
-          /* L90: */
-        }
-        y[j] += *alpha * temp2;
-        kk += *n - j + 1;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.;
-        y[jy] += temp1 * ap[kk];
-        ix = jx;
-        iy = jy;
-        i__2 = kk + *n - j;
-        for (k = kk + 1; k <= i__2; ++k) {
-          ix += *incx;
-          iy += *incy;
-          y[iy] += temp1 * ap[k];
-          temp2 += ap[k] * x[ix];
-          /* L110: */
-        }
-        y[jy] += *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        kk += *n - j + 1;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of DSPMV . */
-
-} /* dspmv_ */
--- a/blas/f2c/dtbmv.c
+++ b/blas/f2c/dtbmv.c
@@ -1,417 +0,0 @@
-/* dtbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda,
-                             doublereal *x, integer *incx) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, jx, kx, info;
-  doublereal temp;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-  logical nounit;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  DTBMV  performs one of the matrix-vector operations */
-
-  /*     x := A*x,   or   x := A'*x, */
-
-  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the matrix is an upper or */
-  /*           lower triangular matrix as follows: */
-
-  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-  /*           Unchanged on exit. */
-
-  /*  TRANS  - CHARACTER*1. */
-  /*           On entry, TRANS specifies the operation to be performed as */
-  /*           follows: */
-
-  /*              TRANS = 'N' or 'n'   x := A*x. */
-
-  /*              TRANS = 'T' or 't'   x := A'*x. */
-
-  /*              TRANS = 'C' or 'c'   x := A'*x. */
-
-  /*           Unchanged on exit. */
-
-  /*  DIAG   - CHARACTER*1. */
-  /*           On entry, DIAG specifies whether or not A is unit */
-  /*           triangular as follows: */
-
-  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-  /*                                  triangular. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-  /*           super-diagonals of the matrix A. */
-  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-  /*           sub-diagonals of the matrix A. */
-  /*           K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer an upper */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer a lower */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-  /*           corresponding to the diagonal elements of the matrix are not */
-  /*           referenced, but are assumed to be unity. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - DOUBLE PRECISION array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. On exit, X is overwritten with the */
-  /*           transformed vector x. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
-    info = 2;
-  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
-    info = 3;
-  } else if (*n < 0) {
-    info = 4;
-  } else if (*k < 0) {
-    info = 5;
-  } else if (*lda < *k + 1) {
-    info = 7;
-  } else if (*incx == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("DTBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0) {
-    return;
-  }
-
-  nounit = lsame_(diag, "N");
-
-  /*     Set up the start point in X if the increment is not unity. This */
-  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-  if (*incx <= 0) {
-    kx = 1 - (*n - 1) * *incx;
-  } else if (*incx != 1) {
-    kx = 1;
-  }
-
-  /*     Start the operations. In this version the elements of A are */
-  /*     accessed sequentially with one pass through A. */
-
-  if (lsame_(trans, "N")) {
-    /*         Form  x := A*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          if (x[j] != 0.) {
-            temp = x[j];
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__2 = 1, i__3 = j - *k;
-            i__4 = j - 1;
-            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-              x[i__] += temp * a[l + i__ + j * a_dim1];
-              /* L10: */
-            }
-            if (nounit) {
-              x[j] *= a[kplus1 + j * a_dim1];
-            }
-          }
-          /* L20: */
-        }
-      } else {
-        jx = kx;
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          if (x[jx] != 0.) {
-            temp = x[jx];
-            ix = kx;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__4 = 1, i__2 = j - *k;
-            i__3 = j - 1;
-            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-              x[ix] += temp * a[l + i__ + j * a_dim1];
-              ix += *incx;
-              /* L30: */
-            }
-            if (nounit) {
-              x[jx] *= a[kplus1 + j * a_dim1];
-            }
-          }
-          jx += *incx;
-          if (j > *k) {
-            kx += *incx;
-          }
-          /* L40: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          if (x[j] != 0.) {
-            temp = x[j];
-            l = 1 - j;
-            /* Computing MIN */
-            i__1 = *n, i__3 = j + *k;
-            i__4 = j + 1;
-            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
-              x[i__] += temp * a[l + i__ + j * a_dim1];
-              /* L50: */
-            }
-            if (nounit) {
-              x[j] *= a[j * a_dim1 + 1];
-            }
-          }
-          /* L60: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          if (x[jx] != 0.) {
-            temp = x[jx];
-            ix = kx;
-            l = 1 - j;
-            /* Computing MIN */
-            i__4 = *n, i__1 = j + *k;
-            i__3 = j + 1;
-            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
-              x[ix] += temp * a[l + i__ + j * a_dim1];
-              ix -= *incx;
-              /* L70: */
-            }
-            if (nounit) {
-              x[jx] *= a[j * a_dim1 + 1];
-            }
-          }
-          jx -= *incx;
-          if (*n - j >= *k) {
-            kx -= *incx;
-          }
-          /* L80: */
-        }
-      }
-    }
-  } else {
-    /*        Form  x := A'*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          temp = x[j];
-          l = kplus1 - j;
-          if (nounit) {
-            temp *= a[kplus1 + j * a_dim1];
-          }
-          /* Computing MAX */
-          i__4 = 1, i__1 = j - *k;
-          i__3 = max(i__4, i__1);
-          for (i__ = j - 1; i__ >= i__3; --i__) {
-            temp += a[l + i__ + j * a_dim1] * x[i__];
-            /* L90: */
-          }
-          x[j] = temp;
-          /* L100: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          temp = x[jx];
-          kx -= *incx;
-          ix = kx;
-          l = kplus1 - j;
-          if (nounit) {
-            temp *= a[kplus1 + j * a_dim1];
-          }
-          /* Computing MAX */
-          i__4 = 1, i__1 = j - *k;
-          i__3 = max(i__4, i__1);
-          for (i__ = j - 1; i__ >= i__3; --i__) {
-            temp += a[l + i__ + j * a_dim1] * x[ix];
-            ix -= *incx;
-            /* L110: */
-          }
-          x[jx] = temp;
-          jx -= *incx;
-          /* L120: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          temp = x[j];
-          l = 1 - j;
-          if (nounit) {
-            temp *= a[j * a_dim1 + 1];
-          }
-          /* Computing MIN */
-          i__1 = *n, i__2 = j + *k;
-          i__4 = min(i__1, i__2);
-          for (i__ = j + 1; i__ <= i__4; ++i__) {
-            temp += a[l + i__ + j * a_dim1] * x[i__];
-            /* L130: */
-          }
-          x[j] = temp;
-          /* L140: */
-        }
-      } else {
-        jx = kx;
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          temp = x[jx];
-          kx += *incx;
-          ix = kx;
-          l = 1 - j;
-          if (nounit) {
-            temp *= a[j * a_dim1 + 1];
-          }
-          /* Computing MIN */
-          i__1 = *n, i__2 = j + *k;
-          i__4 = min(i__1, i__2);
-          for (i__ = j + 1; i__ <= i__4; ++i__) {
-            temp += a[l + i__ + j * a_dim1] * x[ix];
-            ix += *incx;
-            /* L150: */
-          }
-          x[jx] = temp;
-          jx += *incx;
-          /* L160: */
-        }
-      }
-    }
-  }
-
-  /*     End of DTBMV . */
-
-} /* dtbmv_ */
--- a/blas/f2c/lsame.c
+++ b/blas/f2c/lsame.c
@@ -1,109 +0,0 @@
-/* lsame.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-logical lsame_(char *ca, char *cb) {
-  /* System generated locals */
-  logical ret_val;
-
-  /* Local variables */
-  integer inta, intb, zcode;
-
-  /*  -- LAPACK auxiliary routine (version 3.1) -- */
-  /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-  /*     November 2006 */
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  LSAME returns .TRUE. if CA is the same letter as CB regardless of */
-  /*  case. */
-
-  /*  Arguments */
-  /*  ========= */
-
-  /*  CA      (input) CHARACTER*1 */
-
-  /*  CB      (input) CHARACTER*1 */
-  /*          CA and CB specify the single characters to be compared. */
-
-  /* ===================================================================== */
-
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-
-  /*     Test if the characters are equal */
-
-  ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
-  if (ret_val) {
-    return ret_val;
-  }
-
-  /*     Now test for equivalence if both characters are alphabetic. */
-
-  zcode = 'Z';
-
-  /*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime */
-  /*     machines, on which ICHAR returns a value with bit 8 set. */
-  /*     ICHAR('A') on Prime machines returns 193 which is the same as */
-  /*     ICHAR('A') on an EBCDIC machine. */
-
-  inta = *(unsigned char *)ca;
-  intb = *(unsigned char *)cb;
-
-  if (zcode == 90 || zcode == 122) {
-    /*        ASCII is assumed - ZCODE is the ASCII code of either lower or */
-    /*        upper case 'Z'. */
-
-    if (inta >= 97 && inta <= 122) {
-      inta += -32;
-    }
-    if (intb >= 97 && intb <= 122) {
-      intb += -32;
-    }
-
-  } else if (zcode == 233 || zcode == 169) {
-    /*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
-    /*        upper case 'Z'. */
-
-    if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) {
-      inta += 64;
-    }
-    if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) {
-      intb += 64;
-    }
-
-  } else if (zcode == 218 || zcode == 250) {
-    /*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
-    /*        plus 128 of either lower or upper case 'Z'. */
-
-    if (inta >= 225 && inta <= 250) {
-      inta += -32;
-    }
-    if (intb >= 225 && intb <= 250) {
-      intb += -32;
-    }
-  }
-  ret_val = inta == intb;
-
-  /*     RETURN */
-
-  /*     End of LSAME */
-
-  return ret_val;
-} /* lsame_ */
--- a/blas/f2c/srotm.c
+++ b/blas/f2c/srotm.c
@@ -1,212 +0,0 @@
-/* srotm.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam) {
-  /* Initialized data */
-
-  static real zero = 0.f;
-  static real two = 2.f;
-
-  /* System generated locals */
-  integer i__1, i__2;
-
-  /* Local variables */
-  integer i__;
-  real w, z__;
-  integer kx, ky;
-  real sh11, sh12, sh21, sh22, sflag;
-  integer nsteps;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
-
-  /*     (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */
-  /*     (DX**T) */
-
-  /*     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-  /*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */
-  /*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-  /*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-
-  /*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-  /*     H=(          )    (          )    (          )    (          ) */
-  /*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-  /*     SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */
-
-  /*  Arguments */
-  /*  ========= */
-
-  /*  N      (input) INTEGER */
-  /*         number of elements in input vector(s) */
-
-  /*  SX     (input/output) REAL array, dimension N */
-  /*         double precision vector with N elements */
-
-  /*  INCX   (input) INTEGER */
-  /*         storage spacing between elements of SX */
-
-  /*  SY     (input/output) REAL array, dimension N */
-  /*         double precision vector with N elements */
-
-  /*  INCY   (input) INTEGER */
-  /*         storage spacing between elements of SY */
-
-  /*  SPARAM (input/output)  REAL array, dimension 5 */
-  /*     SPARAM(1)=SFLAG */
-  /*     SPARAM(2)=SH11 */
-  /*     SPARAM(3)=SH21 */
-  /*     SPARAM(4)=SH12 */
-  /*     SPARAM(5)=SH22 */
-
-  /*  ===================================================================== */
-
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. Data statements .. */
-  /* Parameter adjustments */
-  --sparam;
-  --sy;
-  --sx;
-
-  /* Function Body */
-  /*     .. */
-
-  sflag = sparam[1];
-  if (*n <= 0 || sflag + two == zero) {
-    goto L140;
-  }
-  if (!(*incx == *incy && *incx > 0)) {
-    goto L70;
-  }
-
-  nsteps = *n * *incx;
-  if (sflag < 0.f) {
-    goto L50;
-  } else if (sflag == 0) {
-    goto L10;
-  } else {
-    goto L30;
-  }
-L10:
-  sh12 = sparam[4];
-  sh21 = sparam[3];
-  i__1 = nsteps;
-  i__2 = *incx;
-  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-    w = sx[i__];
-    z__ = sy[i__];
-    sx[i__] = w + z__ * sh12;
-    sy[i__] = w * sh21 + z__;
-    /* L20: */
-  }
-  goto L140;
-L30:
-  sh11 = sparam[2];
-  sh22 = sparam[5];
-  i__2 = nsteps;
-  i__1 = *incx;
-  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-    w = sx[i__];
-    z__ = sy[i__];
-    sx[i__] = w * sh11 + z__;
-    sy[i__] = -w + sh22 * z__;
-    /* L40: */
-  }
-  goto L140;
-L50:
-  sh11 = sparam[2];
-  sh12 = sparam[4];
-  sh21 = sparam[3];
-  sh22 = sparam[5];
-  i__1 = nsteps;
-  i__2 = *incx;
-  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-    w = sx[i__];
-    z__ = sy[i__];
-    sx[i__] = w * sh11 + z__ * sh12;
-    sy[i__] = w * sh21 + z__ * sh22;
-    /* L60: */
-  }
-  goto L140;
-L70:
-  kx = 1;
-  ky = 1;
-  if (*incx < 0) {
-    kx = (1 - *n) * *incx + 1;
-  }
-  if (*incy < 0) {
-    ky = (1 - *n) * *incy + 1;
-  }
-
-  if (sflag < 0.f) {
-    goto L120;
-  } else if (sflag == 0) {
-    goto L80;
-  } else {
-    goto L100;
-  }
-L80:
-  sh12 = sparam[4];
-  sh21 = sparam[3];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = sx[kx];
-    z__ = sy[ky];
-    sx[kx] = w + z__ * sh12;
-    sy[ky] = w * sh21 + z__;
-    kx += *incx;
-    ky += *incy;
-    /* L90: */
-  }
-  goto L140;
-L100:
-  sh11 = sparam[2];
-  sh22 = sparam[5];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = sx[kx];
-    z__ = sy[ky];
-    sx[kx] = w * sh11 + z__;
-    sy[ky] = -w + sh22 * z__;
-    kx += *incx;
-    ky += *incy;
-    /* L110: */
-  }
-  goto L140;
-L120:
-  sh11 = sparam[2];
-  sh12 = sparam[4];
-  sh21 = sparam[3];
-  sh22 = sparam[5];
-  i__2 = *n;
-  for (i__ = 1; i__ <= i__2; ++i__) {
-    w = sx[kx];
-    z__ = sy[ky];
-    sx[kx] = w * sh11 + z__ * sh12;
-    sy[ky] = w * sh21 + z__ * sh22;
-    kx += *incx;
-    ky += *incy;
-    /* L130: */
-  }
-L140:
-  return;
-} /* srotm_ */
--- a/blas/f2c/srotmg.c
+++ b/blas/f2c/srotmg.c
@@ -1,293 +0,0 @@
-/* srotmg.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam) {
-  /* Initialized data */
-
-  static real zero = 0.f;
-  static real one = 1.f;
-  static real two = 2.f;
-  static real gam = 4096.f;
-  static real gamsq = 16777200.f;
-  static real rgamsq = 5.96046e-8f;
-
-  /* Format strings */
-  static char fmt_120[] = "";
-  static char fmt_150[] = "";
-  static char fmt_180[] = "";
-  static char fmt_210[] = "";
-
-  /* System generated locals */
-  real r__1;
-
-  /* Local variables */
-  real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22;
-  integer igo;
-  real sflag, stemp;
-
-  /* Assigned format variables */
-  static char *igo_fmt;
-  (void)igo_fmt;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-  /*     THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)* */
-  /*     SY2)**T. */
-  /*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-  /*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-
-  /*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-  /*     H=(          )    (          )    (          )    (          ) */
-  /*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-  /*     LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */
-  /*     RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */
-  /*     VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */
-
-  /*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-  /*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-  /*     OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
-
-  /*  Arguments */
-  /*  ========= */
-
-  /*  SD1    (input/output) REAL */
-
-  /*  SD2    (input/output) REAL */
-
-  /*  SX1    (input/output) REAL */
-
-  /*  SY1    (input) REAL */
-
-  /*  SPARAM (input/output)  REAL array, dimension 5 */
-  /*     SPARAM(1)=SFLAG */
-  /*     SPARAM(2)=SH11 */
-  /*     SPARAM(3)=SH21 */
-  /*     SPARAM(4)=SH12 */
-  /*     SPARAM(5)=SH22 */
-
-  /*  ===================================================================== */
-
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-  /*     .. Data statements .. */
-
-  /* Parameter adjustments */
-  --sparam;
-
-  /* Function Body */
-  /*     .. */
-  if (!(*sd1 < zero)) {
-    goto L10;
-  }
-  /*       GO ZERO-H-D-AND-SX1.. */
-  goto L60;
-L10:
-  /*     CASE-SD1-NONNEGATIVE */
-  sp2 = *sd2 * *sy1;
-  if (!(sp2 == zero)) {
-    goto L20;
-  }
-  sflag = -two;
-  goto L260;
-/*     REGULAR-CASE.. */
-L20:
-  sp1 = *sd1 * *sx1;
-  sq2 = sp2 * *sy1;
-  sq1 = sp1 * *sx1;
-
-  if (!(dabs(sq1) > dabs(sq2))) {
-    goto L40;
-  }
-  sh21 = -(*sy1) / *sx1;
-  sh12 = sp2 / sp1;
-
-  su = one - sh12 * sh21;
-
-  if (!(su <= zero)) {
-    goto L30;
-  }
-  /*         GO ZERO-H-D-AND-SX1.. */
-  goto L60;
-L30:
-  sflag = zero;
-  *sd1 /= su;
-  *sd2 /= su;
-  *sx1 *= su;
-  /*         GO SCALE-CHECK.. */
-  goto L100;
-L40:
-  if (!(sq2 < zero)) {
-    goto L50;
-  }
-  /*         GO ZERO-H-D-AND-SX1.. */
-  goto L60;
-L50:
-  sflag = one;
-  sh11 = sp1 / sp2;
-  sh22 = *sx1 / *sy1;
-  su = one + sh11 * sh22;
-  stemp = *sd2 / su;
-  *sd2 = *sd1 / su;
-  *sd1 = stemp;
-  *sx1 = *sy1 * su;
-  /*         GO SCALE-CHECK */
-  goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-SX1.. */
-L60:
-  sflag = -one;
-  sh11 = zero;
-  sh12 = zero;
-  sh21 = zero;
-  sh22 = zero;
-
-  *sd1 = zero;
-  *sd2 = zero;
-  *sx1 = zero;
-  /*         RETURN.. */
-  goto L220;
-/*     PROCEDURE..FIX-H.. */
-L70:
-  if (!(sflag >= zero)) {
-    goto L90;
-  }
-
-  if (!(sflag == zero)) {
-    goto L80;
-  }
-  sh11 = one;
-  sh22 = one;
-  sflag = -one;
-  goto L90;
-L80:
-  sh21 = -one;
-  sh12 = one;
-  sflag = -one;
-L90:
-  switch (igo) {
-    case 0:
-      goto L120;
-    case 1:
-      goto L150;
-    case 2:
-      goto L180;
-    case 3:
-      goto L210;
-  }
-/*     PROCEDURE..SCALE-CHECK */
-L100:
-L110:
-  if (!(*sd1 <= rgamsq)) {
-    goto L130;
-  }
-  if (*sd1 == zero) {
-    goto L160;
-  }
-  igo = 0;
-  igo_fmt = fmt_120;
-  /*              FIX-H.. */
-  goto L70;
-L120:
-  /* Computing 2nd power */
-  r__1 = gam;
-  *sd1 *= r__1 * r__1;
-  *sx1 /= gam;
-  sh11 /= gam;
-  sh12 /= gam;
-  goto L110;
-L130:
-L140:
-  if (!(*sd1 >= gamsq)) {
-    goto L160;
-  }
-  igo = 1;
-  igo_fmt = fmt_150;
-  /*              FIX-H.. */
-  goto L70;
-L150:
-  /* Computing 2nd power */
-  r__1 = gam;
-  *sd1 /= r__1 * r__1;
-  *sx1 *= gam;
-  sh11 *= gam;
-  sh12 *= gam;
-  goto L140;
-L160:
-L170:
-  if (!(dabs(*sd2) <= rgamsq)) {
-    goto L190;
-  }
-  if (*sd2 == zero) {
-    goto L220;
-  }
-  igo = 2;
-  igo_fmt = fmt_180;
-  /*              FIX-H.. */
-  goto L70;
-L180:
-  /* Computing 2nd power */
-  r__1 = gam;
-  *sd2 *= r__1 * r__1;
-  sh21 /= gam;
-  sh22 /= gam;
-  goto L170;
-L190:
-L200:
-  if (!(dabs(*sd2) >= gamsq)) {
-    goto L220;
-  }
-  igo = 3;
-  igo_fmt = fmt_210;
-  /*              FIX-H.. */
-  goto L70;
-L210:
-  /* Computing 2nd power */
-  r__1 = gam;
-  *sd2 /= r__1 * r__1;
-  sh21 *= gam;
-  sh22 *= gam;
-  goto L200;
-L220:
-  if (sflag < 0.f) {
-    goto L250;
-  } else if (sflag == 0) {
-    goto L230;
-  } else {
-    goto L240;
-  }
-L230:
-  sparam[3] = sh21;
-  sparam[4] = sh12;
-  goto L260;
-L240:
-  sparam[2] = sh11;
-  sparam[5] = sh22;
-  goto L260;
-L250:
-  sparam[2] = sh11;
-  sparam[3] = sh21;
-  sparam[4] = sh12;
-  sparam[5] = sh22;
-L260:
-  sparam[1] = sflag;
-} /* srotmg_ */
--- a/blas/f2c/ssbmv.c
+++ b/blas/f2c/ssbmv.c
@@ -1,359 +0,0 @@
-/* ssbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x,
-                             integer *incx, real *beta, real *y, integer *incy) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-  real temp1, temp2;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  SSBMV  performs the matrix-vector  operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n symmetric band matrix, with k super-diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the band matrix A is being supplied as */
-  /*           follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  being supplied. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  being supplied. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry, K specifies the number of super-diagonals of the */
-  /*           matrix A. K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - REAL            . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  A      - REAL             array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the symmetric matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer the upper */
-  /*           triangular part of a symmetric band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the symmetric matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer the lower */
-  /*           triangular part of a symmetric band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - REAL             array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the */
-  /*           vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - REAL            . */
-  /*           On entry, BETA specifies the scalar beta. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - REAL             array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the */
-  /*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-  --y;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*k < 0) {
-    info = 3;
-  } else if (*lda < *k + 1) {
-    info = 6;
-  } else if (*incx == 0) {
-    info = 8;
-  } else if (*incy == 0) {
-    info = 11;
-  }
-  if (info != 0) {
-    xerbla_("SSBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array A */
-  /*     are accessed sequentially with one pass through A. */
-
-  /*     First form  y := beta*y. */
-
-  if (*beta != 1.f) {
-    if (*incy == 1) {
-      if (*beta == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = 0.f;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = *beta * y[i__];
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (*beta == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = 0.f;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = *beta * y[iy];
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (*alpha == 0.f) {
-    return;
-  }
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when upper triangle of A is stored. */
-
-    kplus1 = *k + 1;
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.f;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__2 = 1, i__3 = j - *k;
-        i__4 = j - 1;
-        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-          y[i__] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[i__];
-          /* L50: */
-        }
-        y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.f;
-        ix = kx;
-        iy = ky;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__4 = 1, i__2 = j - *k;
-        i__3 = j - 1;
-        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-          y[iy] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[ix];
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        if (j > *k) {
-          kx += *incx;
-          ky += *incy;
-        }
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when lower triangle of A is stored. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.f;
-        y[j] += temp1 * a[j * a_dim1 + 1];
-        l = 1 - j;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          y[i__] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[i__];
-          /* L90: */
-        }
-        y[j] += *alpha * temp2;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.f;
-        y[jy] += temp1 * a[j * a_dim1 + 1];
-        l = 1 - j;
-        ix = jx;
-        iy = jy;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          ix += *incx;
-          iy += *incy;
-          y[iy] += temp1 * a[l + i__ + j * a_dim1];
-          temp2 += a[l + i__ + j * a_dim1] * x[ix];
-          /* L110: */
-        }
-        y[jy] += *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of SSBMV . */
-
-} /* ssbmv_ */
--- a/blas/f2c/sspmv.c
+++ b/blas/f2c/sspmv.c
@@ -1,308 +0,0 @@
-/* sspmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y,
-                             integer *incy) {
-  /* System generated locals */
-  integer i__1, i__2;
-
-  /* Local variables */
-  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-  real temp1, temp2;
-  extern logical lsame_(char *, char *);
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  SSPMV  performs the matrix-vector operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n symmetric matrix, supplied in packed form. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the matrix A is supplied in the packed */
-  /*           array AP as follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - REAL            . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  AP     - REAL             array of DIMENSION at least */
-  /*           ( ( n*( n + 1 ) )/2 ). */
-  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
-  /*           contain the upper triangular part of the symmetric matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-  /*           and a( 2, 2 ) respectively, and so on. */
-  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
-  /*           contain the lower triangular part of the symmetric matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-  /*           and a( 3, 1 ) respectively, and so on. */
-  /*           Unchanged on exit. */
-
-  /*  X      - REAL             array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - REAL            . */
-  /*           On entry, BETA specifies the scalar beta. When BETA is */
-  /*           supplied as zero then Y need not be set on input. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - REAL             array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the n */
-  /*           element vector y. On exit, Y is overwritten by the updated */
-  /*           vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  --y;
-  --x;
-  --ap;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*incx == 0) {
-    info = 6;
-  } else if (*incy == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("SSPMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array AP */
-  /*     are accessed sequentially with one pass through AP. */
-
-  /*     First form  y := beta*y. */
-
-  if (*beta != 1.f) {
-    if (*incy == 1) {
-      if (*beta == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = 0.f;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[i__] = *beta * y[i__];
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (*beta == 0.f) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = 0.f;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          y[iy] = *beta * y[iy];
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (*alpha == 0.f) {
-    return;
-  }
-  kk = 1;
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when AP contains the upper triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.f;
-        k = kk;
-        i__2 = j - 1;
-        for (i__ = 1; i__ <= i__2; ++i__) {
-          y[i__] += temp1 * ap[k];
-          temp2 += ap[k] * x[i__];
-          ++k;
-          /* L50: */
-        }
-        y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-        kk += j;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.f;
-        ix = kx;
-        iy = ky;
-        i__2 = kk + j - 2;
-        for (k = kk; k <= i__2; ++k) {
-          y[iy] += temp1 * ap[k];
-          temp2 += ap[k] * x[ix];
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        kk += j;
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when AP contains the lower triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[j];
-        temp2 = 0.f;
-        y[j] += temp1 * ap[kk];
-        k = kk + 1;
-        i__2 = *n;
-        for (i__ = j + 1; i__ <= i__2; ++i__) {
-          y[i__] += temp1 * ap[k];
-          temp2 += ap[k] * x[i__];
-          ++k;
-          /* L90: */
-        }
-        y[j] += *alpha * temp2;
-        kk += *n - j + 1;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        temp1 = *alpha * x[jx];
-        temp2 = 0.f;
-        y[jy] += temp1 * ap[kk];
-        ix = jx;
-        iy = jy;
-        i__2 = kk + *n - j;
-        for (k = kk + 1; k <= i__2; ++k) {
-          ix += *incx;
-          iy += *incy;
-          y[iy] += temp1 * ap[k];
-          temp2 += ap[k] * x[ix];
-          /* L110: */
-        }
-        y[jy] += *alpha * temp2;
-        jx += *incx;
-        jy += *incy;
-        kk += *n - j + 1;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of SSPMV . */
-
-} /* sspmv_ */
--- a/blas/f2c/stbmv.c
+++ b/blas/f2c/stbmv.c
@@ -1,417 +0,0 @@
-/* stbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-/* Subroutine */ void stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda,
-                             real *x, integer *incx) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, jx, kx, info;
-  real temp;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-  logical nounit;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  STBMV  performs one of the matrix-vector operations */
-
-  /*     x := A*x,   or   x := A'*x, */
-
-  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the matrix is an upper or */
-  /*           lower triangular matrix as follows: */
-
-  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-  /*           Unchanged on exit. */
-
-  /*  TRANS  - CHARACTER*1. */
-  /*           On entry, TRANS specifies the operation to be performed as */
-  /*           follows: */
-
-  /*              TRANS = 'N' or 'n'   x := A*x. */
-
-  /*              TRANS = 'T' or 't'   x := A'*x. */
-
-  /*              TRANS = 'C' or 'c'   x := A'*x. */
-
-  /*           Unchanged on exit. */
-
-  /*  DIAG   - CHARACTER*1. */
-  /*           On entry, DIAG specifies whether or not A is unit */
-  /*           triangular as follows: */
-
-  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-  /*                                  triangular. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-  /*           super-diagonals of the matrix A. */
-  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-  /*           sub-diagonals of the matrix A. */
-  /*           K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  A      - REAL             array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer an upper */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer a lower */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-  /*           corresponding to the diagonal elements of the matrix are not */
-  /*           referenced, but are assumed to be unity. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - REAL             array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. On exit, X is overwritten with the */
-  /*           transformed vector x. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
-    info = 2;
-  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
-    info = 3;
-  } else if (*n < 0) {
-    info = 4;
-  } else if (*k < 0) {
-    info = 5;
-  } else if (*lda < *k + 1) {
-    info = 7;
-  } else if (*incx == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("STBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0) {
-    return;
-  }
-
-  nounit = lsame_(diag, "N");
-
-  /*     Set up the start point in X if the increment is not unity. This */
-  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-  if (*incx <= 0) {
-    kx = 1 - (*n - 1) * *incx;
-  } else if (*incx != 1) {
-    kx = 1;
-  }
-
-  /*     Start the operations. In this version the elements of A are */
-  /*     accessed sequentially with one pass through A. */
-
-  if (lsame_(trans, "N")) {
-    /*         Form  x := A*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          if (x[j] != 0.f) {
-            temp = x[j];
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__2 = 1, i__3 = j - *k;
-            i__4 = j - 1;
-            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-              x[i__] += temp * a[l + i__ + j * a_dim1];
-              /* L10: */
-            }
-            if (nounit) {
-              x[j] *= a[kplus1 + j * a_dim1];
-            }
-          }
-          /* L20: */
-        }
-      } else {
-        jx = kx;
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          if (x[jx] != 0.f) {
-            temp = x[jx];
-            ix = kx;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__4 = 1, i__2 = j - *k;
-            i__3 = j - 1;
-            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-              x[ix] += temp * a[l + i__ + j * a_dim1];
-              ix += *incx;
-              /* L30: */
-            }
-            if (nounit) {
-              x[jx] *= a[kplus1 + j * a_dim1];
-            }
-          }
-          jx += *incx;
-          if (j > *k) {
-            kx += *incx;
-          }
-          /* L40: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          if (x[j] != 0.f) {
-            temp = x[j];
-            l = 1 - j;
-            /* Computing MIN */
-            i__1 = *n, i__3 = j + *k;
-            i__4 = j + 1;
-            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
-              x[i__] += temp * a[l + i__ + j * a_dim1];
-              /* L50: */
-            }
-            if (nounit) {
-              x[j] *= a[j * a_dim1 + 1];
-            }
-          }
-          /* L60: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          if (x[jx] != 0.f) {
-            temp = x[jx];
-            ix = kx;
-            l = 1 - j;
-            /* Computing MIN */
-            i__4 = *n, i__1 = j + *k;
-            i__3 = j + 1;
-            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
-              x[ix] += temp * a[l + i__ + j * a_dim1];
-              ix -= *incx;
-              /* L70: */
-            }
-            if (nounit) {
-              x[jx] *= a[j * a_dim1 + 1];
-            }
-          }
-          jx -= *incx;
-          if (*n - j >= *k) {
-            kx -= *incx;
-          }
-          /* L80: */
-        }
-      }
-    }
-  } else {
-    /*        Form  x := A'*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          temp = x[j];
-          l = kplus1 - j;
-          if (nounit) {
-            temp *= a[kplus1 + j * a_dim1];
-          }
-          /* Computing MAX */
-          i__4 = 1, i__1 = j - *k;
-          i__3 = max(i__4, i__1);
-          for (i__ = j - 1; i__ >= i__3; --i__) {
-            temp += a[l + i__ + j * a_dim1] * x[i__];
-            /* L90: */
-          }
-          x[j] = temp;
-          /* L100: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          temp = x[jx];
-          kx -= *incx;
-          ix = kx;
-          l = kplus1 - j;
-          if (nounit) {
-            temp *= a[kplus1 + j * a_dim1];
-          }
-          /* Computing MAX */
-          i__4 = 1, i__1 = j - *k;
-          i__3 = max(i__4, i__1);
-          for (i__ = j - 1; i__ >= i__3; --i__) {
-            temp += a[l + i__ + j * a_dim1] * x[ix];
-            ix -= *incx;
-            /* L110: */
-          }
-          x[jx] = temp;
-          jx -= *incx;
-          /* L120: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          temp = x[j];
-          l = 1 - j;
-          if (nounit) {
-            temp *= a[j * a_dim1 + 1];
-          }
-          /* Computing MIN */
-          i__1 = *n, i__2 = j + *k;
-          i__4 = min(i__1, i__2);
-          for (i__ = j + 1; i__ <= i__4; ++i__) {
-            temp += a[l + i__ + j * a_dim1] * x[i__];
-            /* L130: */
-          }
-          x[j] = temp;
-          /* L140: */
-        }
-      } else {
-        jx = kx;
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          temp = x[jx];
-          kx += *incx;
-          ix = kx;
-          l = 1 - j;
-          if (nounit) {
-            temp *= a[j * a_dim1 + 1];
-          }
-          /* Computing MIN */
-          i__1 = *n, i__2 = j + *k;
-          i__4 = min(i__1, i__2);
-          for (i__ = j + 1; i__ <= i__4; ++i__) {
-            temp += a[l + i__ + j * a_dim1] * x[ix];
-            ix += *incx;
-            /* L150: */
-          }
-          x[jx] = temp;
-          jx += *incx;
-          /* L160: */
-        }
-      }
-    }
-  }
-
-  /*     End of STBMV . */
-
-} /* stbmv_ */
--- a/blas/f2c/zhbmv.c
+++ b/blas/f2c/zhbmv.c
@@ -1,456 +0,0 @@
-/* zhbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda,
-                             doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-  doublereal d__1;
-  doublecomplex z__1, z__2, z__3, z__4;
-
-  /* Local variables */
-  integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-  doublecomplex temp1, temp2;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  ZHBMV  performs the matrix-vector  operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n hermitian band matrix, with k super-diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the band matrix A is being supplied as */
-  /*           follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  being supplied. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  being supplied. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry, K specifies the number of super-diagonals of the */
-  /*           matrix A. K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - COMPLEX*16      . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the hermitian matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer the upper */
-  /*           triangular part of a hermitian band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the hermitian matrix, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer the lower */
-  /*           triangular part of a hermitian band matrix from conventional */
-  /*           full matrix storage to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that the imaginary parts of the diagonal elements need */
-  /*           not be set and are assumed to be zero. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX*16       array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the */
-  /*           vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - COMPLEX*16      . */
-  /*           On entry, BETA specifies the scalar beta. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - COMPLEX*16       array of DIMENSION at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the */
-  /*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-  --y;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*k < 0) {
-    info = 3;
-  } else if (*lda < *k + 1) {
-    info = 6;
-  } else if (*incx == 0) {
-    info = 8;
-  } else if (*incy == 0) {
-    info = 11;
-  }
-  if (info != 0) {
-    xerbla_("ZHBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array A */
-  /*     are accessed sequentially with one pass through A. */
-
-  /*     First form  y := beta*y. */
-
-  if (beta->r != 1. || beta->i != 0.) {
-    if (*incy == 1) {
-      if (beta->r == 0. && beta->i == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          y[i__2].r = 0., y[i__2].i = 0.;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (beta->r == 0. && beta->i == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          y[i__2].r = 0., y[i__2].i = 0.;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          i__3 = iy;
-          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (alpha->r == 0. && alpha->i == 0.) {
-    return;
-  }
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when upper triangle of A is stored. */
-
-    kplus1 = *k + 1;
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__2 = 1, i__3 = j - *k;
-        i__4 = j - 1;
-        for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          i__5 = l + i__ + j * a_dim1;
-          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-          i__2 = i__;
-          z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          /* L50: */
-        }
-        i__4 = j;
-        i__2 = j;
-        i__3 = kplus1 + j * a_dim1;
-        d__1 = a[i__3].r;
-        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-        z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
-        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-        y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__4 = jx;
-        z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        ix = kx;
-        iy = ky;
-        l = kplus1 - j;
-        /* Computing MAX */
-        i__4 = 1, i__2 = j - *k;
-        i__3 = j - 1;
-        for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-          i__4 = iy;
-          i__2 = iy;
-          i__5 = l + i__ + j * a_dim1;
-          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-          i__4 = ix;
-          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        i__3 = jy;
-        i__4 = jy;
-        i__2 = kplus1 + j * a_dim1;
-        d__1 = a[i__2].r;
-        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-        z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
-        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-        jx += *incx;
-        jy += *incy;
-        if (j > *k) {
-          kx += *incx;
-          ky += *incy;
-        }
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when lower triangle of A is stored. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__3 = j;
-        z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        i__3 = j;
-        i__4 = j;
-        i__2 = j * a_dim1 + 1;
-        d__1 = a[i__2].r;
-        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-        l = 1 - j;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          i__4 = i__;
-          i__2 = i__;
-          i__5 = l + i__ + j * a_dim1;
-          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-          i__4 = i__;
-          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          /* L90: */
-        }
-        i__3 = j;
-        i__4 = j;
-        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__3 = jx;
-        z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        i__3 = jy;
-        i__4 = jy;
-        i__2 = j * a_dim1 + 1;
-        d__1 = a[i__2].r;
-        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-        l = 1 - j;
-        ix = jx;
-        iy = jy;
-        /* Computing MIN */
-        i__4 = *n, i__2 = j + *k;
-        i__3 = min(i__4, i__2);
-        for (i__ = j + 1; i__ <= i__3; ++i__) {
-          ix += *incx;
-          iy += *incy;
-          i__4 = iy;
-          i__2 = iy;
-          i__5 = l + i__ + j * a_dim1;
-          z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r;
-          z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-          y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-          d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-          i__4 = ix;
-          z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          /* L110: */
-        }
-        i__3 = jy;
-        i__4 = jy;
-        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-        y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-        jx += *incx;
-        jy += *incy;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of ZHBMV . */
-
-} /* zhbmv_ */
--- a/blas/f2c/zhpmv.c
+++ b/blas/f2c/zhpmv.c
@@ -1,407 +0,0 @@
-/* zhpmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x,
-                             integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) {
-  /* System generated locals */
-  integer i__1, i__2, i__3, i__4, i__5;
-  doublereal d__1;
-  doublecomplex z__1, z__2, z__3, z__4;
-
-  /* Local variables */
-  integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-  doublecomplex temp1, temp2;
-  extern logical lsame_(char *, char *);
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  ZHPMV  performs the matrix-vector operation */
-
-  /*     y := alpha*A*x + beta*y, */
-
-  /*  where alpha and beta are scalars, x and y are n element vectors and */
-  /*  A is an n by n hermitian matrix, supplied in packed form. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the upper or lower */
-  /*           triangular part of the matrix A is supplied in the packed */
-  /*           array AP as follows: */
-
-  /*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-  /*                                  supplied in AP. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  ALPHA  - COMPLEX*16      . */
-  /*           On entry, ALPHA specifies the scalar alpha. */
-  /*           Unchanged on exit. */
-
-  /*  AP     - COMPLEX*16       array of DIMENSION at least */
-  /*           ( ( n*( n + 1 ) )/2 ). */
-  /*           Before entry with UPLO = 'U' or 'u', the array AP must */
-  /*           contain the upper triangular part of the hermitian matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-  /*           and a( 2, 2 ) respectively, and so on. */
-  /*           Before entry with UPLO = 'L' or 'l', the array AP must */
-  /*           contain the lower triangular part of the hermitian matrix */
-  /*           packed sequentially, column by column, so that AP( 1 ) */
-  /*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-  /*           and a( 3, 1 ) respectively, and so on. */
-  /*           Note that the imaginary parts of the diagonal elements need */
-  /*           not be set and are assumed to be zero. */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX*16       array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. */
-  /*           Unchanged on exit. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  BETA   - COMPLEX*16      . */
-  /*           On entry, BETA specifies the scalar beta. When BETA is */
-  /*           supplied as zero then Y need not be set on input. */
-  /*           Unchanged on exit. */
-
-  /*  Y      - COMPLEX*16       array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-  /*           Before entry, the incremented array Y must contain the n */
-  /*           element vector y. On exit, Y is overwritten by the updated */
-  /*           vector y. */
-
-  /*  INCY   - INTEGER. */
-  /*           On entry, INCY specifies the increment for the elements of */
-  /*           Y. INCY must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  --y;
-  --x;
-  --ap;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (*n < 0) {
-    info = 2;
-  } else if (*incx == 0) {
-    info = 6;
-  } else if (*incy == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("ZHPMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) {
-    return;
-  }
-
-  /*     Set up the start points in  X  and  Y. */
-
-  if (*incx > 0) {
-    kx = 1;
-  } else {
-    kx = 1 - (*n - 1) * *incx;
-  }
-  if (*incy > 0) {
-    ky = 1;
-  } else {
-    ky = 1 - (*n - 1) * *incy;
-  }
-
-  /*     Start the operations. In this version the elements of the array AP */
-  /*     are accessed sequentially with one pass through AP. */
-
-  /*     First form  y := beta*y. */
-
-  if (beta->r != 1. || beta->i != 0.) {
-    if (*incy == 1) {
-      if (beta->r == 0. && beta->i == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          y[i__2].r = 0., y[i__2].i = 0.;
-          /* L10: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = i__;
-          i__3 = i__;
-          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-          /* L20: */
-        }
-      }
-    } else {
-      iy = ky;
-      if (beta->r == 0. && beta->i == 0.) {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          y[i__2].r = 0., y[i__2].i = 0.;
-          iy += *incy;
-          /* L30: */
-        }
-      } else {
-        i__1 = *n;
-        for (i__ = 1; i__ <= i__1; ++i__) {
-          i__2 = iy;
-          i__3 = iy;
-          z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r;
-          y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-          iy += *incy;
-          /* L40: */
-        }
-      }
-    }
-  }
-  if (alpha->r == 0. && alpha->i == 0.) {
-    return;
-  }
-  kk = 1;
-  if (lsame_(uplo, "U")) {
-    /*        Form  y  when AP contains the upper triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        k = kk;
-        i__2 = j - 1;
-        for (i__ = 1; i__ <= i__2; ++i__) {
-          i__3 = i__;
-          i__4 = i__;
-          i__5 = k;
-          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-          d_cnjg(&z__3, &ap[k]);
-          i__3 = i__;
-          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          ++k;
-          /* L50: */
-        }
-        i__2 = j;
-        i__3 = j;
-        i__4 = kk + j - 1;
-        d__1 = ap[i__4].r;
-        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-        z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        kk += j;
-        /* L60: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = jx;
-        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        ix = kx;
-        iy = ky;
-        i__2 = kk + j - 2;
-        for (k = kk; k <= i__2; ++k) {
-          i__3 = iy;
-          i__4 = iy;
-          i__5 = k;
-          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-          d_cnjg(&z__3, &ap[k]);
-          i__3 = ix;
-          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          ix += *incx;
-          iy += *incy;
-          /* L70: */
-        }
-        i__2 = jy;
-        i__3 = jy;
-        i__4 = kk + j - 1;
-        d__1 = ap[i__4].r;
-        z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-        z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-        z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        jx += *incx;
-        jy += *incy;
-        kk += j;
-        /* L80: */
-      }
-    }
-  } else {
-    /*        Form  y  when AP contains the lower triangle. */
-
-    if (*incx == 1 && *incy == 1) {
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = j;
-        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        i__2 = j;
-        i__3 = j;
-        i__4 = kk;
-        d__1 = ap[i__4].r;
-        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        k = kk + 1;
-        i__2 = *n;
-        for (i__ = j + 1; i__ <= i__2; ++i__) {
-          i__3 = i__;
-          i__4 = i__;
-          i__5 = k;
-          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-          d_cnjg(&z__3, &ap[k]);
-          i__3 = i__;
-          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          ++k;
-          /* L90: */
-        }
-        i__2 = j;
-        i__3 = j;
-        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        kk += *n - j + 1;
-        /* L100: */
-      }
-    } else {
-      jx = kx;
-      jy = ky;
-      i__1 = *n;
-      for (j = 1; j <= i__1; ++j) {
-        i__2 = jx;
-        z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-        temp1.r = z__1.r, temp1.i = z__1.i;
-        temp2.r = 0., temp2.i = 0.;
-        i__2 = jy;
-        i__3 = jy;
-        i__4 = kk;
-        d__1 = ap[i__4].r;
-        z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        ix = jx;
-        iy = jy;
-        i__2 = kk + *n - j;
-        for (k = kk + 1; k <= i__2; ++k) {
-          ix += *incx;
-          iy += *incy;
-          i__3 = iy;
-          i__4 = iy;
-          i__5 = k;
-          z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r;
-          z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-          y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-          d_cnjg(&z__3, &ap[k]);
-          i__3 = ix;
-          z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-          z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-          temp2.r = z__1.r, temp2.i = z__1.i;
-          /* L110: */
-        }
-        i__2 = jy;
-        i__3 = jy;
-        z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r;
-        z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-        y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-        jx += *incx;
-        jy += *incy;
-        kk += *n - j + 1;
-        /* L120: */
-      }
-    }
-  }
-
-  /*     End of ZHPMV . */
-
-} /* zhpmv_ */
--- a/blas/f2c/ztbmv.c
+++ b/blas/f2c/ztbmv.c
@@ -1,586 +0,0 @@
-/* ztbmv.f -- translated by f2c (version 20100827).
-   You must link the resulting object file with libf2c:
-        on Microsoft Windows system, link with libf2c.lib;
-        on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-        or, if you install libf2c.a in a standard place, with -lf2c -lm
-        -- in that order, at the end of the command line, as in
-                cc *.o -lf2c -lm
-        Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-                http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "datatypes.h"
-
-static inline void d_cnjg(doublecomplex *r, doublecomplex *z) {
-  r->r = z->r;
-  r->i = -(z->i);
-}
-
-/* Subroutine */ void ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a,
-                             integer *lda, doublecomplex *x, integer *incx) {
-  /* System generated locals */
-  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-  doublecomplex z__1, z__2, z__3;
-
-  /* Local variables */
-  integer i__, j, l, ix, jx, kx, info;
-  doublecomplex temp;
-  extern logical lsame_(char *, char *);
-  integer kplus1;
-  extern /* Subroutine */ void xerbla_(const char *, integer *);
-  logical noconj, nounit;
-
-  /*     .. Scalar Arguments .. */
-  /*     .. */
-  /*     .. Array Arguments .. */
-  /*     .. */
-
-  /*  Purpose */
-  /*  ======= */
-
-  /*  ZTBMV  performs one of the matrix-vector operations */
-
-  /*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-  /*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-  /*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-  /*  Arguments */
-  /*  ========== */
-
-  /*  UPLO   - CHARACTER*1. */
-  /*           On entry, UPLO specifies whether the matrix is an upper or */
-  /*           lower triangular matrix as follows: */
-
-  /*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-  /*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-  /*           Unchanged on exit. */
-
-  /*  TRANS  - CHARACTER*1. */
-  /*           On entry, TRANS specifies the operation to be performed as */
-  /*           follows: */
-
-  /*              TRANS = 'N' or 'n'   x := A*x. */
-
-  /*              TRANS = 'T' or 't'   x := A'*x. */
-
-  /*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-  /*           Unchanged on exit. */
-
-  /*  DIAG   - CHARACTER*1. */
-  /*           On entry, DIAG specifies whether or not A is unit */
-  /*           triangular as follows: */
-
-  /*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-  /*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-  /*                                  triangular. */
-
-  /*           Unchanged on exit. */
-
-  /*  N      - INTEGER. */
-  /*           On entry, N specifies the order of the matrix A. */
-  /*           N must be at least zero. */
-  /*           Unchanged on exit. */
-
-  /*  K      - INTEGER. */
-  /*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-  /*           super-diagonals of the matrix A. */
-  /*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-  /*           sub-diagonals of the matrix A. */
-  /*           K must satisfy  0 .le. K. */
-  /*           Unchanged on exit. */
-
-  /*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-  /*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the upper triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row */
-  /*           ( k + 1 ) of the array, the first super-diagonal starting at */
-  /*           position 2 in row k, and so on. The top left k by k triangle */
-  /*           of the array A is not referenced. */
-  /*           The following program segment will transfer an upper */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = K + 1 - J */
-  /*                    DO 10, I = MAX( 1, J - K ), J */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-  /*           by n part of the array A must contain the lower triangular */
-  /*           band part of the matrix of coefficients, supplied column by */
-  /*           column, with the leading diagonal of the matrix in row 1 of */
-  /*           the array, the first sub-diagonal starting at position 1 in */
-  /*           row 2, and so on. The bottom right k by k triangle of the */
-  /*           array A is not referenced. */
-  /*           The following program segment will transfer a lower */
-  /*           triangular band matrix from conventional full matrix storage */
-  /*           to band storage: */
-
-  /*                 DO 20, J = 1, N */
-  /*                    M = 1 - J */
-  /*                    DO 10, I = J, MIN( N, J + K ) */
-  /*                       A( M + I, J ) = matrix( I, J ) */
-  /*              10    CONTINUE */
-  /*              20 CONTINUE */
-
-  /*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-  /*           corresponding to the diagonal elements of the matrix are not */
-  /*           referenced, but are assumed to be unity. */
-  /*           Unchanged on exit. */
-
-  /*  LDA    - INTEGER. */
-  /*           On entry, LDA specifies the first dimension of A as declared */
-  /*           in the calling (sub) program. LDA must be at least */
-  /*           ( k + 1 ). */
-  /*           Unchanged on exit. */
-
-  /*  X      - COMPLEX*16       array of dimension at least */
-  /*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-  /*           Before entry, the incremented array X must contain the n */
-  /*           element vector x. On exit, X is overwritten with the */
-  /*           transformed vector x. */
-
-  /*  INCX   - INTEGER. */
-  /*           On entry, INCX specifies the increment for the elements of */
-  /*           X. INCX must not be zero. */
-  /*           Unchanged on exit. */
-
-  /*  Further Details */
-  /*  =============== */
-
-  /*  Level 2 Blas routine. */
-
-  /*  -- Written on 22-October-1986. */
-  /*     Jack Dongarra, Argonne National Lab. */
-  /*     Jeremy Du Croz, Nag Central Office. */
-  /*     Sven Hammarling, Nag Central Office. */
-  /*     Richard Hanson, Sandia National Labs. */
-
-  /*  ===================================================================== */
-
-  /*     .. Parameters .. */
-  /*     .. */
-  /*     .. Local Scalars .. */
-  /*     .. */
-  /*     .. External Functions .. */
-  /*     .. */
-  /*     .. External Subroutines .. */
-  /*     .. */
-  /*     .. Intrinsic Functions .. */
-  /*     .. */
-
-  /*     Test the input parameters. */
-
-  /* Parameter adjustments */
-  a_dim1 = *lda;
-  a_offset = 1 + a_dim1;
-  a -= a_offset;
-  --x;
-
-  /* Function Body */
-  info = 0;
-  if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) {
-    info = 1;
-  } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) {
-    info = 2;
-  } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) {
-    info = 3;
-  } else if (*n < 0) {
-    info = 4;
-  } else if (*k < 0) {
-    info = 5;
-  } else if (*lda < *k + 1) {
-    info = 7;
-  } else if (*incx == 0) {
-    info = 9;
-  }
-  if (info != 0) {
-    xerbla_("ZTBMV ", &info);
-    return;
-  }
-
-  /*     Quick return if possible. */
-
-  if (*n == 0) {
-    return;
-  }
-
-  noconj = lsame_(trans, "T");
-  nounit = lsame_(diag, "N");
-
-  /*     Set up the start point in X if the increment is not unity. This */
-  /*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-  if (*incx <= 0) {
-    kx = 1 - (*n - 1) * *incx;
-  } else if (*incx != 1) {
-    kx = 1;
-  }
-
-  /*     Start the operations. In this version the elements of A are */
-  /*     accessed sequentially with one pass through A. */
-
-  if (lsame_(trans, "N")) {
-    /*         Form  x := A*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          i__2 = j;
-          if (x[i__2].r != 0. || x[i__2].i != 0.) {
-            i__2 = j;
-            temp.r = x[i__2].r, temp.i = x[i__2].i;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__2 = 1, i__3 = j - *k;
-            i__4 = j - 1;
-            for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) {
-              i__2 = i__;
-              i__3 = i__;
-              i__5 = l + i__ + j * a_dim1;
-              z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
-              z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i;
-              x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-              /* L10: */
-            }
-            if (nounit) {
-              i__4 = j;
-              i__2 = j;
-              i__3 = kplus1 + j * a_dim1;
-              z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i,
-              z__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r;
-              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-            }
-          }
-          /* L20: */
-        }
-      } else {
-        jx = kx;
-        i__1 = *n;
-        for (j = 1; j <= i__1; ++j) {
-          i__4 = jx;
-          if (x[i__4].r != 0. || x[i__4].i != 0.) {
-            i__4 = jx;
-            temp.r = x[i__4].r, temp.i = x[i__4].i;
-            ix = kx;
-            l = kplus1 - j;
-            /* Computing MAX */
-            i__4 = 1, i__2 = j - *k;
-            i__3 = j - 1;
-            for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) {
-              i__4 = ix;
-              i__2 = ix;
-              i__5 = l + i__ + j * a_dim1;
-              z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r;
-              z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + z__2.i;
-              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-              ix += *incx;
-              /* L30: */
-            }
-            if (nounit) {
-              i__3 = jx;
-              i__4 = jx;
-              i__2 = kplus1 + j * a_dim1;
-              z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i,
-              z__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r;
-              x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-            }
-          }
-          jx += *incx;
-          if (j > *k) {
-            kx += *incx;
-          }
-          /* L40: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          i__1 = j;
-          if (x[i__1].r != 0. || x[i__1].i != 0.) {
-            i__1 = j;
-            temp.r = x[i__1].r, temp.i = x[i__1].i;
-            l = 1 - j;
-            /* Computing MIN */
-            i__1 = *n, i__3 = j + *k;
-            i__4 = j + 1;
-            for (i__ = min(i__1, i__3); i__ >= i__4; --i__) {
-              i__1 = i__;
-              i__3 = i__;
-              i__2 = l + i__ + j * a_dim1;
-              z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
-              z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i;
-              x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-              /* L50: */
-            }
-            if (nounit) {
-              i__4 = j;
-              i__1 = j;
-              i__3 = j * a_dim1 + 1;
-              z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i,
-              z__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r;
-              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-            }
-          }
-          /* L60: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          i__4 = jx;
-          if (x[i__4].r != 0. || x[i__4].i != 0.) {
-            i__4 = jx;
-            temp.r = x[i__4].r, temp.i = x[i__4].i;
-            ix = kx;
-            l = 1 - j;
-            /* Computing MIN */
-            i__4 = *n, i__1 = j + *k;
-            i__3 = j + 1;
-            for (i__ = min(i__4, i__1); i__ >= i__3; --i__) {
-              i__4 = ix;
-              i__1 = ix;
-              i__2 = l + i__ + j * a_dim1;
-              z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r;
-              z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + z__2.i;
-              x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-              ix -= *incx;
-              /* L70: */
-            }
-            if (nounit) {
-              i__3 = jx;
-              i__4 = jx;
-              i__1 = j * a_dim1 + 1;
-              z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i,
-              z__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r;
-              x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-            }
-          }
-          jx -= *incx;
-          if (*n - j >= *k) {
-            kx -= *incx;
-          }
-          /* L80: */
-        }
-      }
-    }
-  } else {
-    /*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-    if (lsame_(uplo, "U")) {
-      kplus1 = *k + 1;
-      if (*incx == 1) {
-        for (j = *n; j >= 1; --j) {
-          i__3 = j;
-          temp.r = x[i__3].r, temp.i = x[i__3].i;
-          l = kplus1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__3 = kplus1 + j * a_dim1;
-              z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              i__4 = l + i__ + j * a_dim1;
-              i__1 = i__;
-              z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
-              z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              /* L90: */
-            }
-          } else {
-            if (nounit) {
-              d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-              i__4 = i__;
-              z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              /* L100: */
-            }
-          }
-          i__3 = j;
-          x[i__3].r = temp.r, x[i__3].i = temp.i;
-          /* L110: */
-        }
-      } else {
-        kx += (*n - 1) * *incx;
-        jx = kx;
-        for (j = *n; j >= 1; --j) {
-          i__3 = jx;
-          temp.r = x[i__3].r, temp.i = x[i__3].i;
-          kx -= *incx;
-          ix = kx;
-          l = kplus1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__3 = kplus1 + j * a_dim1;
-              z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              i__4 = l + i__ + j * a_dim1;
-              i__1 = ix;
-              z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i,
-              z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              ix -= *incx;
-              /* L120: */
-            }
-          } else {
-            if (nounit) {
-              d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MAX */
-            i__4 = 1, i__1 = j - *k;
-            i__3 = max(i__4, i__1);
-            for (i__ = j - 1; i__ >= i__3; --i__) {
-              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-              i__4 = ix;
-              z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              ix -= *incx;
-              /* L130: */
-            }
-          }
-          i__3 = jx;
-          x[i__3].r = temp.r, x[i__3].i = temp.i;
-          jx -= *incx;
-          /* L140: */
-        }
-      }
-    } else {
-      if (*incx == 1) {
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          i__4 = j;
-          temp.r = x[i__4].r, temp.i = x[i__4].i;
-          l = 1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__4 = j * a_dim1 + 1;
-              z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              i__1 = l + i__ + j * a_dim1;
-              i__2 = i__;
-              z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
-              z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              /* L150: */
-            }
-          } else {
-            if (nounit) {
-              d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-              i__1 = i__;
-              z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              /* L160: */
-            }
-          }
-          i__4 = j;
-          x[i__4].r = temp.r, x[i__4].i = temp.i;
-          /* L170: */
-        }
-      } else {
-        jx = kx;
-        i__3 = *n;
-        for (j = 1; j <= i__3; ++j) {
-          i__4 = jx;
-          temp.r = x[i__4].r, temp.i = x[i__4].i;
-          kx += *incx;
-          ix = kx;
-          l = 1 - j;
-          if (noconj) {
-            if (nounit) {
-              i__4 = j * a_dim1 + 1;
-              z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              i__1 = l + i__ + j * a_dim1;
-              i__2 = ix;
-              z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i,
-              z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              ix += *incx;
-              /* L180: */
-            }
-          } else {
-            if (nounit) {
-              d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-              z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r;
-              temp.r = z__1.r, temp.i = z__1.i;
-            }
-            /* Computing MIN */
-            i__1 = *n, i__2 = j + *k;
-            i__4 = min(i__1, i__2);
-            for (i__ = j + 1; i__ <= i__4; ++i__) {
-              d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-              i__1 = ix;
-              z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r;
-              z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-              temp.r = z__1.r, temp.i = z__1.i;
-              ix += *incx;
-              /* L190: */
-            }
-          }
-          i__4 = jx;
-          x[i__4].r = temp.r, x[i__4].i = temp.i;
-          jx += *incx;
-          /* L200: */
-        }
-      }
-    }
-  }
-
-  /*     End of ZTBMV . */
-
-} /* ztbmv_ */
--- a/blas/level1_real_impl.h
+++ b/blas/level1_real_impl.h
@@ -108,23 +108,171 @@ EIGEN_BLAS_FUNC(rot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scala
    Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation<Scalar>(c, s));
 }

-/*
-// performs rotation of points in the modified plane.
-EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param)
-{
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  Scalar* y = reinterpret_cast<Scalar*>(py);
+// Applies modified Givens rotation H to vectors x and y.
+//   param[0] = flag:
+//     -1: H = [[h11, h12], [h21, h22]]         (all 4 elements from param)
+//      0: H = [[1, h12], [h21, 1]]              (h12, h21 from param)
+//      1: H = [[h11, 1], [-1, h22]]             (h11, h22 from param)
+//     -2: H = identity                           (no-op)
+//   param[1..4] = h11, h21, h12, h22
+EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param) {
+  Scalar *x = reinterpret_cast<Scalar *>(px);
+  Scalar *y = reinterpret_cast<Scalar *>(py);

-  // TODO
+  Scalar flag = param[0];
+  if (*n <= 0 || flag == Scalar(-2)) return;

-  return 0;
+  Scalar h11, h12, h21, h22;
+  if (flag < Scalar(0)) {
+    h11 = param[1];
+    h21 = param[2];
+    h12 = param[3];
+    h22 = param[4];
+  } else if (flag == Scalar(0)) {
+    h11 = Scalar(1);
+    h21 = param[2];
+    h12 = param[3];
+    h22 = Scalar(1);
+  } else {
+    h11 = param[1];
+    h21 = Scalar(-1);
+    h12 = Scalar(1);
+    h22 = param[4];
+  }
+
+  int kx = *incx > 0 ? 0 : (1 - *n) * *incx;
+  int ky = *incy > 0 ? 0 : (1 - *n) * *incy;
+
+  for (int i = 0; i < *n; ++i) {
+    Scalar w = x[kx];
+    Scalar z = y[ky];
+    x[kx] = h11 * w + h12 * z;
+    y[ky] = h21 * w + h22 * z;
+    kx += *incx;
+    ky += *incy;
+  }
 }

-// computes the modified parameters for a Givens rotation.
-EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *x2, Scalar *param)
-{
-  // TODO
+// Constructs the modified Givens transformation matrix H which zeros the second
+// component of (sqrt(d1)*x1, sqrt(d2)*y1)^T.
+EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *y1, Scalar *param) {
+  using std::abs;

-  return 0;
+  const Scalar gam = Scalar(4096);
+  const Scalar gamsq = gam * gam;
+  const Scalar rgamsq = Scalar(1) / gamsq;
+
+  Scalar flag, h11 = Scalar(0), h12 = Scalar(0), h21 = Scalar(0), h22 = Scalar(0);
+
+  if (*d1 < Scalar(0)) {
+    // Negative d1: zero everything.
+    flag = Scalar(-1);
+    *d1 = *d2 = *x1 = Scalar(0);
+  } else {
+    Scalar p2 = *d2 * *y1;
+    if (p2 == Scalar(0)) {
+      // d2*y1 == 0: identity transform.
+      param[0] = Scalar(-2);
+      return;
+    }
+
+    Scalar p1 = *d1 * *x1;
+    Scalar q2 = p2 * *y1;
+    Scalar q1 = p1 * *x1;
+    bool do_scale = true;
+
+    if (abs(q1) > abs(q2)) {
+      h21 = -(*y1) / *x1;
+      h12 = p2 / p1;
+      Scalar u = Scalar(1) - h12 * h21;
+      if (u <= Scalar(0)) {
+        flag = Scalar(-1);
+        h11 = h12 = h21 = h22 = Scalar(0);
+        *d1 = *d2 = *x1 = Scalar(0);
+        do_scale = false;
+      } else {
+        flag = Scalar(0);
+        *d1 /= u;
+        *d2 /= u;
+        *x1 *= u;
+      }
+    } else if (q2 < Scalar(0)) {
+      flag = Scalar(-1);
+      h11 = h12 = h21 = h22 = Scalar(0);
+      *d1 = *d2 = *x1 = Scalar(0);
+      do_scale = false;
+    } else {
+      flag = Scalar(1);
+      h11 = p1 / p2;
+      h22 = *x1 / *y1;
+      Scalar u = Scalar(1) + h11 * h22;
+      Scalar temp = *d2 / u;
+      *d2 = *d1 / u;
+      *d1 = temp;
+      *x1 = *y1 * u;
+    }
+
+    if (do_scale) {
+      // Converts compact H representation (flag 0 or 1) to full form (flag -1)
+      // so that scaling factors can be absorbed into all four elements.
+      auto fix_h = [&]() {
+        if (flag >= Scalar(0)) {
+          if (flag == Scalar(0)) {
+            h11 = Scalar(1);
+            h22 = Scalar(1);
+          } else {
+            h21 = Scalar(-1);
+            h12 = Scalar(1);
+          }
+          flag = Scalar(-1);
+        }
+      };
+
+      // Scale d1 up if too small.
+      while (*d1 <= rgamsq && *d1 != Scalar(0)) {
+        fix_h();
+        *d1 *= gamsq;
+        *x1 /= gam;
+        h11 /= gam;
+        h12 /= gam;
+      }
+      // Scale d1 down if too large.
+      while (*d1 >= gamsq) {
+        fix_h();
+        *d1 /= gamsq;
+        *x1 *= gam;
+        h11 *= gam;
+        h12 *= gam;
+      }
+      // Scale |d2| up if too small.
+      while (abs(*d2) <= rgamsq && *d2 != Scalar(0)) {
+        fix_h();
+        *d2 *= gamsq;
+        h21 /= gam;
+        h22 /= gam;
+      }
+      // Scale |d2| down if too large.
+      while (abs(*d2) >= gamsq) {
+        fix_h();
+        *d2 /= gamsq;
+        h21 *= gam;
+        h22 *= gam;
+      }
+    }
+  }
+
+  // Store result in param array.
+  if (flag < Scalar(0)) {
+    param[1] = h11;
+    param[2] = h21;
+    param[3] = h12;
+    param[4] = h22;
+  } else if (flag == Scalar(0)) {
+    param[2] = h21;
+    param[3] = h12;
+  } else {
+    param[1] = h11;
+    param[4] = h22;
+  }
+  param[0] = flag;
 }
-*/
--- a/blas/level2_cplx_impl.h
+++ b/blas/level2_cplx_impl.h
@@ -72,31 +72,186 @@ EIGEN_BLAS_FUNC(hemv)
  if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy);
 }

-/**  ZHBMV  performs the matrix-vector  operation
+/**  HBMV  performs the matrix-vector operation
 *
 *     y := alpha*A*x + beta*y,
 *
 *  where alpha and beta are scalars, x and y are n element vectors and
 *  A is an n by n hermitian band matrix, with k super-diagonals.
+ *  Diagonal elements are real; off-diagonal contributions use conjugation.
 */
-// EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
-//                           RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-// {
-//   return 1;
-// }
+EIGEN_BLAS_FUNC(hbmv)
+(char *uplo, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta,
+ RealScalar *py, int *incy) {
+  const Scalar alpha = *reinterpret_cast<const Scalar *>(palpha);
+  const Scalar beta = *reinterpret_cast<const Scalar *>(pbeta);
+  const Scalar *a = reinterpret_cast<const Scalar *>(pa);
+  const Scalar *x = reinterpret_cast<const Scalar *>(px);
+  Scalar *y = reinterpret_cast<Scalar *>(py);

-/**  ZHPMV  performs the matrix-vector operation
+  int info = 0;
+  if (UPLO(*uplo) == INVALID)
+    info = 1;
+  else if (*n < 0)
+    info = 2;
+  else if (*k < 0)
+    info = 3;
+  else if (*lda < *k + 1)
+    info = 6;
+  else if (*incx == 0)
+    info = 8;
+  else if (*incy == 0)
+    info = 11;
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HBMV ", &info);
+
+  if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
+
+  int kx = *incx > 0 ? 0 : (1 - *n) * *incx;
+  int ky = *incy > 0 ? 0 : (1 - *n) * *incy;
+
+  // First form y := beta*y.
+  if (beta != Scalar(1)) {
+    int iy = ky;
+    for (int i = 0; i < *n; ++i) {
+      y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy];
+      iy += *incy;
+    }
+  }
+
+  if (alpha == Scalar(0)) return;
+
+  if (UPLO(*uplo) == UP) {
+    // Upper triangle: A[i,j] at a[(k+i-j) + j*lda], diagonal at row k.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      int ix = kx, iy = ky;
+      for (int i = std::max(0, j - *k); i < j; ++i) {
+        Scalar aij = a[(*k + i - j) + j * *lda];
+        y[iy] += temp1 * aij;
+        temp2 += Eigen::numext::conj(aij) * x[ix];
+        ix += *incx;
+        iy += *incy;
+      }
+      // Diagonal is real.
+      y[jy] += Scalar(Eigen::numext::real(a[*k + j * *lda])) * temp1 + alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      if (j >= *k) {
+        kx += *incx;
+        ky += *incy;
+      }
+    }
+  } else {
+    // Lower triangle: A[i,j] at a[(i-j) + j*lda], diagonal at row 0.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      // Diagonal is real.
+      y[jy] += Scalar(Eigen::numext::real(a[j * *lda])) * temp1;
+      int ix = jx, iy = jy;
+      for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) {
+        ix += *incx;
+        iy += *incy;
+        Scalar aij = a[(i - j) + j * *lda];
+        y[iy] += temp1 * aij;
+        temp2 += Eigen::numext::conj(aij) * x[ix];
+      }
+      y[jy] += alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+    }
+  }
+}
+
+/**  HPMV  performs the matrix-vector operation
 *
 *     y := alpha*A*x + beta*y,
 *
 *  where alpha and beta are scalars, x and y are n element vectors and
 *  A is an n by n hermitian matrix, supplied in packed form.
+ *  Diagonal elements are real; off-diagonal contributions use conjugation.
 */
-// EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
-// *beta, RealScalar *y, int *incy)
-// {
-//   return 1;
-// }
+EIGEN_BLAS_FUNC(hpmv)
+(char *uplo, int *n, RealScalar *palpha, RealScalar *pap, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py,
+ int *incy) {
+  const Scalar alpha = *reinterpret_cast<const Scalar *>(palpha);
+  const Scalar beta = *reinterpret_cast<const Scalar *>(pbeta);
+  const Scalar *ap = reinterpret_cast<const Scalar *>(pap);
+  const Scalar *x = reinterpret_cast<const Scalar *>(px);
+  Scalar *y = reinterpret_cast<Scalar *>(py);
+
+  int info = 0;
+  if (UPLO(*uplo) == INVALID)
+    info = 1;
+  else if (*n < 0)
+    info = 2;
+  else if (*incx == 0)
+    info = 6;
+  else if (*incy == 0)
+    info = 9;
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "HPMV ", &info);
+
+  if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
+
+  int kx = *incx > 0 ? 0 : (1 - *n) * *incx;
+  int ky = *incy > 0 ? 0 : (1 - *n) * *incy;
+
+  // First form y := beta*y.
+  if (beta != Scalar(1)) {
+    int iy = ky;
+    for (int i = 0; i < *n; ++i) {
+      y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy];
+      iy += *incy;
+    }
+  }
+
+  if (alpha == Scalar(0)) return;
+
+  int kk = 0;
+  if (UPLO(*uplo) == UP) {
+    // Upper triangle packed.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      int ix = kx, iy = ky;
+      for (int i = 0; i < j; ++i) {
+        y[iy] += temp1 * ap[kk + i];
+        temp2 += Eigen::numext::conj(ap[kk + i]) * x[ix];
+        ix += *incx;
+        iy += *incy;
+      }
+      // Diagonal is real.
+      y[jy] += Scalar(Eigen::numext::real(ap[kk + j])) * temp1 + alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      kk += j + 1;
+    }
+  } else {
+    // Lower triangle packed.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      // Diagonal is real.
+      y[jy] += Scalar(Eigen::numext::real(ap[kk])) * temp1;
+      int ix = jx, iy = jy;
+      for (int i = 1; i < *n - j; ++i) {
+        ix += *incx;
+        iy += *incy;
+        y[iy] += temp1 * ap[kk + i];
+        temp2 += Eigen::numext::conj(ap[kk + i]) * x[ix];
+      }
+      y[jy] += alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      kk += *n - j;
+    }
+  }
+}

 /**  ZHPR    performs the hermitian rank 1 operation
 *
--- a/blas/level2_impl.h
+++ b/blas/level2_impl.h
@@ -303,61 +303,92 @@ EIGEN_BLAS_FUNC(gbmv)
  if (actual_y != y) delete[] copy_back(actual_y, y, actual_m, *incy);
 }

-#if 0
 /**  TBMV  performs one of the matrix-vector operations
-  *
-  *     x := A*x,   or   x := A'*x,
-  *
-  *  where x is an n element vector and  A is an n by n unit, or non-unit,
-  *  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-  */
-EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
-{
-  Scalar* a = reinterpret_cast<Scalar*>(pa);
-  Scalar* x = reinterpret_cast<Scalar*>(px);
-  int coeff_rows = *k + 1;
+ *
+ *     x := A*x,   or   x := A'*x,   or   x := conjg(A')*x,
+ *
+ *  where x is an n element vector and A is an n by n unit, or non-unit,
+ *  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+ *
+ *  Band storage: upper triangle stores A[i,j] at a[(k+i-j) + j*lda],
+ *  lower triangle stores A[i,j] at a[(i-j) + j*lda].
+ */
+EIGEN_BLAS_FUNC(tbmv)
+(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) {
+  Scalar *a = reinterpret_cast<Scalar *>(pa);
+  Scalar *x = reinterpret_cast<Scalar *>(px);

  int info = 0;
-       if(UPLO(*uplo)==INVALID)                                       info = 1;
-  else if(OP(*opa)==INVALID)                                          info = 2;
-  else if(DIAG(*diag)==INVALID)                                       info = 3;
-  else if(*n<0)                                                       info = 4;
-  else if(*k<0)                                                       info = 5;
-  else if(*lda<coeff_rows)                                            info = 7;
-  else if(*incx==0)                                                   info = 9;
-  if(info)
-    return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6);
+  if (UPLO(*uplo) == INVALID)
+    info = 1;
+  else if (OP(*opa) == INVALID)
+    info = 2;
+  else if (DIAG(*diag) == INVALID)
+    info = 3;
+  else if (*n < 0)
+    info = 4;
+  else if (*k < 0)
+    info = 5;
+  else if (*lda < *k + 1)
+    info = 7;
+  else if (*incx == 0)
+    info = 9;
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "TBMV ", &info);

-  if(*n==0) return;
+  if (*n == 0) return;

-  int actual_n = *n;
+  Scalar *actual_x = get_compact_vector(x, *n, *incx);

-  Scalar* actual_x = get_compact_vector(x,actual_n,*incx);
+  bool upper = (UPLO(*uplo) == UP);
+  int op = OP(*opa);
+  bool unit = (DIAG(*diag) == UNIT);

-  MatrixType mat_coeffs(a,coeff_rows,*n,*lda);
+  if (op == NOTR) {
+    if (upper) {
+      // x := A*x, upper band. Process columns left to right.
+      for (int j = 0; j < *n; ++j) {
+        if (actual_x[j] != Scalar(0)) {
+          Scalar temp = actual_x[j];
+          for (int i = std::max(0, j - *k); i < j; ++i) actual_x[i] += temp * a[(*k + i - j) + j * *lda];
+          if (!unit) actual_x[j] = temp * a[*k + j * *lda];
+        }
+      }
+    } else {
+      // x := A*x, lower band. Process columns right to left.
+      for (int j = *n - 1; j >= 0; --j) {
+        if (actual_x[j] != Scalar(0)) {
+          Scalar temp = actual_x[j];
+          for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) actual_x[i] += temp * a[(i - j) + j * *lda];
+          if (!unit) actual_x[j] = temp * a[j * *lda];
+        }
+      }
+    }
+  } else {
+    // Transpose or conjugate transpose.
+    bool do_conj = (op == ADJ);
+    auto maybe_conj = [do_conj](Scalar val) -> Scalar { return do_conj ? Eigen::numext::conj(val) : val; };

-  int ku = UPLO(*uplo)==UPPER ? *k : 0;
-  int kl = UPLO(*uplo)==LOWER ? *k : 0;
-
-  for(int j=0; j<*n; ++j)
-  {
-    int start = std::max(0,j - ku);
-    int end = std::min((*m)-1,j + kl);
-    int len = end - start + 1;
-    int offset = (ku) - j + start;
-
-    if(OP(*trans)==NOTR)
-      make_vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len);
-    else if(OP(*trans)==TR)
-      actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * make_vector(actual_x+start,len) ).value();
-    else
-      actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint()   * make_vector(actual_x+start,len) ).value();
+    if (upper) {
+      // x := op(A)*x, upper band. Process columns right to left.
+      for (int j = *n - 1; j >= 0; --j) {
+        Scalar temp = actual_x[j];
+        if (!unit) temp *= maybe_conj(a[*k + j * *lda]);
+        for (int i = std::max(0, j - *k); i < j; ++i) temp += maybe_conj(a[(*k + i - j) + j * *lda]) * actual_x[i];
+        actual_x[j] = temp;
+      }
+    } else {
+      // x := op(A)*x, lower band. Process columns left to right.
+      for (int j = 0; j < *n; ++j) {
+        Scalar temp = actual_x[j];
+        if (!unit) temp *= maybe_conj(a[j * *lda]);
+        for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) temp += maybe_conj(a[(i - j) + j * *lda]) * actual_x[i];
+        actual_x[j] = temp;
+      }
+    }
  }

-  if(actual_x!=x) delete[] actual_x;
-  if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy);
+  if (actual_x != x) delete[] copy_back(actual_x, x, *n, *incx);
 }
-#endif

 /**  DTBSV  solves one of the systems of equations
 *
--- a/blas/level2_real_impl.h
+++ b/blas/level2_real_impl.h
@@ -158,32 +158,187 @@ EIGEN_BLAS_FUNC(syr2)
  //   func[code](*n, a, *inca, b, *incb, c, *ldc, alpha);
 }

-/**  DSBMV  performs the matrix-vector  operation
+/**  SBMV  performs the matrix-vector operation
 *
 *     y := alpha*A*x + beta*y,
 *
 *  where alpha and beta are scalars, x and y are n element vectors and
 *  A is an n by n symmetric band matrix, with k super-diagonals.
+ *
+ *  Band storage: upper triangle stores A[i,j] at a[(k+i-j) + j*lda],
+ *  lower triangle stores A[i,j] at a[(i-j) + j*lda].
 */
-// EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda,
-//                            RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy)
-// {
-//   return 1;
-// }
+EIGEN_BLAS_FUNC(sbmv)
+(char *uplo, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta,
+ RealScalar *py, int *incy) {
+  const Scalar alpha = *reinterpret_cast<const Scalar *>(palpha);
+  const Scalar beta = *reinterpret_cast<const Scalar *>(pbeta);
+  const Scalar *a = reinterpret_cast<const Scalar *>(pa);
+  const Scalar *x = reinterpret_cast<const Scalar *>(px);
+  Scalar *y = reinterpret_cast<Scalar *>(py);

-/**  DSPMV  performs the matrix-vector operation
+  int info = 0;
+  if (UPLO(*uplo) == INVALID)
+    info = 1;
+  else if (*n < 0)
+    info = 2;
+  else if (*k < 0)
+    info = 3;
+  else if (*lda < *k + 1)
+    info = 6;
+  else if (*incx == 0)
+    info = 8;
+  else if (*incy == 0)
+    info = 11;
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SBMV ", &info);
+
+  if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
+
+  int kx = *incx > 0 ? 0 : (1 - *n) * *incx;
+  int ky = *incy > 0 ? 0 : (1 - *n) * *incy;
+
+  // First form y := beta*y.
+  if (beta != Scalar(1)) {
+    int iy = ky;
+    for (int i = 0; i < *n; ++i) {
+      y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy];
+      iy += *incy;
+    }
+  }
+
+  if (alpha == Scalar(0)) return;
+
+  if (UPLO(*uplo) == UP) {
+    // Upper triangle: A[i,j] at a[(k+i-j) + j*lda], diagonal at row k.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      int ix = kx, iy = ky;
+      for (int i = std::max(0, j - *k); i < j; ++i) {
+        Scalar aij = a[(*k + i - j) + j * *lda];
+        y[iy] += temp1 * aij;
+        temp2 += aij * x[ix];
+        ix += *incx;
+        iy += *incy;
+      }
+      y[jy] += temp1 * a[*k + j * *lda] + alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      if (j >= *k) {
+        kx += *incx;
+        ky += *incy;
+      }
+    }
+  } else {
+    // Lower triangle: A[i,j] at a[(i-j) + j*lda], diagonal at row 0.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      y[jy] += temp1 * a[j * *lda];
+      int ix = jx, iy = jy;
+      for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) {
+        ix += *incx;
+        iy += *incy;
+        Scalar aij = a[(i - j) + j * *lda];
+        y[iy] += temp1 * aij;
+        temp2 += aij * x[ix];
+      }
+      y[jy] += alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+    }
+  }
+}
+
+/**  SPMV  performs the matrix-vector operation
 *
 *     y := alpha*A*x + beta*y,
 *
 *  where alpha and beta are scalars, x and y are n element vectors and
 *  A is an n by n symmetric matrix, supplied in packed form.
 *
+ *  Packed storage: upper triangle stores columns sequentially so that
+ *  column j occupies positions kk..kk+j (where kk = j*(j+1)/2),
+ *  lower triangle stores column j at positions kk..kk+(n-j-1).
 */
-// EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar
-// *beta, RealScalar *y, int *incy)
-// {
-//   return 1;
-// }
+EIGEN_BLAS_FUNC(spmv)
+(char *uplo, int *n, RealScalar *palpha, RealScalar *pap, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py,
+ int *incy) {
+  const Scalar alpha = *reinterpret_cast<const Scalar *>(palpha);
+  const Scalar beta = *reinterpret_cast<const Scalar *>(pbeta);
+  const Scalar *ap = reinterpret_cast<const Scalar *>(pap);
+  const Scalar *x = reinterpret_cast<const Scalar *>(px);
+  Scalar *y = reinterpret_cast<Scalar *>(py);
+
+  int info = 0;
+  if (UPLO(*uplo) == INVALID)
+    info = 1;
+  else if (*n < 0)
+    info = 2;
+  else if (*incx == 0)
+    info = 6;
+  else if (*incy == 0)
+    info = 9;
+  if (info) return xerbla_(SCALAR_SUFFIX_UP "SPMV ", &info);
+
+  if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return;
+
+  int kx = *incx > 0 ? 0 : (1 - *n) * *incx;
+  int ky = *incy > 0 ? 0 : (1 - *n) * *incy;
+
+  // First form y := beta*y.
+  if (beta != Scalar(1)) {
+    int iy = ky;
+    for (int i = 0; i < *n; ++i) {
+      y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy];
+      iy += *incy;
+    }
+  }
+
+  if (alpha == Scalar(0)) return;
+
+  int kk = 0;
+  if (UPLO(*uplo) == UP) {
+    // Upper triangle packed.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      int ix = kx, iy = ky;
+      for (int i = 0; i < j; ++i) {
+        y[iy] += temp1 * ap[kk + i];
+        temp2 += ap[kk + i] * x[ix];
+        ix += *incx;
+        iy += *incy;
+      }
+      y[jy] += temp1 * ap[kk + j] + alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      kk += j + 1;
+    }
+  } else {
+    // Lower triangle packed.
+    int jx = kx, jy = ky;
+    for (int j = 0; j < *n; ++j) {
+      Scalar temp1 = alpha * x[jx];
+      Scalar temp2 = Scalar(0);
+      y[jy] += temp1 * ap[kk];
+      int ix = jx, iy = jy;
+      for (int i = 1; i < *n - j; ++i) {
+        ix += *incx;
+        iy += *incy;
+        y[iy] += temp1 * ap[kk + i];
+        temp2 += ap[kk + i] * x[ix];
+      }
+      y[jy] += alpha * temp2;
+      jx += *incx;
+      jy += *incy;
+      kk += *n - j;
+    }
+  }
+}

 /**  DSPR    performs the symmetric rank 1 operation
 *
--- a/blas/lsame.cpp
+++ b/blas/lsame.cpp
@@ -0,0 +1,15 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <cctype>
+
+#include "blas.h"
+
+// LSAME returns true if ca and cb are the same letter, regardless of case.
+extern "C" EIGEN_BLAS_API int lsame_(const char *ca, const char *cb) {
+  return std::toupper(static_cast<unsigned char>(*ca)) == std::toupper(static_cast<unsigned char>(*cb));
+}