From 4ad90a60f17c5fcf74a6dd6773cc87ca7c43866c Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen <4643818-rmlarsen1@users.noreply.gitlab.com> Date: Sun, 5 Apr 2026 16:04:41 -0700 Subject: [PATCH] Replace blas/f2c with clean C++ implementations libeigen/eigen!2402 Co-authored-by: Rasmus Munk Larsen --- benchmarks/BLAS/CMakeLists.txt | 17 + benchmarks/BLAS/bench_blas.cpp | 488 +++++++++++++++++++++++++++ benchmarks/CMakeLists.txt | 7 +- blas/CMakeLists.txt | 6 +- blas/complexdots.cpp | 72 ++++ blas/f2c/chbmv.c | 456 ------------------------- blas/f2c/chpmv.c | 407 ----------------------- blas/f2c/complexdots.c | 73 ---- blas/f2c/ctbmv.c | 586 --------------------------------- blas/f2c/datatypes.h | 27 -- blas/f2c/drotm.c | 213 ------------ blas/f2c/drotmg.c | 293 ----------------- blas/f2c/dsbmv.c | 356 -------------------- blas/f2c/dspmv.c | 308 ----------------- blas/f2c/dtbmv.c | 417 ----------------------- blas/f2c/lsame.c | 109 ------ blas/f2c/srotm.c | 212 ------------ blas/f2c/srotmg.c | 293 ----------------- blas/f2c/ssbmv.c | 359 -------------------- blas/f2c/sspmv.c | 308 ----------------- blas/f2c/stbmv.c | 417 ----------------------- blas/f2c/zhbmv.c | 456 ------------------------- blas/f2c/zhpmv.c | 407 ----------------------- blas/f2c/ztbmv.c | 586 --------------------------------- blas/level1_real_impl.h | 176 +++++++++- blas/level2_cplx_impl.h | 179 +++++++++- blas/level2_impl.h | 119 ++++--- blas/level2_real_impl.h | 179 +++++++++- blas/lsame.cpp | 15 + 29 files changed, 1170 insertions(+), 6371 deletions(-) create mode 100644 benchmarks/BLAS/CMakeLists.txt create mode 100644 benchmarks/BLAS/bench_blas.cpp create mode 100644 blas/complexdots.cpp delete mode 100644 blas/f2c/chbmv.c delete mode 100644 blas/f2c/chpmv.c delete mode 100644 blas/f2c/complexdots.c delete mode 100644 blas/f2c/ctbmv.c delete mode 100644 blas/f2c/datatypes.h delete mode 100644 blas/f2c/drotm.c delete mode 100644 blas/f2c/drotmg.c delete mode 100644 blas/f2c/dsbmv.c delete mode 100644 blas/f2c/dspmv.c delete mode 100644 blas/f2c/dtbmv.c delete mode 100644 blas/f2c/lsame.c delete mode 100644 blas/f2c/srotm.c delete mode 100644 blas/f2c/srotmg.c delete mode 100644 blas/f2c/ssbmv.c delete mode 100644 blas/f2c/sspmv.c delete mode 100644 blas/f2c/stbmv.c delete mode 100644 blas/f2c/zhbmv.c delete mode 100644 blas/f2c/zhpmv.c delete mode 100644 blas/f2c/ztbmv.c create mode 100644 blas/lsame.cpp diff --git a/benchmarks/BLAS/CMakeLists.txt b/benchmarks/BLAS/CMakeLists.txt new file mode 100644 index 000000000..7288e8386 --- /dev/null +++ b/benchmarks/BLAS/CMakeLists.txt @@ -0,0 +1,17 @@ +# Benchmarks for Eigen's built-in BLAS implementation. +# Compiles the Eigen BLAS sources directly into the benchmark executable +# so there is no external BLAS dependency. + +set(EIGEN_BLAS_SRCS + ${EIGEN_SOURCE_DIR}/blas/single.cpp + ${EIGEN_SOURCE_DIR}/blas/double.cpp + ${EIGEN_SOURCE_DIR}/blas/complex_single.cpp + ${EIGEN_SOURCE_DIR}/blas/complex_double.cpp + ${EIGEN_SOURCE_DIR}/blas/xerbla.cpp + ${EIGEN_SOURCE_DIR}/blas/lsame.cpp + ${EIGEN_SOURCE_DIR}/blas/complexdots.cpp +) + +eigen_add_benchmark(bench_blas bench_blas.cpp) +target_sources(bench_blas PRIVATE ${EIGEN_BLAS_SRCS}) +target_include_directories(bench_blas PRIVATE ${EIGEN_SOURCE_DIR}/blas) diff --git a/benchmarks/BLAS/bench_blas.cpp b/benchmarks/BLAS/bench_blas.cpp new file mode 100644 index 000000000..ac5395723 --- /dev/null +++ b/benchmarks/BLAS/bench_blas.cpp @@ -0,0 +1,488 @@ +// Benchmark for Eigen's BLAS implementation. +// +// Calls the Eigen BLAS C interface directly (the extern "C" functions defined +// in blas/{single,double,complex_single,complex_double}.cpp). +// +// Covers Level 1, 2, and 3 routines — with emphasis on the routines that +// were recently rewritten from f2c to C++: rotm, rotmg, spmv, sbmv, hbmv, +// hpmv, tbmv, lsame, and complex dot products. + +#include + +#include +#include +#include + +#include "blas/blas.h" + +using Eigen::Index; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +// Flop-rate counter (units = individual flops per call). +static benchmark::Counter GflopsCounter(double flops) { + return benchmark::Counter(flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); +} + +// Fill a vector with random values in [-1, 1]. +template +static void fillRand(T* data, Index n) { + Eigen::Map>(data, n).setRandom(); +} + +// Fill a symmetric band matrix A in BLAS band storage (column-major). +// Upper triangle: A[i,j] stored at a[(k+i-j) + j*lda], 0 <= j-i <= k. +template +static void fillSymBandUpper(T* a, int n, int k, int lda) { + std::fill(a, a + lda * n, T(0)); + for (int j = 0; j < n; ++j) + for (int i = std::max(0, j - k); i <= j; ++i) a[(k + i - j) + j * lda] = T(std::rand()) / T(RAND_MAX) - T(0.5); +} + +// Fill a packed symmetric matrix (upper triangle, column-major). +template +static void fillSymPacked(T* ap, int n) { + int sz = n * (n + 1) / 2; + for (int i = 0; i < sz; ++i) ap[i] = T(std::rand()) / T(RAND_MAX) - T(0.5); +} + +// Fill a triangular band matrix in BLAS band storage (upper, column-major). +template +static void fillTriBandUpper(T* a, int n, int k, int lda) { + std::fill(a, a + lda * n, T(0)); + for (int j = 0; j < n; ++j) + for (int i = std::max(0, j - k); i <= j; ++i) { + T val = T(std::rand()) / T(RAND_MAX) - T(0.5); + if (i == j) val += T(n); // diagonal dominance + a[(k + i - j) + j * lda] = val; + } +} + +// --------------------------------------------------------------------------- +// Type-dispatched BLAS wrappers +// --------------------------------------------------------------------------- + +inline float blas_dot(int* n, float* x, int* incx, float* y, int* incy) { return sdot_(n, x, incx, y, incy); } +inline double blas_dot(int* n, double* x, int* incx, double* y, int* incy) { return ddot_(n, x, incx, y, incy); } + +inline void blas_axpy(int* n, float* a, float* x, int* incx, float* y, int* incy) { saxpy_(n, a, x, incx, y, incy); } +inline void blas_axpy(int* n, double* a, double* x, int* incx, double* y, int* incy) { daxpy_(n, a, x, incx, y, incy); } + +inline float blas_nrm2(int* n, float* x, int* incx) { return snrm2_(n, x, incx); } +inline double blas_nrm2(int* n, double* x, int* incx) { return dnrm2_(n, x, incx); } + +inline void blas_rotm(int* n, float* x, int* incx, float* y, int* incy, float* p) { srotm_(n, x, incx, y, incy, p); } +inline void blas_rotm(int* n, double* x, int* incx, double* y, int* incy, double* p) { drotm_(n, x, incx, y, incy, p); } + +inline void blas_rotmg(float* d1, float* d2, float* x1, float* y1, float* p) { srotmg_(d1, d2, x1, y1, p); } +inline void blas_rotmg(double* d1, double* d2, double* x1, double* y1, double* p) { drotmg_(d1, d2, x1, y1, p); } + +inline void blas_dotcw(int* n, float* cx, int* incx, float* cy, int* incy, float* res) { + cdotcw_(n, cx, incx, cy, incy, res); +} +inline void blas_dotcw(int* n, double* cx, int* incx, double* cy, int* incy, double* res) { + zdotcw_(n, cx, incx, cy, incy, res); +} + +inline void blas_gemv(char* t, int* m, int* n, float* a, float* A, int* lda, float* x, int* incx, float* b, float* y, + int* incy) { + sgemv_(t, m, n, a, A, lda, x, incx, b, y, incy); +} +inline void blas_gemv(char* t, int* m, int* n, double* a, double* A, int* lda, double* x, int* incx, double* b, + double* y, int* incy) { + dgemv_(t, m, n, a, A, lda, x, incx, b, y, incy); +} + +inline void blas_spmv(char* uplo, int* n, float* alpha, float* ap, float* x, int* incx, float* beta, float* y, + int* incy) { + sspmv_(uplo, n, alpha, ap, x, incx, beta, y, incy); +} +inline void blas_spmv(char* uplo, int* n, double* alpha, double* ap, double* x, int* incx, double* beta, double* y, + int* incy) { + dspmv_(uplo, n, alpha, ap, x, incx, beta, y, incy); +} + +inline void blas_sbmv(char* uplo, int* n, int* k, float* alpha, float* a, int* lda, float* x, int* incx, float* beta, + float* y, int* incy) { + ssbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); +} +inline void blas_sbmv(char* uplo, int* n, int* k, double* alpha, double* a, int* lda, double* x, int* incx, + double* beta, double* y, int* incy) { + dsbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); +} + +inline void blas_tbmv(char* uplo, char* trans, char* diag, int* n, int* k, float* a, int* lda, float* x, int* incx) { + stbmv_(uplo, trans, diag, n, k, a, lda, x, incx); +} +inline void blas_tbmv(char* uplo, char* trans, char* diag, int* n, int* k, double* a, int* lda, double* x, int* incx) { + dtbmv_(uplo, trans, diag, n, k, a, lda, x, incx); +} + +inline void blas_hbmv(char* uplo, int* n, int* k, float* alpha, float* a, int* lda, float* x, int* incx, float* beta, + float* y, int* incy) { + chbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); +} +inline void blas_hbmv(char* uplo, int* n, int* k, double* alpha, double* a, int* lda, double* x, int* incx, + double* beta, double* y, int* incy) { + zhbmv_(uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); +} + +inline void blas_hpmv(char* uplo, int* n, float* alpha, float* ap, float* x, int* incx, float* beta, float* y, + int* incy) { + chpmv_(uplo, n, alpha, ap, x, incx, beta, y, incy); +} +inline void blas_hpmv(char* uplo, int* n, double* alpha, double* ap, double* x, int* incx, double* beta, double* y, + int* incy) { + zhpmv_(uplo, n, alpha, ap, x, incx, beta, y, incy); +} + +inline void blas_gemm(char* ta, char* tb, int* m, int* n, int* k, float* alpha, float* a, int* lda, float* b, int* ldb, + float* beta, float* c, int* ldc) { + sgemm_(ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} +inline void blas_gemm(char* ta, char* tb, int* m, int* n, int* k, double* alpha, double* a, int* lda, double* b, + int* ldb, double* beta, double* c, int* ldc) { + dgemm_(ta, tb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +// ========================================================================= +// Level 1 — Real +// ========================================================================= + +// ----- SDOT / DDOT ----- +template +static void BM_dot(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + std::vector x(n), y(n); + fillRand(x.data(), n); + fillRand(y.data(), n); + for (auto _ : state) { + T r = blas_dot(&n, x.data(), &one, y.data(), &one); + benchmark::DoNotOptimize(r); + } + state.counters["GFLOPS"] = GflopsCounter(2.0 * n); +} + +// ----- SAXPY / DAXPY ----- +template +static void BM_axpy(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + T alpha = T(2.5); + std::vector x(n), y(n); + fillRand(x.data(), n); + fillRand(y.data(), n); + for (auto _ : state) { + blas_axpy(&n, &alpha, x.data(), &one, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + state.counters["GFLOPS"] = GflopsCounter(2.0 * n); +} + +// ----- SNRM2 / DNRM2 ----- +template +static void BM_nrm2(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + std::vector x(n); + fillRand(x.data(), n); + for (auto _ : state) { + T r = blas_nrm2(&n, x.data(), &one); + benchmark::DoNotOptimize(r); + } + // Nominal flops; Eigen's stableNorm() does more work internally. + state.counters["GFLOPS"] = GflopsCounter(2.0 * n - 1); +} + +// ----- SROTM / DROTM ----- +template +static void BM_rotm(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + std::vector x(n), y(n); + T param[5] = {T(-1), T(0.6), T(-0.8), T(0.8), T(0.6)}; // full rotation + fillRand(x.data(), n); + fillRand(y.data(), n); + for (auto _ : state) { + blas_rotm(&n, x.data(), &one, y.data(), &one, param); + benchmark::DoNotOptimize(x.data()); + benchmark::DoNotOptimize(y.data()); + } + // 4 muls + 2 adds per element pair. + state.counters["GFLOPS"] = GflopsCounter(6.0 * n); +} + +// ----- SROTMG / DROTMG ----- +template +static void BM_rotmg(benchmark::State& state) { + T d1 = T(2), d2 = T(3), x1 = T(1), y1 = T(0.5); + T param[5]; + for (auto _ : state) { + T td1 = d1, td2 = d2, tx1 = x1; + blas_rotmg(&td1, &td2, &tx1, &y1, param); + benchmark::DoNotOptimize(param); + } +} + +// ========================================================================= +// Level 1 — Complex +// ========================================================================= + +// Complex conjugate dot product via the worker functions (cdotcw_ / zdotcw_) +// which use an output pointer, avoiding the ABI ambiguity of the struct-returning +// cdotc_ / zdotc_ wrappers. +template +static void BM_dotc(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + std::vector x(2 * n), y(2 * n); // interleaved real/imag + fillRand(x.data(), 2 * n); + fillRand(y.data(), 2 * n); + T res[2]; + for (auto _ : state) { + blas_dotcw(&n, x.data(), &one, y.data(), &one, res); + benchmark::DoNotOptimize(res); + } + // Conjugate dot: 6 mul + 2 add per element = 8n flops. + state.counters["GFLOPS"] = GflopsCounter(8.0 * n); +} + +// ========================================================================= +// Level 2 — General Matrix-Vector (SGEMV / DGEMV) +// ========================================================================= + +template +static void BM_gemv(benchmark::State& state) { + int m = static_cast(state.range(0)); + int n = static_cast(state.range(1)); + int one = 1; + T alpha = T(1), beta = T(0); + char trans = 'N'; + std::vector a(m * n), x(n), y(m); + fillRand(a.data(), m * n); + fillRand(x.data(), n); + fillRand(y.data(), m); + for (auto _ : state) { + blas_gemv(&trans, &m, &n, &alpha, a.data(), &m, x.data(), &one, &beta, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + state.counters["GFLOPS"] = GflopsCounter(2.0 * m * n); +} + +// ========================================================================= +// Level 2 — Symmetric Packed (SSPMV / DSPMV) +// ========================================================================= + +template +static void BM_spmv(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + T alpha = T(1), beta = T(0); + char uplo = 'U'; + int sz = n * (n + 1) / 2; + std::vector ap(sz), x(n), y(n); + fillSymPacked(ap.data(), n); + fillRand(x.data(), n); + fillRand(y.data(), n); + for (auto _ : state) { + blas_spmv(&uplo, &n, &alpha, ap.data(), x.data(), &one, &beta, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + // Symmetric: each off-diag element contributes to two y entries. + state.counters["GFLOPS"] = GflopsCounter(2.0 * n * n); +} + +// ========================================================================= +// Level 2 — Symmetric Band (SSBMV / DSBMV) +// ========================================================================= + +template +static void BM_sbmv(benchmark::State& state) { + int n = static_cast(state.range(0)); + int k = static_cast(state.range(1)); + int lda = k + 1; + int one = 1; + T alpha = T(1), beta = T(0); + char uplo = 'U'; + std::vector a(lda * n), x(n), y(n); + fillSymBandUpper(a.data(), n, k, lda); + fillRand(x.data(), n); + fillRand(y.data(), n); + for (auto _ : state) { + blas_sbmv(&uplo, &n, &k, &alpha, a.data(), &lda, x.data(), &one, &beta, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + state.counters["GFLOPS"] = GflopsCounter(2.0 * n * (2 * k + 1)); +} + +// ========================================================================= +// Level 2 — Triangular Band (STBMV / DTBMV) +// ========================================================================= + +template +static void BM_tbmv(benchmark::State& state) { + int n = static_cast(state.range(0)); + int k = static_cast(state.range(1)); + int lda = k + 1; + int one = 1; + char uplo = 'U', trans = 'N', diag = 'N'; + std::vector a(lda * n), x(n), x_orig(n); + fillTriBandUpper(a.data(), n, k, lda); + fillRand(x_orig.data(), n); + for (auto _ : state) { + state.PauseTiming(); + std::copy(x_orig.begin(), x_orig.end(), x.begin()); + state.ResumeTiming(); + blas_tbmv(&uplo, &trans, &diag, &n, &k, a.data(), &lda, x.data(), &one); + benchmark::DoNotOptimize(x.data()); + } + state.counters["GFLOPS"] = GflopsCounter(1.0 * n * (k + 1)); +} + +// ========================================================================= +// Level 2 — Hermitian Band (CHBMV / ZHBMV) +// ========================================================================= + +template +static void BM_hbmv(benchmark::State& state) { + int n = static_cast(state.range(0)); + int k = static_cast(state.range(1)); + int lda = k + 1; + int one = 1; + char uplo = 'U'; + // Complex: each element is 2 reals. + std::vector a(2 * lda * n), x(2 * n), y(2 * n); + T alpha[2] = {T(1), T(0)}; + T beta[2] = {T(0), T(0)}; + fillRand(a.data(), 2 * lda * n); + // Make diagonal real (imag part = 0). + for (int j = 0; j < n; ++j) a[2 * (k + j * lda) + 1] = T(0); + fillRand(x.data(), 2 * n); + fillRand(y.data(), 2 * n); + for (auto _ : state) { + blas_hbmv(&uplo, &n, &k, alpha, a.data(), &lda, x.data(), &one, beta, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + // Complex hermitian band: 8*n*(2k+1) flops approximately. + state.counters["GFLOPS"] = GflopsCounter(8.0 * n * (2 * k + 1)); +} + +// ========================================================================= +// Level 2 — Hermitian Packed (CHPMV / ZHPMV) +// ========================================================================= + +template +static void BM_hpmv(benchmark::State& state) { + int n = static_cast(state.range(0)); + int one = 1; + char uplo = 'U'; + int sz = n * (n + 1) / 2; + std::vector ap(2 * sz), x(2 * n), y(2 * n); + T alpha[2] = {T(1), T(0)}; + T beta[2] = {T(0), T(0)}; + fillRand(ap.data(), 2 * sz); + // Make diagonal real. + int kk = 0; + for (int j = 0; j < n; ++j) { + ap[2 * (kk + j) + 1] = T(0); + kk += j + 1; + } + fillRand(x.data(), 2 * n); + fillRand(y.data(), 2 * n); + for (auto _ : state) { + blas_hpmv(&uplo, &n, alpha, ap.data(), x.data(), &one, beta, y.data(), &one); + benchmark::DoNotOptimize(y.data()); + } + state.counters["GFLOPS"] = GflopsCounter(8.0 * n * n); +} + +// ========================================================================= +// Level 3 — General Matrix Multiply (SGEMM / DGEMM) +// ========================================================================= + +template +static void BM_gemm(benchmark::State& state) { + int n = static_cast(state.range(0)); + T alpha = T(1), beta = T(0); + char trans = 'N'; + std::vector a(n * n), b(n * n), c(n * n); + fillRand(a.data(), n * n); + fillRand(b.data(), n * n); + fillRand(c.data(), n * n); + for (auto _ : state) { + blas_gemm(&trans, &trans, &n, &n, &n, &alpha, a.data(), &n, b.data(), &n, &beta, c.data(), &n); + benchmark::DoNotOptimize(c.data()); + } + state.counters["GFLOPS"] = GflopsCounter(2.0 * n * n * n); +} + +// ========================================================================= +// Register benchmarks +// ========================================================================= + +// clang-format off + +// --- Vector sizes for Level 1 --- +#define L1_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096)->Arg(16384)->Arg(65536) + +BENCHMARK(BM_dot) L1_SIZES ->Name("sdot"); +BENCHMARK(BM_dot) L1_SIZES ->Name("ddot"); +BENCHMARK(BM_axpy) L1_SIZES ->Name("saxpy"); +BENCHMARK(BM_axpy) L1_SIZES ->Name("daxpy"); +BENCHMARK(BM_nrm2) L1_SIZES ->Name("snrm2"); +BENCHMARK(BM_nrm2) L1_SIZES ->Name("dnrm2"); +BENCHMARK(BM_rotm) L1_SIZES ->Name("srotm"); +BENCHMARK(BM_rotm) L1_SIZES ->Name("drotm"); +BENCHMARK(BM_rotmg) ->Name("srotmg"); +BENCHMARK(BM_rotmg) ->Name("drotmg"); +BENCHMARK(BM_dotc) L1_SIZES ->Name("cdotc"); +BENCHMARK(BM_dotc) L1_SIZES ->Name("zdotc"); + +#undef L1_SIZES + +// --- Matrix sizes for Level 2 --- +// GEMV: {m, n} +#define GEMV_SIZES \ + ->Args({64, 64})->Args({256, 256})->Args({1024, 1024})->Args({4096, 4096}) \ + ->Args({4096, 64})->Args({64, 4096}) + +BENCHMARK(BM_gemv) GEMV_SIZES ->Name("sgemv"); +BENCHMARK(BM_gemv) GEMV_SIZES ->Name("dgemv"); +#undef GEMV_SIZES + +// Symmetric packed: {n} +#define SPM_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096) + +BENCHMARK(BM_spmv) SPM_SIZES ->Name("sspmv"); +BENCHMARK(BM_spmv) SPM_SIZES ->Name("dspmv"); +BENCHMARK(BM_hpmv) SPM_SIZES ->Name("chpmv"); +BENCHMARK(BM_hpmv) SPM_SIZES ->Name("zhpmv"); + +#undef SPM_SIZES + +// Band: {n, k} +#define BAND_SIZES \ + ->Args({256, 4})->Args({256, 32})->Args({1024, 4})->Args({1024, 32}) \ + ->Args({4096, 4})->Args({4096, 32})->Args({4096, 128}) + +BENCHMARK(BM_sbmv) BAND_SIZES ->Name("ssbmv"); +BENCHMARK(BM_sbmv) BAND_SIZES ->Name("dsbmv"); +BENCHMARK(BM_tbmv) BAND_SIZES ->Name("stbmv"); +BENCHMARK(BM_tbmv) BAND_SIZES ->Name("dtbmv"); +BENCHMARK(BM_hbmv) BAND_SIZES ->Name("chbmv"); +BENCHMARK(BM_hbmv) BAND_SIZES ->Name("zhbmv"); + +#undef BAND_SIZES + +// --- Square sizes for Level 3 --- +#define GEMM_SIZES ->Arg(32)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024) + +BENCHMARK(BM_gemm) GEMM_SIZES ->Name("sgemm"); +BENCHMARK(BM_gemm) GEMM_SIZES ->Name("dgemm"); + +#undef GEMM_SIZES + +// clang-format on diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index c414e66b3..3163f51d6 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -20,7 +20,11 @@ function(eigen_add_benchmark name source) if(BENCH_LIBRARIES) target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES}) endif() - target_compile_options(${name} PRIVATE -O3 -DNDEBUG) + target_compile_options(${name} PRIVATE + $<$:/O2> + $<$>:-O3> + ) + target_compile_definitions(${name} PRIVATE NDEBUG) if(BENCH_DEFINITIONS) target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS}) endif() @@ -38,3 +42,4 @@ add_subdirectory(FFT) add_subdirectory(Householder) add_subdirectory(Solvers) add_subdirectory(Tuning) +add_subdirectory(BLAS) diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index c8a28851b..2015ef1c2 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -6,11 +6,7 @@ if(EIGEN_BUILD_BLAS) add_custom_target(blas) set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp - f2c/srotm.c f2c/srotmg.c f2c/drotm.c f2c/drotmg.c - f2c/lsame.c f2c/dspmv.c f2c/ssbmv.c f2c/chbmv.c - f2c/sspmv.c f2c/zhbmv.c f2c/chpmv.c f2c/dsbmv.c - f2c/zhpmv.c f2c/dtbmv.c f2c/stbmv.c f2c/ctbmv.c - f2c/ztbmv.c f2c/complexdots.c + lsame.cpp complexdots.cpp ) set(EIGEN_BLAS_TARGETS "") diff --git a/blas/complexdots.cpp b/blas/complexdots.cpp new file mode 100644 index 000000000..7b8787aac --- /dev/null +++ b/blas/complexdots.cpp @@ -0,0 +1,72 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// C++ replacements for the f2c complex dot product wrappers. +// These are thin wrappers around the worker functions (cdotcw_, etc.) +// defined in level1_cplx_impl.h. +// +// Note: blas.h declares these as void, but gfortran expects complex functions +// to return by value. We define the correct signatures here and do not include +// blas.h to avoid the conflicting declarations. + +#if defined(_WIN32) +#if defined(EIGEN_BLAS_BUILD_DLL) +#define EIGEN_BLAS_CDOT_API __declspec(dllexport) +#else +#define EIGEN_BLAS_CDOT_API +#endif +#elif ((defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__)) && defined(EIGEN_BLAS_BUILD_DLL) +#define EIGEN_BLAS_CDOT_API __attribute__((visibility("default"))) +#else +#define EIGEN_BLAS_CDOT_API +#endif + +extern "C" { + +// Worker function declarations (defined in level1_cplx_impl.h via complex_single.cpp / complex_double.cpp). +void cdotcw_(int *n, float *cx, int *incx, float *cy, int *incy, float *res); +void cdotuw_(int *n, float *cx, int *incx, float *cy, int *incy, float *res); +void zdotcw_(int *n, double *cx, int *incx, double *cy, int *incy, double *res); +void zdotuw_(int *n, double *cx, int *incx, double *cy, int *incy, double *res); + +// POD complex types for C-compatible return values (matches Fortran complex layout). +struct eigen_blas_complex_float { + float r, i; +}; +struct eigen_blas_complex_double { + double r, i; +}; + +// CDOTC computes the conjugated dot product of two single-precision complex vectors. +EIGEN_BLAS_CDOT_API eigen_blas_complex_float cdotc_(int *n, float *cx, int *incx, float *cy, int *incy) { + eigen_blas_complex_float res = {0.0f, 0.0f}; + cdotcw_(n, cx, incx, cy, incy, &res.r); + return res; +} + +// CDOTU computes the unconjugated dot product of two single-precision complex vectors. +EIGEN_BLAS_CDOT_API eigen_blas_complex_float cdotu_(int *n, float *cx, int *incx, float *cy, int *incy) { + eigen_blas_complex_float res = {0.0f, 0.0f}; + cdotuw_(n, cx, incx, cy, incy, &res.r); + return res; +} + +// ZDOTC computes the conjugated dot product of two double-precision complex vectors. +EIGEN_BLAS_CDOT_API eigen_blas_complex_double zdotc_(int *n, double *cx, int *incx, double *cy, int *incy) { + eigen_blas_complex_double res = {0.0, 0.0}; + zdotcw_(n, cx, incx, cy, incy, &res.r); + return res; +} + +// ZDOTU computes the unconjugated dot product of two double-precision complex vectors. +EIGEN_BLAS_CDOT_API eigen_blas_complex_double zdotu_(int *n, double *cx, int *incx, double *cy, int *incy) { + eigen_blas_complex_double res = {0.0, 0.0}; + zdotuw_(n, cx, incx, cy, incy, &res.r); + return res; +} + +} // extern "C" diff --git a/blas/f2c/chbmv.c b/blas/f2c/chbmv.c deleted file mode 100644 index 2aa3a0170..000000000 --- a/blas/f2c/chbmv.c +++ /dev/null @@ -1,456 +0,0 @@ -/* chbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void r_cnjg(complex *r, complex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void chbmv_(char *uplo, integer *n, integer *k, complex *alpha, complex *a, integer *lda, complex *x, - integer *incx, complex *beta, complex *y, integer *incy) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - complex temp1, temp2; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* CHBMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n hermitian band matrix, with k super-diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the band matrix A is being supplied as */ - /* follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* being supplied. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* being supplied. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry, K specifies the number of super-diagonals of the */ - /* matrix A. K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* ALPHA - COMPLEX . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* A - COMPLEX array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the hermitian matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer the upper */ - /* triangular part of a hermitian band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the hermitian matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer the lower */ - /* triangular part of a hermitian band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that the imaginary parts of the diagonal elements need */ - /* not be set and are assumed to be zero. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - COMPLEX array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the */ - /* vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - COMPLEX . */ - /* On entry, BETA specifies the scalar beta. */ - /* Unchanged on exit. */ - - /* Y - COMPLEX array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the */ - /* vector y. On exit, Y is overwritten by the updated vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("CHBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array A */ - /* are accessed sequentially with one pass through A. */ - - /* First form y := beta*y. */ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - /* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; - /* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return; - } - if (lsame_(uplo, "U")) { - /* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__2 = i__; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - /* L50: */ - } - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - r__1 = a[i__3].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - ix = kx; - iy = ky; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ix += *incx; - iy += *incy; - /* L70: */ - } - i__3 = jy; - i__4 = jy; - i__2 = kplus1 + j * a_dim1; - r__1 = a[i__2].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } - /* L80: */ - } - } - } else { - /* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = j; - q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = j; - i__4 = j; - i__2 = j * a_dim1 + 1; - r__1 = a[i__2].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__; - i__2 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - /* L90: */ - } - i__3 = j; - i__4 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = jx; - q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = jy; - i__4 = jy; - i__2 = j * a_dim1 + 1; - r__1 = a[i__2].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - l = 1 - j; - ix = jx; - iy = jy; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; - y[i__4].r = q__1.r, y[i__4].i = q__1.i; - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - /* L110: */ - } - i__3 = jy; - i__4 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - jx += *incx; - jy += *incy; - /* L120: */ - } - } - } - - /* End of CHBMV . */ - -} /* chbmv_ */ diff --git a/blas/f2c/chpmv.c b/blas/f2c/chpmv.c deleted file mode 100644 index 4a38a524c..000000000 --- a/blas/f2c/chpmv.c +++ /dev/null @@ -1,407 +0,0 @@ -/* chpmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void r_cnjg(complex *r, complex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void chpmv_(char *uplo, integer *n, complex *alpha, complex *ap, complex *x, integer *incx, - complex *beta, complex *y, integer *incy) { - /* System generated locals */ - integer i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - complex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* CHPMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n hermitian matrix, supplied in packed form. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the matrix A is supplied in the packed */ - /* array AP as follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* supplied in AP. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* supplied in AP. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* ALPHA - COMPLEX . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* AP - COMPLEX array of DIMENSION at least */ - /* ( ( n*( n + 1 ) )/2 ). */ - /* Before entry with UPLO = 'U' or 'u', the array AP must */ - /* contain the upper triangular part of the hermitian matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ - /* and a( 2, 2 ) respectively, and so on. */ - /* Before entry with UPLO = 'L' or 'l', the array AP must */ - /* contain the lower triangular part of the hermitian matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ - /* and a( 3, 1 ) respectively, and so on. */ - /* Note that the imaginary parts of the diagonal elements need */ - /* not be set and are assumed to be zero. */ - /* Unchanged on exit. */ - - /* X - COMPLEX array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - COMPLEX . */ - /* On entry, BETA specifies the scalar beta. When BETA is */ - /* supplied as zero then Y need not be set on input. */ - /* Unchanged on exit. */ - - /* Y - COMPLEX array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the n */ - /* element vector y. On exit, Y is overwritten by the updated */ - /* vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - --y; - --x; - --ap; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("CHPMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && beta->i == 0.f))) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array AP */ - /* are accessed sequentially with one pass through AP. */ - - /* First form y := beta*y. */ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - /* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, q__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; - /* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return; - } - kk = 1; - if (lsame_(uplo, "U")) { - /* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ++k; - /* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = kk + j - 1; - r__1 = ap[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - kk += j; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - i__3 = iy; - i__4 = iy; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ix += *incx; - iy += *incy; - /* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = kk + j - 1; - r__1 = ap[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; - kk += j; - /* L80: */ - } - } - } else { - /* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = j; - i__3 = j; - i__4 = kk; - r__1 = ap[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ++k; - /* L90: */ - } - i__2 = j; - i__3 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - kk += *n - j + 1; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = jy; - i__3 = jy; - i__4 = kk; - r__1 = ap[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = k; - q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &ap[k]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - /* L110: */ - } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; - kk += *n - j + 1; - /* L120: */ - } - } - } - - /* End of CHPMV . */ - -} /* chpmv_ */ diff --git a/blas/f2c/complexdots.c b/blas/f2c/complexdots.c deleted file mode 100644 index b0dc6c89f..000000000 --- a/blas/f2c/complexdots.c +++ /dev/null @@ -1,73 +0,0 @@ -/* This file has been modified to use the standard gfortran calling - convention, rather than the f2c calling convention. - - It does not require -ff2c when compiled with gfortran. -*/ - -/* complexdots.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -complex cdotc_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) { - complex res; - extern /* Subroutine */ void cdotcw_(integer *, complex *, integer *, complex *, integer *, complex *); - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - cdotcw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; -} /* cdotc_ */ - -complex cdotu_(integer *n, complex *cx, integer *incx, complex *cy, integer *incy) { - complex res; - extern /* Subroutine */ void cdotuw_(integer *, complex *, integer *, complex *, integer *, complex *); - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - cdotuw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; -} /* cdotu_ */ - -doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) { - doublecomplex res; - extern /* Subroutine */ void zdotcw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *); - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - zdotcw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; -} /* zdotc_ */ - -doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, doublecomplex *cy, integer *incy) { - doublecomplex res; - extern /* Subroutine */ void zdotuw_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *); - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - zdotuw_(n, &cx[1], incx, &cy[1], incy, &res); - return res; -} /* zdotu_ */ diff --git a/blas/f2c/ctbmv.c b/blas/f2c/ctbmv.c deleted file mode 100644 index 2c0ce9bbf..000000000 --- a/blas/f2c/ctbmv.c +++ /dev/null @@ -1,586 +0,0 @@ -/* ctbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void r_cnjg(complex *r, complex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void ctbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, complex *a, integer *lda, - complex *x, integer *incx) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - complex temp; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - logical noconj, nounit; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* CTBMV performs one of the matrix-vector operations */ - - /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ - - /* where x is an n element vector and A is an n by n unit, or non-unit, */ - /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the matrix is an upper or */ - /* lower triangular matrix as follows: */ - - /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ - - /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ - - /* Unchanged on exit. */ - - /* TRANS - CHARACTER*1. */ - /* On entry, TRANS specifies the operation to be performed as */ - /* follows: */ - - /* TRANS = 'N' or 'n' x := A*x. */ - - /* TRANS = 'T' or 't' x := A'*x. */ - - /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ - - /* Unchanged on exit. */ - - /* DIAG - CHARACTER*1. */ - /* On entry, DIAG specifies whether or not A is unit */ - /* triangular as follows: */ - - /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ - - /* DIAG = 'N' or 'n' A is not assumed to be unit */ - /* triangular. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry with UPLO = 'U' or 'u', K specifies the number of */ - /* super-diagonals of the matrix A. */ - /* On entry with UPLO = 'L' or 'l', K specifies the number of */ - /* sub-diagonals of the matrix A. */ - /* K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* A - COMPLEX array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer an upper */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer a lower */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that when DIAG = 'U' or 'u' the elements of the array A */ - /* corresponding to the diagonal elements of the matrix are not */ - /* referenced, but are assumed to be unity. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - COMPLEX array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. On exit, X is overwritten with the */ - /* transformed vector x. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { - info = 2; - } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("CTBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0) { - return; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - - /* Set up the start point in X if the increment is not unity. This */ - /* will be ( N - 1 )*INCX too small for descending loops. */ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - - /* Start the operations. In this version the elements of A are */ - /* accessed sequentially with one pass through A. */ - - if (lsame_(trans, "N")) { - /* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - /* L10: */ - } - if (nounit) { - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i, - q__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - } - } - /* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - if (x[i__4].r != 0.f || x[i__4].i != 0.f) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - i__4 = ix; - i__2 = ix; - i__5 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, q__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; - q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + q__2.i; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - ix += *incx; - /* L30: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__2 = kplus1 + j * a_dim1; - q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i, - q__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } - /* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - l = 1 - j; - /* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { - i__1 = i__; - i__3 = i__; - i__2 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - /* L50: */ - } - if (nounit) { - i__4 = j; - i__1 = j; - i__3 = j * a_dim1 + 1; - q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i, - q__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - } - } - /* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__4 = jx; - if (x[i__4].r != 0.f || x[i__4].i != 0.f) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { - i__4 = ix; - i__1 = ix; - i__2 = l + i__ + j * a_dim1; - q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, q__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; - q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + q__2.i; - x[i__4].r = q__1.r, x[i__4].i = q__1.i; - ix -= *incx; - /* L70: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__1 = j * a_dim1 + 1; - q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i, - q__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } - /* L80: */ - } - } - } - } else { - /* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__3 = j; - temp.r = x[i__3].r, temp.i = x[i__3].i; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = i__; - q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, - q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - /* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - /* L100: */ - } - } - i__3 = j; - x[i__3].r = temp.r, x[i__3].i = temp.i; - /* L110: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__3 = jx; - temp.r = x[i__3].r, temp.i = x[i__3].i; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, q__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = ix; - q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, - q__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; - /* L120: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = q__3.r * x[i__4].i + q__3.i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; - /* L130: */ - } - } - i__3 = jx; - x[i__3].r = temp.r, x[i__3].i = temp.i; - jx -= *incx; - /* L140: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = j; - temp.r = x[i__4].r, temp.i = x[i__4].i; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = i__; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, - q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - /* L150: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j * a_dim1 + 1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__1 = i__; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - /* L160: */ - } - } - i__4 = j; - x[i__4].r = temp.r, x[i__4].i = temp.i; - /* L170: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - kx += *incx; - ix = kx; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, q__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = ix; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, - q__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; - /* L180: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j * a_dim1 + 1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, q__1.i = temp.r * q__2.i + temp.i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); - i__1 = ix; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, q__2.i = q__3.r * x[i__1].i + q__3.i * x[i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; - /* L190: */ - } - } - i__4 = jx; - x[i__4].r = temp.r, x[i__4].i = temp.i; - jx += *incx; - /* L200: */ - } - } - } - } - - /* End of CTBMV . */ - -} /* ctbmv_ */ diff --git a/blas/f2c/datatypes.h b/blas/f2c/datatypes.h deleted file mode 100644 index 45b108dbd..000000000 --- a/blas/f2c/datatypes.h +++ /dev/null @@ -1,27 +0,0 @@ -/* This contains a limited subset of the typedefs exposed by f2c - for use by the Eigen BLAS C-only implementation. -*/ - -#ifndef __EIGEN_DATATYPES_H__ -#define __EIGEN_DATATYPES_H__ - -typedef int integer; -typedef unsigned int uinteger; -typedef float real; -typedef double doublereal; -typedef struct { - real r, i; -} complex; -typedef struct { - doublereal r, i; -} doublecomplex; -typedef int logical; - -#define abs(x) ((x) >= 0 ? (x) : -(x)) -#define dabs(x) (doublereal) abs(x) -#define min(a, b) ((a) <= (b) ? (a) : (b)) -#define max(a, b) ((a) >= (b) ? (a) : (b)) -#define dmin(a, b) (doublereal) min(a, b) -#define dmax(a, b) (doublereal) max(a, b) - -#endif diff --git a/blas/f2c/drotm.c b/blas/f2c/drotm.c deleted file mode 100644 index da823177c..000000000 --- a/blas/f2c/drotm.c +++ /dev/null @@ -1,213 +0,0 @@ -/* drotm.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void drotm_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy, - doublereal *dparam) { - /* Initialized data */ - - static doublereal zero = 0.; - static doublereal two = 2.; - - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - integer i__; - doublereal w, z__; - integer kx, ky; - doublereal dh11, dh12, dh21, dh22, dflag; - integer nsteps; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ - - /* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ - /* (DY**T) */ - - /* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ - /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ - /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ - - /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ - - /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ - /* H=( ) ( ) ( ) ( ) */ - /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ - /* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ - - /* Arguments */ - /* ========= */ - - /* N (input) INTEGER */ - /* number of elements in input vector(s) */ - - /* DX (input/output) DOUBLE PRECISION array, dimension N */ - /* double precision vector with N elements */ - - /* INCX (input) INTEGER */ - /* storage spacing between elements of DX */ - - /* DY (input/output) DOUBLE PRECISION array, dimension N */ - /* double precision vector with N elements */ - - /* INCY (input) INTEGER */ - /* storage spacing between elements of DY */ - - /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ - /* DPARAM(1)=DFLAG */ - /* DPARAM(2)=DH11 */ - /* DPARAM(3)=DH21 */ - /* DPARAM(4)=DH12 */ - /* DPARAM(5)=DH22 */ - - /* ===================================================================== */ - - /* .. Local Scalars .. */ - /* .. */ - /* .. Data statements .. */ - /* Parameter adjustments */ - --dparam; - --dy; - --dx; - - /* Function Body */ - /* .. */ - - dflag = dparam[1]; - if (*n <= 0 || dflag + two == zero) { - goto L140; - } - if (!(*incx == *incy && *incx > 0)) { - goto L70; - } - - nsteps = *n * *incx; - if (dflag < 0.) { - goto L50; - } else if (dflag == 0) { - goto L10; - } else { - goto L30; - } -L10: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w + z__ * dh12; - dy[i__] = w * dh21 + z__; - /* L20: */ - } - goto L140; -L30: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = nsteps; - i__1 = *incx; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__; - dy[i__] = -w + dh22 * z__; - /* L40: */ - } - goto L140; -L50: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = dx[i__]; - z__ = dy[i__]; - dx[i__] = w * dh11 + z__ * dh12; - dy[i__] = w * dh21 + z__ * dh22; - /* L60: */ - } - goto L140; -L70: - kx = 1; - ky = 1; - if (*incx < 0) { - kx = (1 - *n) * *incx + 1; - } - if (*incy < 0) { - ky = (1 - *n) * *incy + 1; - } - - if (dflag < 0.) { - goto L120; - } else if (dflag == 0) { - goto L80; - } else { - goto L100; - } -L80: - dh12 = dparam[4]; - dh21 = dparam[3]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w + z__ * dh12; - dy[ky] = w * dh21 + z__; - kx += *incx; - ky += *incy; - /* L90: */ - } - goto L140; -L100: - dh11 = dparam[2]; - dh22 = dparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__; - dy[ky] = -w + dh22 * z__; - kx += *incx; - ky += *incy; - /* L110: */ - } - goto L140; -L120: - dh11 = dparam[2]; - dh12 = dparam[4]; - dh21 = dparam[3]; - dh22 = dparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = dx[kx]; - z__ = dy[ky]; - dx[kx] = w * dh11 + z__ * dh12; - dy[ky] = w * dh21 + z__ * dh22; - kx += *incx; - ky += *incy; - /* L130: */ - } -L140: - return; -} /* drotm_ */ diff --git a/blas/f2c/drotmg.c b/blas/f2c/drotmg.c deleted file mode 100644 index 81233cbdd..000000000 --- a/blas/f2c/drotmg.c +++ /dev/null @@ -1,293 +0,0 @@ -/* drotmg.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void drotmg_(doublereal *dd1, doublereal *dd2, doublereal *dx1, doublereal *dy1, doublereal *dparam) { - /* Initialized data */ - - static doublereal zero = 0.; - static doublereal one = 1.; - static doublereal two = 2.; - static doublereal gam = 4096.; - static doublereal gamsq = 16777216.; - static doublereal rgamsq = 5.9604645e-8; - - /* Format strings */ - static char fmt_120[] = ""; - static char fmt_150[] = ""; - static char fmt_180[] = ""; - static char fmt_210[] = ""; - - /* System generated locals */ - doublereal d__1; - - /* Local variables */ - doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; - integer igo; - doublereal dflag, dtemp; - - /* Assigned format variables */ - static char *igo_fmt; - (void)igo_fmt; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ - /* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ - /* DY2)**T. */ - /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ - - /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ - - /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ - /* H=( ) ( ) ( ) ( ) */ - /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ - /* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ - /* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ - /* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ - - /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ - /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ - /* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ - - /* Arguments */ - /* ========= */ - - /* DD1 (input/output) DOUBLE PRECISION */ - - /* DD2 (input/output) DOUBLE PRECISION */ - - /* DX1 (input/output) DOUBLE PRECISION */ - - /* DY1 (input) DOUBLE PRECISION */ - - /* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ - /* DPARAM(1)=DFLAG */ - /* DPARAM(2)=DH11 */ - /* DPARAM(3)=DH21 */ - /* DPARAM(4)=DH12 */ - /* DPARAM(5)=DH22 */ - - /* ===================================================================== */ - - /* .. Local Scalars .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - /* .. Data statements .. */ - - /* Parameter adjustments */ - --dparam; - - /* Function Body */ - /* .. */ - if (!(*dd1 < zero)) { - goto L10; - } - /* GO ZERO-H-D-AND-DX1.. */ - goto L60; -L10: - /* CASE-DD1-NONNEGATIVE */ - dp2 = *dd2 * *dy1; - if (!(dp2 == zero)) { - goto L20; - } - dflag = -two; - goto L260; -/* REGULAR-CASE.. */ -L20: - dp1 = *dd1 * *dx1; - dq2 = dp2 * *dy1; - dq1 = dp1 * *dx1; - - if (!(abs(dq1) > abs(dq2))) { - goto L40; - } - dh21 = -(*dy1) / *dx1; - dh12 = dp2 / dp1; - - du = one - dh12 * dh21; - - if (!(du <= zero)) { - goto L30; - } - /* GO ZERO-H-D-AND-DX1.. */ - goto L60; -L30: - dflag = zero; - *dd1 /= du; - *dd2 /= du; - *dx1 *= du; - /* GO SCALE-CHECK.. */ - goto L100; -L40: - if (!(dq2 < zero)) { - goto L50; - } - /* GO ZERO-H-D-AND-DX1.. */ - goto L60; -L50: - dflag = one; - dh11 = dp1 / dp2; - dh22 = *dx1 / *dy1; - du = one + dh11 * dh22; - dtemp = *dd2 / du; - *dd2 = *dd1 / du; - *dd1 = dtemp; - *dx1 = *dy1 * du; - /* GO SCALE-CHECK */ - goto L100; -/* PROCEDURE..ZERO-H-D-AND-DX1.. */ -L60: - dflag = -one; - dh11 = zero; - dh12 = zero; - dh21 = zero; - dh22 = zero; - - *dd1 = zero; - *dd2 = zero; - *dx1 = zero; - /* RETURN.. */ - goto L220; -/* PROCEDURE..FIX-H.. */ -L70: - if (!(dflag >= zero)) { - goto L90; - } - - if (!(dflag == zero)) { - goto L80; - } - dh11 = one; - dh22 = one; - dflag = -one; - goto L90; -L80: - dh21 = -one; - dh12 = one; - dflag = -one; -L90: - switch (igo) { - case 0: - goto L120; - case 1: - goto L150; - case 2: - goto L180; - case 3: - goto L210; - } -/* PROCEDURE..SCALE-CHECK */ -L100: -L110: - if (!(*dd1 <= rgamsq)) { - goto L130; - } - if (*dd1 == zero) { - goto L160; - } - igo = 0; - igo_fmt = fmt_120; - /* FIX-H.. */ - goto L70; -L120: - /* Computing 2nd power */ - d__1 = gam; - *dd1 *= d__1 * d__1; - *dx1 /= gam; - dh11 /= gam; - dh12 /= gam; - goto L110; -L130: -L140: - if (!(*dd1 >= gamsq)) { - goto L160; - } - igo = 1; - igo_fmt = fmt_150; - /* FIX-H.. */ - goto L70; -L150: - /* Computing 2nd power */ - d__1 = gam; - *dd1 /= d__1 * d__1; - *dx1 *= gam; - dh11 *= gam; - dh12 *= gam; - goto L140; -L160: -L170: - if (!(abs(*dd2) <= rgamsq)) { - goto L190; - } - if (*dd2 == zero) { - goto L220; - } - igo = 2; - igo_fmt = fmt_180; - /* FIX-H.. */ - goto L70; -L180: - /* Computing 2nd power */ - d__1 = gam; - *dd2 *= d__1 * d__1; - dh21 /= gam; - dh22 /= gam; - goto L170; -L190: -L200: - if (!(abs(*dd2) >= gamsq)) { - goto L220; - } - igo = 3; - igo_fmt = fmt_210; - /* FIX-H.. */ - goto L70; -L210: - /* Computing 2nd power */ - d__1 = gam; - *dd2 /= d__1 * d__1; - dh21 *= gam; - dh22 *= gam; - goto L200; -L220: - if (dflag < 0.) { - goto L250; - } else if (dflag == 0) { - goto L230; - } else { - goto L240; - } -L230: - dparam[3] = dh21; - dparam[4] = dh12; - goto L260; -L240: - dparam[2] = dh11; - dparam[5] = dh22; - goto L260; -L250: - dparam[2] = dh11; - dparam[3] = dh21; - dparam[4] = dh12; - dparam[5] = dh22; -L260: - dparam[1] = dflag; -} /* drotmg_ */ diff --git a/blas/f2c/dsbmv.c b/blas/f2c/dsbmv.c deleted file mode 100644 index 4619369ff..000000000 --- a/blas/f2c/dsbmv.c +++ /dev/null @@ -1,356 +0,0 @@ -/* dsbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void dsbmv_(char *uplo, integer *n, integer *k, doublereal *alpha, doublereal *a, integer *lda, - doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - doublereal temp1, temp2; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* DSBMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n symmetric band matrix, with k super-diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the band matrix A is being supplied as */ - /* follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* being supplied. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* being supplied. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry, K specifies the number of super-diagonals of the */ - /* matrix A. K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* ALPHA - DOUBLE PRECISION. */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the symmetric matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer the upper */ - /* triangular part of a symmetric band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the symmetric matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer the lower */ - /* triangular part of a symmetric band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - DOUBLE PRECISION array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the */ - /* vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - DOUBLE PRECISION. */ - /* On entry, BETA specifies the scalar beta. */ - /* Unchanged on exit. */ - - /* Y - DOUBLE PRECISION array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the */ - /* vector y. On exit, Y is overwritten by the updated vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("DSBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0. && *beta == 1.)) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array A */ - /* are accessed sequentially with one pass through A. */ - - /* First form y := beta*y. */ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; - /* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; - /* L40: */ - } - } - } - } - if (*alpha == 0.) { - return; - } - if (lsame_(uplo, "U")) { - /* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; - /* L50: */ - } - y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; - /* L70: */ - } - y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } - /* L80: */ - } - } - } else { - /* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; - /* L90: */ - } - y[j] += *alpha * temp2; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - ix = jx; - iy = jy; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - /* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - /* L120: */ - } - } - } - - /* End of DSBMV . */ - -} /* dsbmv_ */ diff --git a/blas/f2c/dspmv.c b/blas/f2c/dspmv.c deleted file mode 100644 index 314d67b8c..000000000 --- a/blas/f2c/dspmv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* dspmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void dspmv_(char *uplo, integer *n, doublereal *alpha, doublereal *ap, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy) { - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* DSPMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n symmetric matrix, supplied in packed form. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the matrix A is supplied in the packed */ - /* array AP as follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* supplied in AP. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* supplied in AP. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* ALPHA - DOUBLE PRECISION. */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* AP - DOUBLE PRECISION array of DIMENSION at least */ - /* ( ( n*( n + 1 ) )/2 ). */ - /* Before entry with UPLO = 'U' or 'u', the array AP must */ - /* contain the upper triangular part of the symmetric matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ - /* and a( 2, 2 ) respectively, and so on. */ - /* Before entry with UPLO = 'L' or 'l', the array AP must */ - /* contain the lower triangular part of the symmetric matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ - /* and a( 3, 1 ) respectively, and so on. */ - /* Unchanged on exit. */ - - /* X - DOUBLE PRECISION array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - DOUBLE PRECISION. */ - /* On entry, BETA specifies the scalar beta. When BETA is */ - /* supplied as zero then Y need not be set on input. */ - /* Unchanged on exit. */ - - /* Y - DOUBLE PRECISION array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the n */ - /* element vector y. On exit, Y is overwritten by the updated */ - /* vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - --y; - --x; - --ap; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("DSPMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0. && *beta == 1.)) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array AP */ - /* are accessed sequentially with one pass through AP. */ - - /* First form y := beta*y. */ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; - /* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; - /* L40: */ - } - } - } - } - if (*alpha == 0.) { - return; - } - kk = 1; - if (lsame_(uplo, "U")) { - /* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; - /* L50: */ - } - y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; - kk += j; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - ix += *incx; - iy += *incy; - /* L70: */ - } - y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; - jx += *incx; - jy += *incy; - kk += j; - /* L80: */ - } - } - } else { - /* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * ap[kk]; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; - /* L90: */ - } - y[j] += *alpha * temp2; - kk += *n - j + 1; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * ap[kk]; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - /* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - kk += *n - j + 1; - /* L120: */ - } - } - } - - /* End of DSPMV . */ - -} /* dspmv_ */ diff --git a/blas/f2c/dtbmv.c b/blas/f2c/dtbmv.c deleted file mode 100644 index 92a0aeca6..000000000 --- a/blas/f2c/dtbmv.c +++ /dev/null @@ -1,417 +0,0 @@ -/* dtbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void dtbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublereal *a, integer *lda, - doublereal *x, integer *incx) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - doublereal temp; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - logical nounit; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* DTBMV performs one of the matrix-vector operations */ - - /* x := A*x, or x := A'*x, */ - - /* where x is an n element vector and A is an n by n unit, or non-unit, */ - /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the matrix is an upper or */ - /* lower triangular matrix as follows: */ - - /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ - - /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ - - /* Unchanged on exit. */ - - /* TRANS - CHARACTER*1. */ - /* On entry, TRANS specifies the operation to be performed as */ - /* follows: */ - - /* TRANS = 'N' or 'n' x := A*x. */ - - /* TRANS = 'T' or 't' x := A'*x. */ - - /* TRANS = 'C' or 'c' x := A'*x. */ - - /* Unchanged on exit. */ - - /* DIAG - CHARACTER*1. */ - /* On entry, DIAG specifies whether or not A is unit */ - /* triangular as follows: */ - - /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ - - /* DIAG = 'N' or 'n' A is not assumed to be unit */ - /* triangular. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry with UPLO = 'U' or 'u', K specifies the number of */ - /* super-diagonals of the matrix A. */ - /* On entry with UPLO = 'L' or 'l', K specifies the number of */ - /* sub-diagonals of the matrix A. */ - /* K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer an upper */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer a lower */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that when DIAG = 'U' or 'u' the elements of the array A */ - /* corresponding to the diagonal elements of the matrix are not */ - /* referenced, but are assumed to be unity. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - DOUBLE PRECISION array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. On exit, X is overwritten with the */ - /* transformed vector x. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { - info = 2; - } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("DTBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0) { - return; - } - - nounit = lsame_(diag, "N"); - - /* Set up the start point in X if the increment is not unity. This */ - /* will be ( N - 1 )*INCX too small for descending loops. */ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - - /* Start the operations. In this version the elements of A are */ - /* accessed sequentially with one pass through A. */ - - if (lsame_(trans, "N")) { - /* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.) { - temp = x[j]; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; - /* L10: */ - } - if (nounit) { - x[j] *= a[kplus1 + j * a_dim1]; - } - } - /* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix += *incx; - /* L30: */ - } - if (nounit) { - x[jx] *= a[kplus1 + j * a_dim1]; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } - /* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.) { - temp = x[j]; - l = 1 - j; - /* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; - /* L50: */ - } - if (nounit) { - x[j] *= a[j * a_dim1 + 1]; - } - } - /* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix -= *incx; - /* L70: */ - } - if (nounit) { - x[jx] *= a[j * a_dim1 + 1]; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } - /* L80: */ - } - } - } - } else { - /* Form x := A'*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; - /* L90: */ - } - x[j] = temp; - /* L100: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix -= *incx; - /* L110: */ - } - x[jx] = temp; - jx -= *incx; - /* L120: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[j]; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; - /* L130: */ - } - x[j] = temp; - /* L140: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[jx]; - kx += *incx; - ix = kx; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - /* L150: */ - } - x[jx] = temp; - jx += *incx; - /* L160: */ - } - } - } - } - - /* End of DTBMV . */ - -} /* dtbmv_ */ diff --git a/blas/f2c/lsame.c b/blas/f2c/lsame.c deleted file mode 100644 index ad51ea1de..000000000 --- a/blas/f2c/lsame.c +++ /dev/null @@ -1,109 +0,0 @@ -/* lsame.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -logical lsame_(char *ca, char *cb) { - /* System generated locals */ - logical ret_val; - - /* Local variables */ - integer inta, intb, zcode; - - /* -- LAPACK auxiliary routine (version 3.1) -- */ - /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ - /* November 2006 */ - - /* .. Scalar Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ - /* case. */ - - /* Arguments */ - /* ========= */ - - /* CA (input) CHARACTER*1 */ - - /* CB (input) CHARACTER*1 */ - /* CA and CB specify the single characters to be compared. */ - - /* ===================================================================== */ - - /* .. Intrinsic Functions .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - - /* Test if the characters are equal */ - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - - /* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - - /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ - /* machines, on which ICHAR returns a value with bit 8 set. */ - /* ICHAR('A') on Prime machines returns 193 which is the same as */ - /* ICHAR('A') on an EBCDIC machine. */ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if (zcode == 90 || zcode == 122) { - /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ - /* upper case 'Z'. */ - - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if (zcode == 233 || zcode == 169) { - /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ - /* upper case 'Z'. */ - - if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) { - inta += 64; - } - if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) { - intb += 64; - } - - } else if (zcode == 218 || zcode == 250) { - /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ - /* plus 128 of either lower or upper case 'Z'. */ - - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } - } - ret_val = inta == intb; - - /* RETURN */ - - /* End of LSAME */ - - return ret_val; -} /* lsame_ */ diff --git a/blas/f2c/srotm.c b/blas/f2c/srotm.c deleted file mode 100644 index 410dd87ff..000000000 --- a/blas/f2c/srotm.c +++ /dev/null @@ -1,212 +0,0 @@ -/* srotm.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void srotm_(integer *n, real *sx, integer *incx, real *sy, integer *incy, real *sparam) { - /* Initialized data */ - - static real zero = 0.f; - static real two = 2.f; - - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - integer i__; - real w, z__; - integer kx, ky; - real sh11, sh12, sh21, sh22, sflag; - integer nsteps; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ - - /* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */ - /* (DX**T) */ - - /* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ - /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */ - /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ - - /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ - - /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ - /* H=( ) ( ) ( ) ( ) */ - /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ - /* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */ - - /* Arguments */ - /* ========= */ - - /* N (input) INTEGER */ - /* number of elements in input vector(s) */ - - /* SX (input/output) REAL array, dimension N */ - /* double precision vector with N elements */ - - /* INCX (input) INTEGER */ - /* storage spacing between elements of SX */ - - /* SY (input/output) REAL array, dimension N */ - /* double precision vector with N elements */ - - /* INCY (input) INTEGER */ - /* storage spacing between elements of SY */ - - /* SPARAM (input/output) REAL array, dimension 5 */ - /* SPARAM(1)=SFLAG */ - /* SPARAM(2)=SH11 */ - /* SPARAM(3)=SH21 */ - /* SPARAM(4)=SH12 */ - /* SPARAM(5)=SH22 */ - - /* ===================================================================== */ - - /* .. Local Scalars .. */ - /* .. */ - /* .. Data statements .. */ - /* Parameter adjustments */ - --sparam; - --sy; - --sx; - - /* Function Body */ - /* .. */ - - sflag = sparam[1]; - if (*n <= 0 || sflag + two == zero) { - goto L140; - } - if (!(*incx == *incy && *incx > 0)) { - goto L70; - } - - nsteps = *n * *incx; - if (sflag < 0.f) { - goto L50; - } else if (sflag == 0) { - goto L10; - } else { - goto L30; - } -L10: - sh12 = sparam[4]; - sh21 = sparam[3]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w + z__ * sh12; - sy[i__] = w * sh21 + z__; - /* L20: */ - } - goto L140; -L30: - sh11 = sparam[2]; - sh22 = sparam[5]; - i__2 = nsteps; - i__1 = *incx; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w * sh11 + z__; - sy[i__] = -w + sh22 * z__; - /* L40: */ - } - goto L140; -L50: - sh11 = sparam[2]; - sh12 = sparam[4]; - sh21 = sparam[3]; - sh22 = sparam[5]; - i__1 = nsteps; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - w = sx[i__]; - z__ = sy[i__]; - sx[i__] = w * sh11 + z__ * sh12; - sy[i__] = w * sh21 + z__ * sh22; - /* L60: */ - } - goto L140; -L70: - kx = 1; - ky = 1; - if (*incx < 0) { - kx = (1 - *n) * *incx + 1; - } - if (*incy < 0) { - ky = (1 - *n) * *incy + 1; - } - - if (sflag < 0.f) { - goto L120; - } else if (sflag == 0) { - goto L80; - } else { - goto L100; - } -L80: - sh12 = sparam[4]; - sh21 = sparam[3]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w + z__ * sh12; - sy[ky] = w * sh21 + z__; - kx += *incx; - ky += *incy; - /* L90: */ - } - goto L140; -L100: - sh11 = sparam[2]; - sh22 = sparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w * sh11 + z__; - sy[ky] = -w + sh22 * z__; - kx += *incx; - ky += *incy; - /* L110: */ - } - goto L140; -L120: - sh11 = sparam[2]; - sh12 = sparam[4]; - sh21 = sparam[3]; - sh22 = sparam[5]; - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - w = sx[kx]; - z__ = sy[ky]; - sx[kx] = w * sh11 + z__ * sh12; - sy[ky] = w * sh21 + z__ * sh22; - kx += *incx; - ky += *incy; - /* L130: */ - } -L140: - return; -} /* srotm_ */ diff --git a/blas/f2c/srotmg.c b/blas/f2c/srotmg.c deleted file mode 100644 index 3a0f9f6bf..000000000 --- a/blas/f2c/srotmg.c +++ /dev/null @@ -1,293 +0,0 @@ -/* srotmg.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real *sparam) { - /* Initialized data */ - - static real zero = 0.f; - static real one = 1.f; - static real two = 2.f; - static real gam = 4096.f; - static real gamsq = 16777200.f; - static real rgamsq = 5.96046e-8f; - - /* Format strings */ - static char fmt_120[] = ""; - static char fmt_150[] = ""; - static char fmt_180[] = ""; - static char fmt_210[] = ""; - - /* System generated locals */ - real r__1; - - /* Local variables */ - real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22; - integer igo; - real sflag, stemp; - - /* Assigned format variables */ - static char *igo_fmt; - (void)igo_fmt; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ - /* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */ - /* SY2)**T. */ - /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ - - /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ - - /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ - /* H=( ) ( ) ( ) ( ) */ - /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ - /* LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */ - /* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */ - /* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */ - - /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ - /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ - /* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ - - /* Arguments */ - /* ========= */ - - /* SD1 (input/output) REAL */ - - /* SD2 (input/output) REAL */ - - /* SX1 (input/output) REAL */ - - /* SY1 (input) REAL */ - - /* SPARAM (input/output) REAL array, dimension 5 */ - /* SPARAM(1)=SFLAG */ - /* SPARAM(2)=SH11 */ - /* SPARAM(3)=SH21 */ - /* SPARAM(4)=SH12 */ - /* SPARAM(5)=SH22 */ - - /* ===================================================================== */ - - /* .. Local Scalars .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - /* .. Data statements .. */ - - /* Parameter adjustments */ - --sparam; - - /* Function Body */ - /* .. */ - if (!(*sd1 < zero)) { - goto L10; - } - /* GO ZERO-H-D-AND-SX1.. */ - goto L60; -L10: - /* CASE-SD1-NONNEGATIVE */ - sp2 = *sd2 * *sy1; - if (!(sp2 == zero)) { - goto L20; - } - sflag = -two; - goto L260; -/* REGULAR-CASE.. */ -L20: - sp1 = *sd1 * *sx1; - sq2 = sp2 * *sy1; - sq1 = sp1 * *sx1; - - if (!(dabs(sq1) > dabs(sq2))) { - goto L40; - } - sh21 = -(*sy1) / *sx1; - sh12 = sp2 / sp1; - - su = one - sh12 * sh21; - - if (!(su <= zero)) { - goto L30; - } - /* GO ZERO-H-D-AND-SX1.. */ - goto L60; -L30: - sflag = zero; - *sd1 /= su; - *sd2 /= su; - *sx1 *= su; - /* GO SCALE-CHECK.. */ - goto L100; -L40: - if (!(sq2 < zero)) { - goto L50; - } - /* GO ZERO-H-D-AND-SX1.. */ - goto L60; -L50: - sflag = one; - sh11 = sp1 / sp2; - sh22 = *sx1 / *sy1; - su = one + sh11 * sh22; - stemp = *sd2 / su; - *sd2 = *sd1 / su; - *sd1 = stemp; - *sx1 = *sy1 * su; - /* GO SCALE-CHECK */ - goto L100; -/* PROCEDURE..ZERO-H-D-AND-SX1.. */ -L60: - sflag = -one; - sh11 = zero; - sh12 = zero; - sh21 = zero; - sh22 = zero; - - *sd1 = zero; - *sd2 = zero; - *sx1 = zero; - /* RETURN.. */ - goto L220; -/* PROCEDURE..FIX-H.. */ -L70: - if (!(sflag >= zero)) { - goto L90; - } - - if (!(sflag == zero)) { - goto L80; - } - sh11 = one; - sh22 = one; - sflag = -one; - goto L90; -L80: - sh21 = -one; - sh12 = one; - sflag = -one; -L90: - switch (igo) { - case 0: - goto L120; - case 1: - goto L150; - case 2: - goto L180; - case 3: - goto L210; - } -/* PROCEDURE..SCALE-CHECK */ -L100: -L110: - if (!(*sd1 <= rgamsq)) { - goto L130; - } - if (*sd1 == zero) { - goto L160; - } - igo = 0; - igo_fmt = fmt_120; - /* FIX-H.. */ - goto L70; -L120: - /* Computing 2nd power */ - r__1 = gam; - *sd1 *= r__1 * r__1; - *sx1 /= gam; - sh11 /= gam; - sh12 /= gam; - goto L110; -L130: -L140: - if (!(*sd1 >= gamsq)) { - goto L160; - } - igo = 1; - igo_fmt = fmt_150; - /* FIX-H.. */ - goto L70; -L150: - /* Computing 2nd power */ - r__1 = gam; - *sd1 /= r__1 * r__1; - *sx1 *= gam; - sh11 *= gam; - sh12 *= gam; - goto L140; -L160: -L170: - if (!(dabs(*sd2) <= rgamsq)) { - goto L190; - } - if (*sd2 == zero) { - goto L220; - } - igo = 2; - igo_fmt = fmt_180; - /* FIX-H.. */ - goto L70; -L180: - /* Computing 2nd power */ - r__1 = gam; - *sd2 *= r__1 * r__1; - sh21 /= gam; - sh22 /= gam; - goto L170; -L190: -L200: - if (!(dabs(*sd2) >= gamsq)) { - goto L220; - } - igo = 3; - igo_fmt = fmt_210; - /* FIX-H.. */ - goto L70; -L210: - /* Computing 2nd power */ - r__1 = gam; - *sd2 /= r__1 * r__1; - sh21 *= gam; - sh22 *= gam; - goto L200; -L220: - if (sflag < 0.f) { - goto L250; - } else if (sflag == 0) { - goto L230; - } else { - goto L240; - } -L230: - sparam[3] = sh21; - sparam[4] = sh12; - goto L260; -L240: - sparam[2] = sh11; - sparam[5] = sh22; - goto L260; -L250: - sparam[2] = sh11; - sparam[3] = sh21; - sparam[4] = sh12; - sparam[5] = sh22; -L260: - sparam[1] = sflag; -} /* srotmg_ */ diff --git a/blas/f2c/ssbmv.c b/blas/f2c/ssbmv.c deleted file mode 100644 index ac1d70a43..000000000 --- a/blas/f2c/ssbmv.c +++ /dev/null @@ -1,359 +0,0 @@ -/* ssbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void ssbmv_(char *uplo, integer *n, integer *k, real *alpha, real *a, integer *lda, real *x, - integer *incx, real *beta, real *y, integer *incy) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - real temp1, temp2; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* SSBMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n symmetric band matrix, with k super-diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the band matrix A is being supplied as */ - /* follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* being supplied. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* being supplied. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry, K specifies the number of super-diagonals of the */ - /* matrix A. K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* ALPHA - REAL . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* A - REAL array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the symmetric matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer the upper */ - /* triangular part of a symmetric band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the symmetric matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer the lower */ - /* triangular part of a symmetric band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - REAL array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the */ - /* vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - REAL . */ - /* On entry, BETA specifies the scalar beta. */ - /* Unchanged on exit. */ - - /* Y - REAL array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the */ - /* vector y. On exit, Y is overwritten by the updated vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("SSBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array A */ - /* are accessed sequentially with one pass through A. */ - - /* First form y := beta*y. */ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; - /* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; - /* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return; - } - if (lsame_(uplo, "U")) { - /* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; - /* L50: */ - } - y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; - /* L70: */ - } - y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } - /* L80: */ - } - } - } else { - /* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - y[i__] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[i__]; - /* L90: */ - } - y[j] += *alpha * temp2; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * a[j * a_dim1 + 1]; - l = 1 - j; - ix = jx; - iy = jy; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[l + i__ + j * a_dim1]; - temp2 += a[l + i__ + j * a_dim1] * x[ix]; - /* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - /* L120: */ - } - } - } - - /* End of SSBMV . */ - -} /* ssbmv_ */ diff --git a/blas/f2c/sspmv.c b/blas/f2c/sspmv.c deleted file mode 100644 index ea9db339b..000000000 --- a/blas/f2c/sspmv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* sspmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void sspmv_(char *uplo, integer *n, real *alpha, real *ap, real *x, integer *incx, real *beta, real *y, - integer *incy) { - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - real temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* SSPMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n symmetric matrix, supplied in packed form. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the matrix A is supplied in the packed */ - /* array AP as follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* supplied in AP. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* supplied in AP. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* ALPHA - REAL . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* AP - REAL array of DIMENSION at least */ - /* ( ( n*( n + 1 ) )/2 ). */ - /* Before entry with UPLO = 'U' or 'u', the array AP must */ - /* contain the upper triangular part of the symmetric matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ - /* and a( 2, 2 ) respectively, and so on. */ - /* Before entry with UPLO = 'L' or 'l', the array AP must */ - /* contain the lower triangular part of the symmetric matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ - /* and a( 3, 1 ) respectively, and so on. */ - /* Unchanged on exit. */ - - /* X - REAL array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - REAL . */ - /* On entry, BETA specifies the scalar beta. When BETA is */ - /* supplied as zero then Y need not be set on input. */ - /* Unchanged on exit. */ - - /* Y - REAL array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the n */ - /* element vector y. On exit, Y is overwritten by the updated */ - /* vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - --y; - --x; - --ap; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("SSPMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array AP */ - /* are accessed sequentially with one pass through AP. */ - - /* First form y := beta*y. */ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; - /* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; - /* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return; - } - kk = 1; - if (lsame_(uplo, "U")) { - /* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; - /* L50: */ - } - y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; - kk += j; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - ix += *incx; - iy += *incy; - /* L70: */ - } - y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; - jx += *incx; - jy += *incy; - kk += j; - /* L80: */ - } - } - } else { - /* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * ap[kk]; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * ap[k]; - temp2 += ap[k] * x[i__]; - ++k; - /* L90: */ - } - y[j] += *alpha * temp2; - kk += *n - j + 1; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * ap[kk]; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * ap[k]; - temp2 += ap[k] * x[ix]; - /* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; - kk += *n - j + 1; - /* L120: */ - } - } - } - - /* End of SSPMV . */ - -} /* sspmv_ */ diff --git a/blas/f2c/stbmv.c b/blas/f2c/stbmv.c deleted file mode 100644 index 43329f620..000000000 --- a/blas/f2c/stbmv.c +++ /dev/null @@ -1,417 +0,0 @@ -/* stbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -/* Subroutine */ void stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, - real *x, integer *incx) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - real temp; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - logical nounit; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* STBMV performs one of the matrix-vector operations */ - - /* x := A*x, or x := A'*x, */ - - /* where x is an n element vector and A is an n by n unit, or non-unit, */ - /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the matrix is an upper or */ - /* lower triangular matrix as follows: */ - - /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ - - /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ - - /* Unchanged on exit. */ - - /* TRANS - CHARACTER*1. */ - /* On entry, TRANS specifies the operation to be performed as */ - /* follows: */ - - /* TRANS = 'N' or 'n' x := A*x. */ - - /* TRANS = 'T' or 't' x := A'*x. */ - - /* TRANS = 'C' or 'c' x := A'*x. */ - - /* Unchanged on exit. */ - - /* DIAG - CHARACTER*1. */ - /* On entry, DIAG specifies whether or not A is unit */ - /* triangular as follows: */ - - /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ - - /* DIAG = 'N' or 'n' A is not assumed to be unit */ - /* triangular. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry with UPLO = 'U' or 'u', K specifies the number of */ - /* super-diagonals of the matrix A. */ - /* On entry with UPLO = 'L' or 'l', K specifies the number of */ - /* sub-diagonals of the matrix A. */ - /* K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* A - REAL array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer an upper */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer a lower */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that when DIAG = 'U' or 'u' the elements of the array A */ - /* corresponding to the diagonal elements of the matrix are not */ - /* referenced, but are assumed to be unity. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - REAL array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. On exit, X is overwritten with the */ - /* transformed vector x. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { - info = 2; - } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("STBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0) { - return; - } - - nounit = lsame_(diag, "N"); - - /* Set up the start point in X if the increment is not unity. This */ - /* will be ( N - 1 )*INCX too small for descending loops. */ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - - /* Start the operations. In this version the elements of A are */ - /* accessed sequentially with one pass through A. */ - - if (lsame_(trans, "N")) { - /* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f) { - temp = x[j]; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; - /* L10: */ - } - if (nounit) { - x[j] *= a[kplus1 + j * a_dim1]; - } - } - /* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix += *incx; - /* L30: */ - } - if (nounit) { - x[jx] *= a[kplus1 + j * a_dim1]; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } - /* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.f) { - temp = x[j]; - l = 1 - j; - /* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { - x[i__] += temp * a[l + i__ + j * a_dim1]; - /* L50: */ - } - if (nounit) { - x[j] *= a[j * a_dim1 + 1]; - } - } - /* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { - x[ix] += temp * a[l + i__ + j * a_dim1]; - ix -= *incx; - /* L70: */ - } - if (nounit) { - x[jx] *= a[j * a_dim1 + 1]; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } - /* L80: */ - } - } - } - } else { - /* Form x := A'*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; - /* L90: */ - } - x[j] = temp; - /* L100: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (nounit) { - temp *= a[kplus1 + j * a_dim1]; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix -= *incx; - /* L110: */ - } - x[jx] = temp; - jx -= *incx; - /* L120: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[j]; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[i__]; - /* L130: */ - } - x[j] = temp; - /* L140: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - temp = x[jx]; - kx += *incx; - ix = kx; - l = 1 - j; - if (nounit) { - temp *= a[j * a_dim1 + 1]; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - temp += a[l + i__ + j * a_dim1] * x[ix]; - ix += *incx; - /* L150: */ - } - x[jx] = temp; - jx += *incx; - /* L160: */ - } - } - } - } - - /* End of STBMV . */ - -} /* stbmv_ */ diff --git a/blas/f2c/zhbmv.c b/blas/f2c/zhbmv.c deleted file mode 100644 index cef111737..000000000 --- a/blas/f2c/zhbmv.c +++ /dev/null @@ -1,456 +0,0 @@ -/* zhbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void zhbmv_(char *uplo, integer *n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, - doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Local variables */ - integer i__, j, l, ix, iy, jx, jy, kx, ky, info; - doublecomplex temp1, temp2; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* ZHBMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n hermitian band matrix, with k super-diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the band matrix A is being supplied as */ - /* follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* being supplied. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* being supplied. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry, K specifies the number of super-diagonals of the */ - /* matrix A. K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* ALPHA - COMPLEX*16 . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the hermitian matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer the upper */ - /* triangular part of a hermitian band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the hermitian matrix, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer the lower */ - /* triangular part of a hermitian band matrix from conventional */ - /* full matrix storage to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that the imaginary parts of the diagonal elements need */ - /* not be set and are assumed to be zero. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - COMPLEX*16 array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the */ - /* vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - COMPLEX*16 . */ - /* On entry, BETA specifies the scalar beta. */ - /* Unchanged on exit. */ - - /* Y - COMPLEX*16 array of DIMENSION at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the */ - /* vector y. On exit, Y is overwritten by the updated vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*k < 0) { - info = 3; - } else if (*lda < *k + 1) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("ZHBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array A */ - /* are accessed sequentially with one pass through A. */ - - /* First form y := beta*y. */ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - /* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; - /* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return; - } - if (lsame_(uplo, "U")) { - /* Form y when upper triangle of A is stored. */ - - kplus1 = *k + 1; - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__2 = i__; - z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - /* L50: */ - } - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - d__1 = a[i__3].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - ix = kx; - iy = ky; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ix += *incx; - iy += *incy; - /* L70: */ - } - i__3 = jy; - i__4 = jy; - i__2 = kplus1 + j * a_dim1; - d__1 = a[i__2].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - jx += *incx; - jy += *incy; - if (j > *k) { - kx += *incx; - ky += *incy; - } - /* L80: */ - } - } - } else { - /* Form y when lower triangle of A is stored. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = j; - z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__3 = j; - i__4 = j; - i__2 = j * a_dim1 + 1; - d__1 = a[i__2].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__; - i__2 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - /* L90: */ - } - i__3 = j; - i__4 = j; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__3 = jx; - z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = alpha->r * x[i__3].i + alpha->i * x[i__3].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__3 = jy; - i__4 = jy; - i__2 = j * a_dim1 + 1; - d__1 = a[i__2].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - l = 1 - j; - ix = jx; - iy = jy; - /* Computing MIN */ - i__4 = *n, i__2 = j + *k; - i__3 = min(i__4, i__2); - for (i__ = j + 1; i__ <= i__3; ++i__) { - ix += *incx; - iy += *incy; - i__4 = iy; - i__2 = iy; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; - z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; - y[i__4].r = z__1.r, y[i__4].i = z__1.i; - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - /* L110: */ - } - i__3 = jy; - i__4 = jy; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - jx += *incx; - jy += *incy; - /* L120: */ - } - } - } - - /* End of ZHBMV . */ - -} /* zhbmv_ */ diff --git a/blas/f2c/zhpmv.c b/blas/f2c/zhpmv.c deleted file mode 100644 index 6b9a00049..000000000 --- a/blas/f2c/zhpmv.c +++ /dev/null @@ -1,407 +0,0 @@ -/* zhpmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void zhpmv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, - integer *incx, doublecomplex *beta, doublecomplex *y, integer *incy) { - /* System generated locals */ - integer i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Local variables */ - integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; - doublecomplex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ void xerbla_(const char *, integer *); - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* ZHPMV performs the matrix-vector operation */ - - /* y := alpha*A*x + beta*y, */ - - /* where alpha and beta are scalars, x and y are n element vectors and */ - /* A is an n by n hermitian matrix, supplied in packed form. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the upper or lower */ - /* triangular part of the matrix A is supplied in the packed */ - /* array AP as follows: */ - - /* UPLO = 'U' or 'u' The upper triangular part of A is */ - /* supplied in AP. */ - - /* UPLO = 'L' or 'l' The lower triangular part of A is */ - /* supplied in AP. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* ALPHA - COMPLEX*16 . */ - /* On entry, ALPHA specifies the scalar alpha. */ - /* Unchanged on exit. */ - - /* AP - COMPLEX*16 array of DIMENSION at least */ - /* ( ( n*( n + 1 ) )/2 ). */ - /* Before entry with UPLO = 'U' or 'u', the array AP must */ - /* contain the upper triangular part of the hermitian matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ - /* and a( 2, 2 ) respectively, and so on. */ - /* Before entry with UPLO = 'L' or 'l', the array AP must */ - /* contain the lower triangular part of the hermitian matrix */ - /* packed sequentially, column by column, so that AP( 1 ) */ - /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ - /* and a( 3, 1 ) respectively, and so on. */ - /* Note that the imaginary parts of the diagonal elements need */ - /* not be set and are assumed to be zero. */ - /* Unchanged on exit. */ - - /* X - COMPLEX*16 array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. */ - /* Unchanged on exit. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* BETA - COMPLEX*16 . */ - /* On entry, BETA specifies the scalar beta. When BETA is */ - /* supplied as zero then Y need not be set on input. */ - /* Unchanged on exit. */ - - /* Y - COMPLEX*16 array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCY ) ). */ - /* Before entry, the incremented array Y must contain the n */ - /* element vector y. On exit, Y is overwritten by the updated */ - /* vector y. */ - - /* INCY - INTEGER. */ - /* On entry, INCY specifies the increment for the elements of */ - /* Y. INCY must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - --y; - --x; - --ap; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 6; - } else if (*incy == 0) { - info = 9; - } - if (info != 0) { - xerbla_("ZHPMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.))) { - return; - } - - /* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - - /* Start the operations. In this version the elements of the array AP */ - /* are accessed sequentially with one pass through AP. */ - - /* First form y := beta*y. */ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; - /* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - /* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; - /* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; - /* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return; - } - kk = 1; - if (lsame_(uplo, "U")) { - /* Form y when AP contains the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - k = kk; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ++k; - /* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = kk + j - 1; - d__1 = ap[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - kk += j; - /* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - ix = kx; - iy = ky; - i__2 = kk + j - 2; - for (k = kk; k <= i__2; ++k) { - i__3 = iy; - i__4 = iy; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ix += *incx; - iy += *incy; - /* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = kk + j - 1; - d__1 = ap[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; - kk += j; - /* L80: */ - } - } - } else { - /* Form y when AP contains the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = j; - i__3 = j; - i__4 = kk; - d__1 = ap[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - k = kk + 1; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ++k; - /* L90: */ - } - i__2 = j; - i__3 = j; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - kk += *n - j + 1; - /* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = jy; - i__3 = jy; - i__4 = kk; - d__1 = ap[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - ix = jx; - iy = jy; - i__2 = kk + *n - j; - for (k = kk + 1; k <= i__2; ++k) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = k; - z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5].r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &ap[k]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - /* L110: */ - } - i__2 = jy; - i__3 = jy; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; - kk += *n - j + 1; - /* L120: */ - } - } - } - - /* End of ZHPMV . */ - -} /* zhpmv_ */ diff --git a/blas/f2c/ztbmv.c b/blas/f2c/ztbmv.c deleted file mode 100644 index 2fbfa0012..000000000 --- a/blas/f2c/ztbmv.c +++ /dev/null @@ -1,586 +0,0 @@ -/* ztbmv.f -- translated by f2c (version 20100827). - You must link the resulting object file with libf2c: - on Microsoft Windows system, link with libf2c.lib; - on Linux or Unix systems, link with .../path/to/libf2c.a -lm - or, if you install libf2c.a in a standard place, with -lf2c -lm - -- in that order, at the end of the command line, as in - cc *.o -lf2c -lm - Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., - - http://www.netlib.org/f2c/libf2c.zip -*/ - -#include "datatypes.h" - -static inline void d_cnjg(doublecomplex *r, doublecomplex *z) { - r->r = z->r; - r->i = -(z->i); -} - -/* Subroutine */ void ztbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, doublecomplex *a, - integer *lda, doublecomplex *x, integer *incx) { - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; - - /* Local variables */ - integer i__, j, l, ix, jx, kx, info; - doublecomplex temp; - extern logical lsame_(char *, char *); - integer kplus1; - extern /* Subroutine */ void xerbla_(const char *, integer *); - logical noconj, nounit; - - /* .. Scalar Arguments .. */ - /* .. */ - /* .. Array Arguments .. */ - /* .. */ - - /* Purpose */ - /* ======= */ - - /* ZTBMV performs one of the matrix-vector operations */ - - /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ - - /* where x is an n element vector and A is an n by n unit, or non-unit, */ - /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ - - /* Arguments */ - /* ========== */ - - /* UPLO - CHARACTER*1. */ - /* On entry, UPLO specifies whether the matrix is an upper or */ - /* lower triangular matrix as follows: */ - - /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ - - /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ - - /* Unchanged on exit. */ - - /* TRANS - CHARACTER*1. */ - /* On entry, TRANS specifies the operation to be performed as */ - /* follows: */ - - /* TRANS = 'N' or 'n' x := A*x. */ - - /* TRANS = 'T' or 't' x := A'*x. */ - - /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ - - /* Unchanged on exit. */ - - /* DIAG - CHARACTER*1. */ - /* On entry, DIAG specifies whether or not A is unit */ - /* triangular as follows: */ - - /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ - - /* DIAG = 'N' or 'n' A is not assumed to be unit */ - /* triangular. */ - - /* Unchanged on exit. */ - - /* N - INTEGER. */ - /* On entry, N specifies the order of the matrix A. */ - /* N must be at least zero. */ - /* Unchanged on exit. */ - - /* K - INTEGER. */ - /* On entry with UPLO = 'U' or 'u', K specifies the number of */ - /* super-diagonals of the matrix A. */ - /* On entry with UPLO = 'L' or 'l', K specifies the number of */ - /* sub-diagonals of the matrix A. */ - /* K must satisfy 0 .le. K. */ - /* Unchanged on exit. */ - - /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ - /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ - /* by n part of the array A must contain the upper triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row */ - /* ( k + 1 ) of the array, the first super-diagonal starting at */ - /* position 2 in row k, and so on. The top left k by k triangle */ - /* of the array A is not referenced. */ - /* The following program segment will transfer an upper */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = K + 1 - J */ - /* DO 10, I = MAX( 1, J - K ), J */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ - /* by n part of the array A must contain the lower triangular */ - /* band part of the matrix of coefficients, supplied column by */ - /* column, with the leading diagonal of the matrix in row 1 of */ - /* the array, the first sub-diagonal starting at position 1 in */ - /* row 2, and so on. The bottom right k by k triangle of the */ - /* array A is not referenced. */ - /* The following program segment will transfer a lower */ - /* triangular band matrix from conventional full matrix storage */ - /* to band storage: */ - - /* DO 20, J = 1, N */ - /* M = 1 - J */ - /* DO 10, I = J, MIN( N, J + K ) */ - /* A( M + I, J ) = matrix( I, J ) */ - /* 10 CONTINUE */ - /* 20 CONTINUE */ - - /* Note that when DIAG = 'U' or 'u' the elements of the array A */ - /* corresponding to the diagonal elements of the matrix are not */ - /* referenced, but are assumed to be unity. */ - /* Unchanged on exit. */ - - /* LDA - INTEGER. */ - /* On entry, LDA specifies the first dimension of A as declared */ - /* in the calling (sub) program. LDA must be at least */ - /* ( k + 1 ). */ - /* Unchanged on exit. */ - - /* X - COMPLEX*16 array of dimension at least */ - /* ( 1 + ( n - 1 )*abs( INCX ) ). */ - /* Before entry, the incremented array X must contain the n */ - /* element vector x. On exit, X is overwritten with the */ - /* transformed vector x. */ - - /* INCX - INTEGER. */ - /* On entry, INCX specifies the increment for the elements of */ - /* X. INCX must not be zero. */ - /* Unchanged on exit. */ - - /* Further Details */ - /* =============== */ - - /* Level 2 Blas routine. */ - - /* -- Written on 22-October-1986. */ - /* Jack Dongarra, Argonne National Lab. */ - /* Jeremy Du Croz, Nag Central Office. */ - /* Sven Hammarling, Nag Central Office. */ - /* Richard Hanson, Sandia National Labs. */ - - /* ===================================================================== */ - - /* .. Parameters .. */ - /* .. */ - /* .. Local Scalars .. */ - /* .. */ - /* .. External Functions .. */ - /* .. */ - /* .. External Subroutines .. */ - /* .. */ - /* .. Intrinsic Functions .. */ - /* .. */ - - /* Test the input parameters. */ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (!lsame_(uplo, "U") && !lsame_(uplo, "L")) { - info = 1; - } else if (!lsame_(trans, "N") && !lsame_(trans, "T") && !lsame_(trans, "C")) { - info = 2; - } else if (!lsame_(diag, "U") && !lsame_(diag, "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < *k + 1) { - info = 7; - } else if (*incx == 0) { - info = 9; - } - if (info != 0) { - xerbla_("ZTBMV ", &info); - return; - } - - /* Quick return if possible. */ - - if (*n == 0) { - return; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - - /* Set up the start point in X if the increment is not unity. This */ - /* will be ( N - 1 )*INCX too small for descending loops. */ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - - /* Start the operations. In this version the elements of A are */ - /* accessed sequentially with one pass through A. */ - - if (lsame_(trans, "N")) { - /* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - l = kplus1 - j; - /* Computing MAX */ - i__2 = 1, i__3 = j - *k; - i__4 = j - 1; - for (i__ = max(i__2, i__3); i__ <= i__4; ++i__) { - i__2 = i__; - i__3 = i__; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - /* L10: */ - } - if (nounit) { - i__4 = j; - i__2 = j; - i__3 = kplus1 + j * a_dim1; - z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[i__3].i, - z__1.i = x[i__2].r * a[i__3].i + x[i__2].i * a[i__3].r; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - } - } - /* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__4 = jx; - if (x[i__4].r != 0. || x[i__4].i != 0.) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = kplus1 - j; - /* Computing MAX */ - i__4 = 1, i__2 = j - *k; - i__3 = j - 1; - for (i__ = max(i__4, i__2); i__ <= i__3; ++i__) { - i__4 = ix; - i__2 = ix; - i__5 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; - z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + z__2.i; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - ix += *incx; - /* L30: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__2 = kplus1 + j * a_dim1; - z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[i__2].i, - z__1.i = x[i__4].r * a[i__2].i + x[i__4].i * a[i__2].r; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } - } - jx += *incx; - if (j > *k) { - kx += *incx; - } - /* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - l = 1 - j; - /* Computing MIN */ - i__1 = *n, i__3 = j + *k; - i__4 = j + 1; - for (i__ = min(i__1, i__3); i__ >= i__4; --i__) { - i__1 = i__; - i__3 = i__; - i__2 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + z__2.i; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; - /* L50: */ - } - if (nounit) { - i__4 = j; - i__1 = j; - i__3 = j * a_dim1 + 1; - z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[i__3].i, - z__1.i = x[i__1].r * a[i__3].i + x[i__1].i * a[i__3].r; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - } - } - /* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__4 = jx; - if (x[i__4].r != 0. || x[i__4].i != 0.) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - ix = kx; - l = 1 - j; - /* Computing MIN */ - i__4 = *n, i__1 = j + *k; - i__3 = j + 1; - for (i__ = min(i__4, i__1); i__ >= i__3; --i__) { - i__4 = ix; - i__1 = ix; - i__2 = l + i__ + j * a_dim1; - z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, z__2.i = temp.r * a[i__2].i + temp.i * a[i__2].r; - z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + z__2.i; - x[i__4].r = z__1.r, x[i__4].i = z__1.i; - ix -= *incx; - /* L70: */ - } - if (nounit) { - i__3 = jx; - i__4 = jx; - i__1 = j * a_dim1 + 1; - z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[i__1].i, - z__1.i = x[i__4].r * a[i__1].i + x[i__4].i * a[i__1].r; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } - } - jx -= *incx; - if (*n - j >= *k) { - kx -= *incx; - } - /* L80: */ - } - } - } - } else { - /* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U")) { - kplus1 = *k + 1; - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__3 = j; - temp.r = x[i__3].r, temp.i = x[i__3].i; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = i__; - z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, - z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - /* L90: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - /* L100: */ - } - } - i__3 = j; - x[i__3].r = temp.r, x[i__3].i = temp.i; - /* L110: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__3 = jx; - temp.r = x[i__3].r, temp.i = x[i__3].i; - kx -= *incx; - ix = kx; - l = kplus1 - j; - if (noconj) { - if (nounit) { - i__3 = kplus1 + j * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, z__1.i = temp.r * a[i__3].i + temp.i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - i__4 = l + i__ + j * a_dim1; - i__1 = ix; - z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[i__1].i, - z__2.i = a[i__4].r * x[i__1].i + a[i__4].i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; - /* L120: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MAX */ - i__4 = 1, i__1 = j - *k; - i__3 = max(i__4, i__1); - for (i__ = j - 1; i__ >= i__3; --i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__4 = ix; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = z__3.r * x[i__4].i + z__3.i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; - /* L130: */ - } - } - i__3 = jx; - x[i__3].r = temp.r, x[i__3].i = temp.i; - jx -= *incx; - /* L140: */ - } - } - } else { - if (*incx == 1) { - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = j; - temp.r = x[i__4].r, temp.i = x[i__4].i; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = i__; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, - z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - /* L150: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j * a_dim1 + 1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__1 = i__; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - /* L160: */ - } - } - i__4 = j; - x[i__4].r = temp.r, x[i__4].i = temp.i; - /* L170: */ - } - } else { - jx = kx; - i__3 = *n; - for (j = 1; j <= i__3; ++j) { - i__4 = jx; - temp.r = x[i__4].r, temp.i = x[i__4].i; - kx += *incx; - ix = kx; - l = 1 - j; - if (noconj) { - if (nounit) { - i__4 = j * a_dim1 + 1; - z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, z__1.i = temp.r * a[i__4].i + temp.i * a[i__4].r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - i__1 = l + i__ + j * a_dim1; - i__2 = ix; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[i__2].i, - z__2.i = a[i__1].r * x[i__2].i + a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; - /* L180: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j * a_dim1 + 1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, z__1.i = temp.r * z__2.i + temp.i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - /* Computing MIN */ - i__1 = *n, i__2 = j + *k; - i__4 = min(i__1, i__2); - for (i__ = j + 1; i__ <= i__4; ++i__) { - d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); - i__1 = ix; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, z__2.i = z__3.r * x[i__1].i + z__3.i * x[i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; - /* L190: */ - } - } - i__4 = jx; - x[i__4].r = temp.r, x[i__4].i = temp.i; - jx += *incx; - /* L200: */ - } - } - } - } - - /* End of ZTBMV . */ - -} /* ztbmv_ */ diff --git a/blas/level1_real_impl.h b/blas/level1_real_impl.h index 202f4324d..074883d3e 100644 --- a/blas/level1_real_impl.h +++ b/blas/level1_real_impl.h @@ -108,23 +108,171 @@ EIGEN_BLAS_FUNC(rot)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scala Eigen::internal::apply_rotation_in_the_plane(vx, vy, Eigen::JacobiRotation(c, s)); } -/* -// performs rotation of points in the modified plane. -EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param) -{ - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); +// Applies modified Givens rotation H to vectors x and y. +// param[0] = flag: +// -1: H = [[h11, h12], [h21, h22]] (all 4 elements from param) +// 0: H = [[1, h12], [h21, 1]] (h12, h21 from param) +// 1: H = [[h11, 1], [-1, h22]] (h11, h22 from param) +// -2: H = identity (no-op) +// param[1..4] = h11, h21, h12, h22 +EIGEN_BLAS_FUNC(rotm)(int *n, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *param) { + Scalar *x = reinterpret_cast(px); + Scalar *y = reinterpret_cast(py); - // TODO + Scalar flag = param[0]; + if (*n <= 0 || flag == Scalar(-2)) return; - return 0; + Scalar h11, h12, h21, h22; + if (flag < Scalar(0)) { + h11 = param[1]; + h21 = param[2]; + h12 = param[3]; + h22 = param[4]; + } else if (flag == Scalar(0)) { + h11 = Scalar(1); + h21 = param[2]; + h12 = param[3]; + h22 = Scalar(1); + } else { + h11 = param[1]; + h21 = Scalar(-1); + h12 = Scalar(1); + h22 = param[4]; + } + + int kx = *incx > 0 ? 0 : (1 - *n) * *incx; + int ky = *incy > 0 ? 0 : (1 - *n) * *incy; + + for (int i = 0; i < *n; ++i) { + Scalar w = x[kx]; + Scalar z = y[ky]; + x[kx] = h11 * w + h12 * z; + y[ky] = h21 * w + h22 * z; + kx += *incx; + ky += *incy; + } } -// computes the modified parameters for a Givens rotation. -EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *x2, Scalar *param) -{ - // TODO +// Constructs the modified Givens transformation matrix H which zeros the second +// component of (sqrt(d1)*x1, sqrt(d2)*y1)^T. +EIGEN_BLAS_FUNC(rotmg)(Scalar *d1, Scalar *d2, Scalar *x1, Scalar *y1, Scalar *param) { + using std::abs; - return 0; + const Scalar gam = Scalar(4096); + const Scalar gamsq = gam * gam; + const Scalar rgamsq = Scalar(1) / gamsq; + + Scalar flag, h11 = Scalar(0), h12 = Scalar(0), h21 = Scalar(0), h22 = Scalar(0); + + if (*d1 < Scalar(0)) { + // Negative d1: zero everything. + flag = Scalar(-1); + *d1 = *d2 = *x1 = Scalar(0); + } else { + Scalar p2 = *d2 * *y1; + if (p2 == Scalar(0)) { + // d2*y1 == 0: identity transform. + param[0] = Scalar(-2); + return; + } + + Scalar p1 = *d1 * *x1; + Scalar q2 = p2 * *y1; + Scalar q1 = p1 * *x1; + bool do_scale = true; + + if (abs(q1) > abs(q2)) { + h21 = -(*y1) / *x1; + h12 = p2 / p1; + Scalar u = Scalar(1) - h12 * h21; + if (u <= Scalar(0)) { + flag = Scalar(-1); + h11 = h12 = h21 = h22 = Scalar(0); + *d1 = *d2 = *x1 = Scalar(0); + do_scale = false; + } else { + flag = Scalar(0); + *d1 /= u; + *d2 /= u; + *x1 *= u; + } + } else if (q2 < Scalar(0)) { + flag = Scalar(-1); + h11 = h12 = h21 = h22 = Scalar(0); + *d1 = *d2 = *x1 = Scalar(0); + do_scale = false; + } else { + flag = Scalar(1); + h11 = p1 / p2; + h22 = *x1 / *y1; + Scalar u = Scalar(1) + h11 * h22; + Scalar temp = *d2 / u; + *d2 = *d1 / u; + *d1 = temp; + *x1 = *y1 * u; + } + + if (do_scale) { + // Converts compact H representation (flag 0 or 1) to full form (flag -1) + // so that scaling factors can be absorbed into all four elements. + auto fix_h = [&]() { + if (flag >= Scalar(0)) { + if (flag == Scalar(0)) { + h11 = Scalar(1); + h22 = Scalar(1); + } else { + h21 = Scalar(-1); + h12 = Scalar(1); + } + flag = Scalar(-1); + } + }; + + // Scale d1 up if too small. + while (*d1 <= rgamsq && *d1 != Scalar(0)) { + fix_h(); + *d1 *= gamsq; + *x1 /= gam; + h11 /= gam; + h12 /= gam; + } + // Scale d1 down if too large. + while (*d1 >= gamsq) { + fix_h(); + *d1 /= gamsq; + *x1 *= gam; + h11 *= gam; + h12 *= gam; + } + // Scale |d2| up if too small. + while (abs(*d2) <= rgamsq && *d2 != Scalar(0)) { + fix_h(); + *d2 *= gamsq; + h21 /= gam; + h22 /= gam; + } + // Scale |d2| down if too large. + while (abs(*d2) >= gamsq) { + fix_h(); + *d2 /= gamsq; + h21 *= gam; + h22 *= gam; + } + } + } + + // Store result in param array. + if (flag < Scalar(0)) { + param[1] = h11; + param[2] = h21; + param[3] = h12; + param[4] = h22; + } else if (flag == Scalar(0)) { + param[2] = h21; + param[3] = h12; + } else { + param[1] = h11; + param[4] = h22; + } + param[0] = flag; } -*/ diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index d1ce492ac..f6172400e 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -72,31 +72,186 @@ EIGEN_BLAS_FUNC(hemv) if (actual_y != y) delete[] copy_back(actual_y, y, *n, *incy); } -/** ZHBMV performs the matrix-vector operation +/** HBMV performs the matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian band matrix, with k super-diagonals. + * Diagonal elements are real; off-diagonal contributions use conjugation. */ -// EIGEN_BLAS_FUNC(hbmv)(char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, -// RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -// { -// return 1; -// } +EIGEN_BLAS_FUNC(hbmv) +(char *uplo, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, + RealScalar *py, int *incy) { + const Scalar alpha = *reinterpret_cast(palpha); + const Scalar beta = *reinterpret_cast(pbeta); + const Scalar *a = reinterpret_cast(pa); + const Scalar *x = reinterpret_cast(px); + Scalar *y = reinterpret_cast(py); -/** ZHPMV performs the matrix-vector operation + int info = 0; + if (UPLO(*uplo) == INVALID) + info = 1; + else if (*n < 0) + info = 2; + else if (*k < 0) + info = 3; + else if (*lda < *k + 1) + info = 6; + else if (*incx == 0) + info = 8; + else if (*incy == 0) + info = 11; + if (info) return xerbla_(SCALAR_SUFFIX_UP "HBMV ", &info); + + if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; + + int kx = *incx > 0 ? 0 : (1 - *n) * *incx; + int ky = *incy > 0 ? 0 : (1 - *n) * *incy; + + // First form y := beta*y. + if (beta != Scalar(1)) { + int iy = ky; + for (int i = 0; i < *n; ++i) { + y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy]; + iy += *incy; + } + } + + if (alpha == Scalar(0)) return; + + if (UPLO(*uplo) == UP) { + // Upper triangle: A[i,j] at a[(k+i-j) + j*lda], diagonal at row k. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + int ix = kx, iy = ky; + for (int i = std::max(0, j - *k); i < j; ++i) { + Scalar aij = a[(*k + i - j) + j * *lda]; + y[iy] += temp1 * aij; + temp2 += Eigen::numext::conj(aij) * x[ix]; + ix += *incx; + iy += *incy; + } + // Diagonal is real. + y[jy] += Scalar(Eigen::numext::real(a[*k + j * *lda])) * temp1 + alpha * temp2; + jx += *incx; + jy += *incy; + if (j >= *k) { + kx += *incx; + ky += *incy; + } + } + } else { + // Lower triangle: A[i,j] at a[(i-j) + j*lda], diagonal at row 0. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + // Diagonal is real. + y[jy] += Scalar(Eigen::numext::real(a[j * *lda])) * temp1; + int ix = jx, iy = jy; + for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) { + ix += *incx; + iy += *incy; + Scalar aij = a[(i - j) + j * *lda]; + y[iy] += temp1 * aij; + temp2 += Eigen::numext::conj(aij) * x[ix]; + } + y[jy] += alpha * temp2; + jx += *incx; + jy += *incy; + } + } +} + +/** HPMV performs the matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix, supplied in packed form. + * Diagonal elements are real; off-diagonal contributions use conjugation. */ -// EIGEN_BLAS_FUNC(hpmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar -// *beta, RealScalar *y, int *incy) -// { -// return 1; -// } +EIGEN_BLAS_FUNC(hpmv) +(char *uplo, int *n, RealScalar *palpha, RealScalar *pap, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, + int *incy) { + const Scalar alpha = *reinterpret_cast(palpha); + const Scalar beta = *reinterpret_cast(pbeta); + const Scalar *ap = reinterpret_cast(pap); + const Scalar *x = reinterpret_cast(px); + Scalar *y = reinterpret_cast(py); + + int info = 0; + if (UPLO(*uplo) == INVALID) + info = 1; + else if (*n < 0) + info = 2; + else if (*incx == 0) + info = 6; + else if (*incy == 0) + info = 9; + if (info) return xerbla_(SCALAR_SUFFIX_UP "HPMV ", &info); + + if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; + + int kx = *incx > 0 ? 0 : (1 - *n) * *incx; + int ky = *incy > 0 ? 0 : (1 - *n) * *incy; + + // First form y := beta*y. + if (beta != Scalar(1)) { + int iy = ky; + for (int i = 0; i < *n; ++i) { + y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy]; + iy += *incy; + } + } + + if (alpha == Scalar(0)) return; + + int kk = 0; + if (UPLO(*uplo) == UP) { + // Upper triangle packed. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + int ix = kx, iy = ky; + for (int i = 0; i < j; ++i) { + y[iy] += temp1 * ap[kk + i]; + temp2 += Eigen::numext::conj(ap[kk + i]) * x[ix]; + ix += *incx; + iy += *incy; + } + // Diagonal is real. + y[jy] += Scalar(Eigen::numext::real(ap[kk + j])) * temp1 + alpha * temp2; + jx += *incx; + jy += *incy; + kk += j + 1; + } + } else { + // Lower triangle packed. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + // Diagonal is real. + y[jy] += Scalar(Eigen::numext::real(ap[kk])) * temp1; + int ix = jx, iy = jy; + for (int i = 1; i < *n - j; ++i) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[kk + i]; + temp2 += Eigen::numext::conj(ap[kk + i]) * x[ix]; + } + y[jy] += alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j; + } + } +} /** ZHPR performs the hermitian rank 1 operation * diff --git a/blas/level2_impl.h b/blas/level2_impl.h index ca9f48f3d..f8394eedd 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -303,61 +303,92 @@ EIGEN_BLAS_FUNC(gbmv) if (actual_y != y) delete[] copy_back(actual_y, y, actual_m, *incy); } -#if 0 /** TBMV performs one of the matrix-vector operations - * - * x := A*x, or x := A'*x, - * - * where x is an n element vector and A is an n by n unit, or non-unit, - * upper or lower triangular band matrix, with ( k + 1 ) diagonals. - */ -EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) -{ - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); - int coeff_rows = *k + 1; + * + * x := A*x, or x := A'*x, or x := conjg(A')*x, + * + * where x is an n element vector and A is an n by n unit, or non-unit, + * upper or lower triangular band matrix, with ( k + 1 ) diagonals. + * + * Band storage: upper triangle stores A[i,j] at a[(k+i-j) + j*lda], + * lower triangle stores A[i,j] at a[(i-j) + j*lda]. + */ +EIGEN_BLAS_FUNC(tbmv) +(char *uplo, char *opa, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { + Scalar *a = reinterpret_cast(pa); + Scalar *x = reinterpret_cast(px); int info = 0; - if(UPLO(*uplo)==INVALID) info = 1; - else if(OP(*opa)==INVALID) info = 2; - else if(DIAG(*diag)==INVALID) info = 3; - else if(*n<0) info = 4; - else if(*k<0) info = 5; - else if(*lda= 0; --j) { + if (actual_x[j] != Scalar(0)) { + Scalar temp = actual_x[j]; + for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) actual_x[i] += temp * a[(i - j) + j * *lda]; + if (!unit) actual_x[j] = temp * a[j * *lda]; + } + } + } + } else { + // Transpose or conjugate transpose. + bool do_conj = (op == ADJ); + auto maybe_conj = [do_conj](Scalar val) -> Scalar { return do_conj ? Eigen::numext::conj(val) : val; }; - int ku = UPLO(*uplo)==UPPER ? *k : 0; - int kl = UPLO(*uplo)==LOWER ? *k : 0; - - for(int j=0; j<*n; ++j) - { - int start = std::max(0,j - ku); - int end = std::min((*m)-1,j + kl); - int len = end - start + 1; - int offset = (ku) - j + start; - - if(OP(*trans)==NOTR) - make_vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len); - else if(OP(*trans)==TR) - actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * make_vector(actual_x+start,len) ).value(); - else - actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint() * make_vector(actual_x+start,len) ).value(); + if (upper) { + // x := op(A)*x, upper band. Process columns right to left. + for (int j = *n - 1; j >= 0; --j) { + Scalar temp = actual_x[j]; + if (!unit) temp *= maybe_conj(a[*k + j * *lda]); + for (int i = std::max(0, j - *k); i < j; ++i) temp += maybe_conj(a[(*k + i - j) + j * *lda]) * actual_x[i]; + actual_x[j] = temp; + } + } else { + // x := op(A)*x, lower band. Process columns left to right. + for (int j = 0; j < *n; ++j) { + Scalar temp = actual_x[j]; + if (!unit) temp *= maybe_conj(a[j * *lda]); + for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) temp += maybe_conj(a[(i - j) + j * *lda]) * actual_x[i]; + actual_x[j] = temp; + } + } } - if(actual_x!=x) delete[] actual_x; - if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy); + if (actual_x != x) delete[] copy_back(actual_x, x, *n, *incx); } -#endif /** DTBSV solves one of the systems of equations * diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 415944cc0..6072007ff 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -158,32 +158,187 @@ EIGEN_BLAS_FUNC(syr2) // func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); } -/** DSBMV performs the matrix-vector operation +/** SBMV performs the matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n symmetric band matrix, with k super-diagonals. + * + * Band storage: upper triangle stores A[i,j] at a[(k+i-j) + j*lda], + * lower triangle stores A[i,j] at a[(i-j) + j*lda]. */ -// EIGEN_BLAS_FUNC(sbmv)( char *uplo, int *n, int *k, RealScalar *alpha, RealScalar *a, int *lda, -// RealScalar *x, int *incx, RealScalar *beta, RealScalar *y, int *incy) -// { -// return 1; -// } +EIGEN_BLAS_FUNC(sbmv) +(char *uplo, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, + RealScalar *py, int *incy) { + const Scalar alpha = *reinterpret_cast(palpha); + const Scalar beta = *reinterpret_cast(pbeta); + const Scalar *a = reinterpret_cast(pa); + const Scalar *x = reinterpret_cast(px); + Scalar *y = reinterpret_cast(py); -/** DSPMV performs the matrix-vector operation + int info = 0; + if (UPLO(*uplo) == INVALID) + info = 1; + else if (*n < 0) + info = 2; + else if (*k < 0) + info = 3; + else if (*lda < *k + 1) + info = 6; + else if (*incx == 0) + info = 8; + else if (*incy == 0) + info = 11; + if (info) return xerbla_(SCALAR_SUFFIX_UP "SBMV ", &info); + + if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; + + int kx = *incx > 0 ? 0 : (1 - *n) * *incx; + int ky = *incy > 0 ? 0 : (1 - *n) * *incy; + + // First form y := beta*y. + if (beta != Scalar(1)) { + int iy = ky; + for (int i = 0; i < *n; ++i) { + y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy]; + iy += *incy; + } + } + + if (alpha == Scalar(0)) return; + + if (UPLO(*uplo) == UP) { + // Upper triangle: A[i,j] at a[(k+i-j) + j*lda], diagonal at row k. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + int ix = kx, iy = ky; + for (int i = std::max(0, j - *k); i < j; ++i) { + Scalar aij = a[(*k + i - j) + j * *lda]; + y[iy] += temp1 * aij; + temp2 += aij * x[ix]; + ix += *incx; + iy += *incy; + } + y[jy] += temp1 * a[*k + j * *lda] + alpha * temp2; + jx += *incx; + jy += *incy; + if (j >= *k) { + kx += *incx; + ky += *incy; + } + } + } else { + // Lower triangle: A[i,j] at a[(i-j) + j*lda], diagonal at row 0. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + y[jy] += temp1 * a[j * *lda]; + int ix = jx, iy = jy; + for (int i = j + 1; i <= std::min(*n - 1, j + *k); ++i) { + ix += *incx; + iy += *incy; + Scalar aij = a[(i - j) + j * *lda]; + y[iy] += temp1 * aij; + temp2 += aij * x[ix]; + } + y[jy] += alpha * temp2; + jx += *incx; + jy += *incy; + } + } +} + +/** SPMV performs the matrix-vector operation * * y := alpha*A*x + beta*y, * * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n symmetric matrix, supplied in packed form. * + * Packed storage: upper triangle stores columns sequentially so that + * column j occupies positions kk..kk+j (where kk = j*(j+1)/2), + * lower triangle stores column j at positions kk..kk+(n-j-1). */ -// EIGEN_BLAS_FUNC(spmv)(char *uplo, int *n, RealScalar *alpha, RealScalar *ap, RealScalar *x, int *incx, RealScalar -// *beta, RealScalar *y, int *incy) -// { -// return 1; -// } +EIGEN_BLAS_FUNC(spmv) +(char *uplo, int *n, RealScalar *palpha, RealScalar *pap, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, + int *incy) { + const Scalar alpha = *reinterpret_cast(palpha); + const Scalar beta = *reinterpret_cast(pbeta); + const Scalar *ap = reinterpret_cast(pap); + const Scalar *x = reinterpret_cast(px); + Scalar *y = reinterpret_cast(py); + + int info = 0; + if (UPLO(*uplo) == INVALID) + info = 1; + else if (*n < 0) + info = 2; + else if (*incx == 0) + info = 6; + else if (*incy == 0) + info = 9; + if (info) return xerbla_(SCALAR_SUFFIX_UP "SPMV ", &info); + + if (*n == 0 || (alpha == Scalar(0) && beta == Scalar(1))) return; + + int kx = *incx > 0 ? 0 : (1 - *n) * *incx; + int ky = *incy > 0 ? 0 : (1 - *n) * *incy; + + // First form y := beta*y. + if (beta != Scalar(1)) { + int iy = ky; + for (int i = 0; i < *n; ++i) { + y[iy] = (beta == Scalar(0)) ? Scalar(0) : beta * y[iy]; + iy += *incy; + } + } + + if (alpha == Scalar(0)) return; + + int kk = 0; + if (UPLO(*uplo) == UP) { + // Upper triangle packed. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + int ix = kx, iy = ky; + for (int i = 0; i < j; ++i) { + y[iy] += temp1 * ap[kk + i]; + temp2 += ap[kk + i] * x[ix]; + ix += *incx; + iy += *incy; + } + y[jy] += temp1 * ap[kk + j] + alpha * temp2; + jx += *incx; + jy += *incy; + kk += j + 1; + } + } else { + // Lower triangle packed. + int jx = kx, jy = ky; + for (int j = 0; j < *n; ++j) { + Scalar temp1 = alpha * x[jx]; + Scalar temp2 = Scalar(0); + y[jy] += temp1 * ap[kk]; + int ix = jx, iy = jy; + for (int i = 1; i < *n - j; ++i) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[kk + i]; + temp2 += ap[kk + i] * x[ix]; + } + y[jy] += alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j; + } + } +} /** DSPR performs the symmetric rank 1 operation * diff --git a/blas/lsame.cpp b/blas/lsame.cpp new file mode 100644 index 000000000..c65b5b0a1 --- /dev/null +++ b/blas/lsame.cpp @@ -0,0 +1,15 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include "blas.h" + +// LSAME returns true if ca and cb are the same letter, regardless of case. +extern "C" EIGEN_BLAS_API int lsame_(const char *ca, const char *cb) { + return std::toupper(static_cast(*ca)) == std::toupper(static_cast(*cb)); +}