862 lines
33 KiB
C++
862 lines
33 KiB
C++
/*
|
|
* tval3.cpp
|
|
*
|
|
* Created on: Mar 21, 2011d
|
|
* Author: ditlevsen
|
|
*/
|
|
|
|
#include <cstdlib>
|
|
#include <cmath>
|
|
#include <omp.h>
|
|
#include <complex>
|
|
#include <string>
|
|
#include "tval3_types.h"
|
|
#include "utility.h"
|
|
#include "timestamp.h"
|
|
#include "blas_wrap.h"
|
|
#include "mat_mult.h"
|
|
#include <mkl.h>
|
|
#include <fstream>
|
|
#include <cfloat>
|
|
|
|
using namespace std;
|
|
|
|
//#define PROFILING
|
|
|
|
#ifdef PROFILING
|
|
double t_mult, t_d, t_dt;
|
|
bool rec_time;
|
|
#endif
|
|
|
|
// this struct holds various values, accessed by the functions get_g(), update_g(), update_W() and update_mlp()
|
|
// the member names correspond to the variable names in the MATLAB-version
|
|
template<class T_comp, class T_scal>
|
|
struct tval3_data { // parameters and intermediate results...
|
|
// dimensions...
|
|
int p; // y-dimension of reconstruction volume
|
|
int q; // x-dimension of reconstruction volume
|
|
int r; // z-dimension of reconstruction volume
|
|
int n; // number of pixels (n = p*q*r)
|
|
int m; // number of measurements
|
|
|
|
// penalty parameters...
|
|
T_scal mu;
|
|
T_scal beta;
|
|
T_scal muDbeta; // mu/beta
|
|
|
|
// multiplier...
|
|
mat<T_comp> delta;
|
|
mat<T_comp> sigmaX;
|
|
mat<T_comp> sigmaY;
|
|
mat<T_comp> sigmaZ;
|
|
|
|
// lagrangian function and sub terms...
|
|
T_comp f;
|
|
T_scal lam1;
|
|
T_scal lam2;
|
|
T_scal lam3;
|
|
T_comp lam4;
|
|
T_comp lam5;
|
|
|
|
mat<T_comp> Up;
|
|
mat<T_comp> dU; // U-Up, after steepest descent
|
|
mat<T_comp> uup; // U-Up, after "backtracking"
|
|
mat<T_comp> Ux; // gradients in x-direction
|
|
mat<T_comp> Uxp;
|
|
mat<T_comp> dUx;
|
|
mat<T_comp> Uy; // gradients in y-direction
|
|
mat<T_comp> Uyp;
|
|
mat<T_comp> dUy;
|
|
mat<T_comp> Uz; // gradients in y-direction
|
|
mat<T_comp> Uzp;
|
|
mat<T_comp> dUz;
|
|
mat<T_comp> Wx;
|
|
mat<T_comp> Wy;
|
|
mat<T_comp> Wz;
|
|
mat<T_comp> Atb; // A'*b
|
|
mat<T_comp> Au; // A*u
|
|
mat<T_comp> Aup;
|
|
mat<T_comp> dAu;
|
|
mat<T_comp> d; // gradient of objective function (2.28)
|
|
mat<T_comp> g; // sub term of (2.28)
|
|
mat<T_comp> gp;
|
|
mat<T_comp> dg;
|
|
mat<T_comp> g2; // sub term of (2.28)
|
|
mat<T_comp> g2p;
|
|
mat<T_comp> dg2;
|
|
tval3_data<T_comp, T_scal>(const mat<T_comp> &U, const mat<T_comp> &b, T_scal mu0, T_scal beta0):
|
|
p(U.dim_y), q(U.dim_x), r(U.dim_z), n(U.len), m(b.len), mu(mu0), beta(beta0), delta(b.len), sigmaX(U.dim_y, U.dim_x, U.dim_z),
|
|
sigmaY(U.dim_y, U.dim_x, U.dim_z), sigmaZ(U.dim_y, U.dim_x, U.dim_z), Up(U.dim_y, U.dim_x, U.dim_z), dU(U.dim_y, U.dim_x, U.dim_z),
|
|
uup(U.len), Ux(U.dim_y, U.dim_x, U.dim_z), Uxp(U.dim_y, U.dim_x, U.dim_z), dUx(U.dim_y, U.dim_x, U.dim_z), Uy(U.dim_y, U.dim_x, U.dim_z),
|
|
Uyp(U.dim_y, U.dim_x, U.dim_z), dUy(U.dim_y, U.dim_x, U.dim_z), Uz(U.dim_y, U.dim_x, U.dim_z), Uzp(U.dim_y, U.dim_x, U.dim_z),
|
|
dUz(U.dim_y, U.dim_x, U.dim_z), Wx(U.dim_y, U.dim_x, U.dim_z), Wy(U.dim_y, U.dim_x, U.dim_z), Wz(U.dim_y, U.dim_x, U.dim_z), Atb(U.len),
|
|
Au(b.len), Aup(b.len), dAu(b.len), d(U.dim_y, U.dim_x, U.dim_z), g(U.len), gp(U.len), dg(U.len), g2(U.len), g2p(U.len), dg2(U.len) {}
|
|
};
|
|
|
|
// z <- x + alpha*y
|
|
// N: number of elements
|
|
template<class T_mat, class T_scal>
|
|
void vec_add(const int N, const int stride, const T_mat *x, const T_scal alpha, const T_mat *y, T_mat *z) {
|
|
#pragma vector always
|
|
#pragma parallel always
|
|
for(int i=0; i<N; i++)
|
|
z[i*stride] = x[i*stride] + alpha*y[i*stride];
|
|
}
|
|
|
|
template<class T_mat, class T_scal>
|
|
void vec_add(const int N, const int stride, const std::complex<T_mat> *x, const T_scal alpha, const std::complex<T_mat> *y, std::complex<T_mat> *z) {
|
|
T_mat *data_x = (T_mat *)x;
|
|
T_mat *data_y = (T_mat *)y;
|
|
T_mat *data_z = (T_mat *)z;
|
|
#pragma vector always
|
|
#pragma parallel always
|
|
for(int i=0; i<N; i++) {
|
|
data_z[2*i*stride] = data_x[2*i*stride] + alpha*data_y[2*i*stride];
|
|
data_z[2*i*stride + 1] = data_x[2*i*stride + 1] + alpha*data_y[2*i*stride + 1];
|
|
}
|
|
}
|
|
|
|
// Scales the measurement vector. Returns the scale factor.
|
|
template<class T_comp, class T_scal>
|
|
T_scal scaleb(mat<T_comp> &b) {
|
|
T_scal scl=1;
|
|
if(b.len > 0) {
|
|
T_scal threshold1 = 0.5;
|
|
T_scal threshold2 = 1.5;
|
|
scl=1;
|
|
T_comp val;
|
|
T_scal val_abs;
|
|
T_comp bmin = b[0];
|
|
T_comp bmax = bmin;
|
|
T_scal bmin_abs = abs(bmin);
|
|
T_scal bmax_abs = bmin_abs;
|
|
for(int i=0; i < b.len; i++) {
|
|
val = b[i];
|
|
val_abs = abs(val);
|
|
if(val_abs < bmin_abs) {
|
|
bmin = val;
|
|
bmin_abs = val_abs;
|
|
}
|
|
if(val_abs > bmax_abs) {
|
|
bmax = val;
|
|
bmax_abs = val_abs;
|
|
}
|
|
}
|
|
T_scal b_dif = abs(bmax - bmin);
|
|
if(b_dif < threshold1) {
|
|
scl = threshold1/b_dif;
|
|
cblas_scal(b.len, scl, b.data(), 1);
|
|
} else if(b_dif > threshold2) {
|
|
scl = threshold2/b_dif;
|
|
cblas_scal(b.len, scl, b.data(), 1);
|
|
}
|
|
}
|
|
return scl;
|
|
}
|
|
|
|
template<class T_comp, class T_scal>
|
|
void D(mat<T_comp> &Ux, mat<T_comp> &Uy, mat<T_comp> &Uz, const mat<T_comp> &U) {
|
|
#ifdef PROFILING
|
|
double start = get_timestamp();
|
|
#endif
|
|
vec_add(U.dim_x*U.dim_y*U.dim_z - 1, 1, U.data()+1, -1, U.data(), Ux.data());
|
|
vec_add(U.dim_y*U.dim_z, U.dim_x, U.data(), -1, U.data()+U.dim_x-1, Ux.data()+U.dim_x-1);
|
|
|
|
vec_add((U.dim_y*U.dim_z-1)*U.dim_x, 1, U.data() + U.dim_x, -1, U.data(), Uy.data());
|
|
for(int z = 0; z < U.dim_z; z++)
|
|
vec_add(U.dim_x, 1, U.data() + z*U.dim_x*U.dim_y, -1, U.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y,
|
|
Uy.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y);
|
|
|
|
vec_add(U.dim_x*U.dim_y*(U.dim_z - 1), 1, U.data() + U.dim_x*U.dim_y, -1, U.data(), Uz.data());
|
|
vec_add(U.dim_x*U.dim_y, 1, U.data(), -1, U.data() + (U.dim_z - 1) * U.dim_x * U.dim_y,
|
|
Uz.data() + (U.dim_z - 1) * U.dim_x * U.dim_y);
|
|
#ifdef PROFILING
|
|
double stop = get_timestamp();
|
|
if(rec_time) t_d += stop - start;
|
|
#endif
|
|
}
|
|
|
|
template<class T_comp, class T_scal>
|
|
void Dt(mat<T_comp> &res, const mat<T_comp> &X, const mat<T_comp> &Y, const mat<T_comp> &Z) {
|
|
#ifdef PROFILING
|
|
double start = get_timestamp();
|
|
#endif
|
|
vec_add(X.len-1, 1, X.data(), -1, X.data()+1, res.data()+1);
|
|
vec_add(X.dim_y * X.dim_z, X.dim_x, X.data()+X.dim_x-1, -1, X.data(), res.data());
|
|
|
|
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)1, Y.data(), 1, res.data()+Y.dim_x, 1);
|
|
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)-1, Y.data()+Y.dim_x, 1, res.data()+Y.dim_x, 1);
|
|
for(int z = 0; z < Y.dim_z; z++) {
|
|
cblas_axpy(Y.dim_x, (T_comp)1, Y.data() + (Y.dim_y-1)*Y.dim_x + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
|
|
cblas_axpy(Y.dim_x, (T_comp)-1, Y.data() + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
|
|
}
|
|
|
|
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)1, Z.data(), 1, res.data()+Z.dim_y*Z.dim_x, 1);
|
|
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)-1, Z.data()+Z.dim_y*Z.dim_x, 1, res.data()+Z.dim_y*Z.dim_x, 1);
|
|
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)1, Z.data() + (Z.dim_z-1)*Z.dim_x*Z.dim_y, 1, res.data(), 1);
|
|
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)-1, Z.data(), 1, res.data(), 1);
|
|
#ifdef PROFILING
|
|
double stop = get_timestamp();
|
|
if(rec_time) t_dt += stop - start;
|
|
#endif
|
|
}
|
|
|
|
// overloaded function get_g_mult calculates:
|
|
// Au = A*U...
|
|
// g = A'*Au...
|
|
|
|
// standrad version (parameter submat_mb not used)
|
|
template<class T_comp, class T_scal, class T_A>
|
|
void get_g_mult(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, int submat_mb) {
|
|
gemv(CblasNoTrans, A, U, data.Au);
|
|
gemv(CblasConjTrans, A, data.Au, data.g);
|
|
}
|
|
|
|
// special version for dense matrices (cache blocking for single layer reconstructions....)
|
|
// submat_mb: size of submatrices in MB
|
|
template<class T_comp, class T_scal>
|
|
void get_g_mult(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &A, const mat<T_comp> &U, int submat_mb) {
|
|
|
|
int submat_elements = submat_mb*1024*1024 / sizeof(T_comp);
|
|
|
|
mat<T_comp> g_tmp(data.n);
|
|
|
|
int submat_dim_y = max(submat_elements / data.n, 1);
|
|
|
|
int n_y = (data.m + submat_dim_y - 1) / submat_dim_y;
|
|
|
|
int r;
|
|
memset(data.g.data(), 0, data.n * sizeof(T_comp));
|
|
for(int y=0; y < n_y; y++) {
|
|
r = min(submat_dim_y, data.m - y * submat_dim_y);
|
|
|
|
cblas_gemv(CblasRowMajor, CblasNoTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, U.data(), 1, 0, data.Au.data() + y * submat_dim_y, 1);
|
|
|
|
cblas_gemv(CblasRowMajor, CblasConjTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, data.Au.data() + y * submat_dim_y, 1, 0, g_tmp.data(), 1);
|
|
|
|
cblas_axpy(data.n, 1, g_tmp.data(), 1, data.g.data(), 1);
|
|
}
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
void get_g(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, const mat<T_comp> &b, int submat_mb) {
|
|
#ifdef PROFILING
|
|
double start = get_timestamp();
|
|
#endif
|
|
get_g_mult(data, A, U, submat_mb);
|
|
#ifdef PROFILING
|
|
double stop = get_timestamp();
|
|
if(rec_time) t_mult += stop - start;
|
|
#endif
|
|
|
|
cblas_axpy(data.n, (T_comp)-1, data.Atb.data(), 1, data.g.data(), 1);
|
|
|
|
// update g2, lam2, lam4...
|
|
mat<T_comp> Vx(data.p, data.q, data.r);
|
|
mat<T_comp> Vy(data.p, data.q, data.r);
|
|
mat<T_comp> Vz(data.p, data.q, data.r);
|
|
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
|
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
|
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
|
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
|
|
|
|
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
|
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
|
|
|
//update lam3, lam5...
|
|
mat<T_comp> Aub(data.m);
|
|
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
|
data.lam3=real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
|
|
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
|
|
|
data.f = (data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3) - data.lam4 - data.lam5;
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
T_scal get_tau(tval3_data<T_comp, T_scal> &data, const T_A &A, bool fst_iter) {
|
|
T_scal tau;
|
|
|
|
// calculate tau...
|
|
if(fst_iter) {
|
|
mat<T_comp> dx(data.p, data.q, data.r);
|
|
mat<T_comp> dy(data.p, data.q, data.r);
|
|
mat<T_comp> dz(data.p, data.q, data.r);
|
|
D<T_comp, T_scal>(dx, dy, dz, data.d);
|
|
T_comp dDd = cblas_dotc(data.n, dx.data(), 1, dx.data(), 1) +
|
|
cblas_dotc(data.n, dy.data(), 1, dy.data(), 1) +
|
|
cblas_dotc(data.n, dz.data(), 1, dz.data(), 1);
|
|
T_comp dd = cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1);
|
|
mat<T_comp> Ad(data.m);
|
|
// Ad=A*d...
|
|
#ifdef PROFILING
|
|
double start = get_timestamp();
|
|
#endif
|
|
gemv(CblasNoTrans, A, data.d, Ad);
|
|
#ifdef PROFILING
|
|
double stop = get_timestamp();
|
|
if(rec_time) t_mult += stop - start;
|
|
#endif
|
|
T_comp Add = cblas_dotc(data.m, Ad.data(), 1, Ad.data(), 1);
|
|
|
|
tau = abs(dd/(dDd + data.muDbeta*Add));
|
|
} else {
|
|
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data());
|
|
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data());
|
|
T_comp ss = cblas_dotc(data.n, data.uup.data(), 1, data.uup.data(), 1);
|
|
mat<T_comp> tmp(data.dg2);
|
|
cblas_axpy(data.n, data.muDbeta, data.dg.data(), 1, tmp.data(), 1);
|
|
T_comp sy = cblas_dotc(data.n, data.uup.data(), 1, tmp.data(), 1);
|
|
tau = abs(ss/sy);
|
|
}
|
|
return tau;
|
|
}
|
|
|
|
template<class T_mat>
|
|
void nonneg(mat<T_mat> &U) {
|
|
T_mat val;
|
|
#pragma parallel always
|
|
#pragma vector always
|
|
for(int i=0; i < U.len; i++) {
|
|
val = U[i];
|
|
U[i] = (val < 0) ? 0 : val;
|
|
}
|
|
}
|
|
|
|
template<class T_mat>
|
|
void nonneg(mat<complex<T_mat> > &U) {
|
|
T_mat val;
|
|
T_mat *data = (T_mat *) U.data();
|
|
#pragma parallel always
|
|
#pragma vector always
|
|
for(int i=0; i < U.len; i++) {
|
|
val = data[2*i];
|
|
data[2*i] = (val < 0) ? 0 : val;
|
|
data[2*i + 1] = 0;
|
|
}
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
void descend(mat<T_comp> &U, tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &b, T_scal tau, bool non_neg,
|
|
int submat_mb) {
|
|
cblas_axpy(data.n, -1*tau, data.d.data(), 1, U.data(),1); // U = U - tau*d
|
|
if(non_neg) {
|
|
nonneg(U);
|
|
}
|
|
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
|
|
get_g<T_comp, T_scal>(data, A, U, b, submat_mb);
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
void update_g(tval3_data<T_comp, T_scal> &data, const T_scal alpha, const T_A &A, mat<T_comp> &U, const mat<T_comp> &b) {
|
|
vec_add(data.n, 1, data.gp.data(), alpha, data.dg.data(), data.g.data());
|
|
vec_add(data.n, 1, data.g2p.data(), alpha, data.dg2.data(), data.g2.data());
|
|
vec_add(data.n, 1, data.Up.data(), alpha, data.dU.data(), U.data());
|
|
vec_add(data.m, 1, data.Aup.data(), alpha, data.dAu.data(), data.Au.data());
|
|
vec_add(data.n, 1, data.Uxp.data(), alpha, data.dUx.data(), data.Ux.data());
|
|
vec_add(data.n, 1, data.Uyp.data(), alpha, data.dUy.data(), data.Uy.data());
|
|
vec_add(data.n, 1, data.Uzp.data(), alpha, data.dUz.data(), data.Uz.data());
|
|
|
|
// update lam2, lam4...
|
|
mat<T_comp> Vx(data.p, data.q, data.r);
|
|
mat<T_comp> Vy(data.p, data.q, data.r);
|
|
mat<T_comp> Vz(data.p, data.q, data.r);
|
|
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
|
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
|
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
|
|
|
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
|
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
|
|
|
//update lam3, lam5...
|
|
mat<T_comp> Aub(data.m);
|
|
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
|
data.lam3 =real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
|
|
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
|
|
|
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
void min_u(tval3_data<T_comp, T_scal> &data, mat<T_comp> &U, T_scal &gam, const T_A &A, const mat<T_comp> &b,
|
|
const tval3_options<T_scal> &opts, T_comp C, bool fst_iter, int submat_mb) {
|
|
T_scal tau, alpha, c_armin;
|
|
|
|
tau = get_tau<T_comp, T_scal, T_A>(data, A, fst_iter);
|
|
|
|
// keep previous values...
|
|
data.Up = U; data.gp = data.g; data.g2p = data.g2;
|
|
data.Aup = data.Au; data.Uxp = data.Ux; data.Uyp = data.Uy;
|
|
data.Uzp = data.Uz;
|
|
|
|
// one step steepest descend...
|
|
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
|
|
|
|
// NMLA...
|
|
alpha=1;
|
|
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.dU.data()); // Ud = U - Up
|
|
c_armin = real(cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1) * tau * opts.c * data.beta); // c_armin = d'*d*tau*c*beta
|
|
if(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
|
|
|
|
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data()); // dg=g-gp
|
|
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data()); // dg2=g2-g2p
|
|
vec_add(data.m, 1, data.Au.data(), -1, data.Aup.data(), data.dAu.data()); // dAu=Au-Aup
|
|
vec_add(data.n, 1, data.Ux.data(), -1, data.Uxp.data(), data.dUx.data()); // dUx=Ux-Uxp
|
|
vec_add(data.n, 1, data.Uy.data(), -1, data.Uyp.data(), data.dUy.data()); // Uy = Uy-Uyp
|
|
vec_add(data.n, 1, data.Uz.data(), -1, data.Uzp.data(), data.dUz.data()); // Uz = Uz-Uzp
|
|
|
|
int cnt=0;
|
|
|
|
while(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
|
|
|
|
if(cnt==5) { // "backtracking" not successful...
|
|
gam *= opts.rate_gam;
|
|
tau = get_tau<T_comp, T_scal, T_A>(data, A, true);
|
|
U = data.Up;
|
|
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
|
|
break;
|
|
}
|
|
alpha *= opts.gamma;
|
|
|
|
update_g<T_comp, T_scal, T_A>(data, alpha, A, U, b);
|
|
|
|
cnt++;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class T_comp, class T_scal>
|
|
void get_gradient(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &DtsAtd) {
|
|
// d = g2 + muDbeta*g + DtsAtd... (DtsAtd has opposite sign, compared to the MATLAB-version!)
|
|
data.d = DtsAtd;
|
|
cblas_axpy(data.n, 1, data.g2.data(), 1, data.d.data(), 1);
|
|
cblas_axpy(data.n, data.muDbeta, data.g.data(), 1, data.d.data(), 1);
|
|
}
|
|
|
|
// scalar
|
|
template<class T_mat, class T_scal>
|
|
void shrinkage(tval3_data<T_mat, T_scal> &data) {
|
|
T_scal sum=0;
|
|
T_scal temp_wx, temp_wy, temp_wz;
|
|
T_mat Uxbar, Uybar, Uzbar;
|
|
#pragma parallel always
|
|
#pragma vector always
|
|
for(int i=0; i < data.n; i++) {
|
|
Uxbar = data.Ux[i] - data.sigmaX[i]/data.beta;
|
|
Uybar = data.Uy[i] - data.sigmaY[i]/data.beta;
|
|
Uzbar = data.Uz[i] - data.sigmaZ[i]/data.beta;
|
|
|
|
temp_wx = abs(Uxbar) - 1/data.beta;
|
|
temp_wy = abs(Uybar) - 1/data.beta;
|
|
temp_wz = abs(Uzbar) - 1/data.beta;
|
|
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
|
|
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
|
|
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
|
|
sum += temp_wx + temp_wy + temp_wz;
|
|
data.Wx[i] = (Uxbar >= 0) ? temp_wx : -1 * temp_wx;
|
|
data.Wy[i] = (Uybar >= 0) ? temp_wy : -1 * temp_wy;
|
|
data.Wz[i] = (Uzbar >= 0) ? temp_wz : -1 * temp_wz;
|
|
}
|
|
data.lam1 = sum;
|
|
}
|
|
|
|
// complex
|
|
template<class T_mat, class T_scal>
|
|
void shrinkage(tval3_data<complex<T_mat>, T_scal> &data) {
|
|
T_mat sum=0;
|
|
T_mat temp_wx, temp_wy, temp_wz;
|
|
T_mat Uxbar_real, Uxbar_imag, Uxbar_abs, Uybar_real, Uybar_imag, Uybar_abs, Uzbar_real, Uzbar_imag, Uzbar_abs;
|
|
T_mat *Ux_data = (T_mat *) data.Ux.data();
|
|
T_mat *Uy_data = (T_mat *) data.Uy.data();
|
|
T_mat *Uz_data = (T_mat *) data.Uz.data();
|
|
T_mat *sigmaX_data = (T_mat *) data.sigmaX.data();
|
|
T_mat *sigmaY_data = (T_mat *) data.sigmaY.data();
|
|
T_mat *sigmaZ_data = (T_mat *) data.sigmaZ.data();
|
|
T_mat *Wx_data = (T_mat *) data.Wx.data();
|
|
T_mat *Wy_data = (T_mat *) data.Wy.data();
|
|
T_mat *Wz_data = (T_mat *) data.Wz.data();
|
|
const int len = data.n;
|
|
#pragma parallel always
|
|
#pragma vector always
|
|
for(int i=0; i < len; i++) {
|
|
Uxbar_real = Ux_data[2*i] - sigmaX_data[2*i]/data.beta;
|
|
Uxbar_imag = Ux_data[2*i + 1] - sigmaX_data[2*i + 1]/data.beta;
|
|
Uybar_real = Uy_data[2*i] - sigmaY_data[2*i]/data.beta;
|
|
Uybar_imag = Uy_data[2*i + 1] - sigmaY_data[2*i + 1]/data.beta;
|
|
Uzbar_real = Uz_data[2*i] - sigmaZ_data[2*i]/data.beta;
|
|
Uzbar_imag = Uz_data[2*i + 1] - sigmaZ_data[2*i + 1]/data.beta;
|
|
|
|
Uxbar_abs = sqrt(Uxbar_real*Uxbar_real + Uxbar_imag*Uxbar_imag);
|
|
Uybar_abs = sqrt(Uybar_real*Uybar_real + Uybar_imag*Uybar_imag);
|
|
Uzbar_abs = sqrt(Uzbar_real*Uzbar_real + Uzbar_imag*Uzbar_imag);
|
|
|
|
temp_wx = Uxbar_abs - 1/data.beta;
|
|
temp_wy = Uybar_abs - 1/data.beta;
|
|
temp_wz = Uzbar_abs - 1/data.beta;
|
|
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
|
|
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
|
|
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
|
|
sum += temp_wx + temp_wy + temp_wz;
|
|
|
|
Wx_data[2*i] = (Uxbar_abs > 0) ? temp_wx*Uxbar_real/Uxbar_abs : 0;
|
|
Wx_data[2*i+1] = (Uxbar_abs > 0) ? temp_wx*Uxbar_imag/Uxbar_abs : 0;
|
|
Wy_data[2*i] = (Uybar_abs > 0) ? temp_wy*Uybar_real/Uybar_abs : 0;
|
|
Wy_data[2*i+1] = (Uybar_abs > 0) ? temp_wy*Uybar_imag/Uybar_abs : 0;
|
|
Wz_data[2*i] = (Uzbar_abs > 0) ? temp_wz*Uybar_real/Uzbar_abs : 0;
|
|
Wz_data[2*i+1] = (Uzbar_abs > 0) ? temp_wz*Uybar_imag/Uzbar_abs : 0;
|
|
}
|
|
data.lam1 = sum;
|
|
}
|
|
|
|
template<class T_comp, class T_scal>
|
|
void update_W(tval3_data<T_comp, T_scal> &data) {
|
|
|
|
data.f -= (data.lam1 + data.beta/2*data.lam2 - data.lam4);
|
|
|
|
shrinkage(data);
|
|
|
|
// update g2, lam2, lam4...
|
|
mat<T_comp> Vx(data.p, data.q, data.r);
|
|
mat<T_comp> Vy(data.p, data.q, data.r);
|
|
mat<T_comp> Vz(data.p, data.q, data.r);
|
|
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
|
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
|
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
|
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
|
|
|
|
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
|
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
|
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
|
|
|
data.f += (data.lam1 + data.beta/2*data.lam2 - data.lam4);
|
|
}
|
|
|
|
template<class T_comp, class T_scal>
|
|
void update_mlp(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &b) {
|
|
data.f += (data.lam4 + data.lam5);
|
|
|
|
mat<T_comp> Vx(data.p, data.q, data.r);
|
|
mat<T_comp> Vy(data.p, data.q, data.r);
|
|
mat<T_comp> Vz(data.p, data.q, data.r);
|
|
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
|
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
|
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
|
|
|
cblas_axpy(data.n, -1*data.beta, Vx.data(), 1, data.sigmaX.data(), 1); // sigmaX -= beta*Vx
|
|
cblas_axpy(data.n, -1*data.beta, Vy.data(), 1, data.sigmaY.data(), 1); // sigmaY -= beta*Vy
|
|
cblas_axpy(data.n, -1*data.beta, Vz.data(), 1, data.sigmaZ.data(), 1); // sigmaz -= beta*Vz
|
|
|
|
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
|
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
|
|
|
mat<T_comp> Aub(data.m);
|
|
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
|
cblas_axpy(data.m, -1*data.mu, Aub.data(), 1, data.delta.data(), 1); // delta -= mu*Aub
|
|
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
|
|
|
data.f -= (data.lam4 + data.lam5);
|
|
}
|
|
|
|
template<class T_comp>
|
|
void check_params(mat<T_comp> &U, const mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
|
if(U.format != mat_row_major || Ut.format != mat_row_major || A.format != mat_row_major)
|
|
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
|
else if(Ut.len > 0 && Ut.len != U.len)
|
|
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
|
else if(U.len != A.dim_x)
|
|
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
|
|
else if(b.len != A.dim_y)
|
|
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
|
|
}
|
|
|
|
template<class T_comp>
|
|
void check_params(mat<T_comp> &U, const sparse_mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
|
if(U.format != mat_row_major || Ut.format != mat_row_major)
|
|
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
|
if(A.format != sparse_mat_csc)
|
|
throw tval3_exception(string("Argument error: A must be in CSC format!"));
|
|
if(Ut.len > 0 && Ut.len != U.len)
|
|
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
|
else if(U.len != A.dim_x)
|
|
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
|
|
else if(b.len != A.dim_y)
|
|
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
|
|
}
|
|
|
|
template<class T_comp>
|
|
void check_params(mat<T_comp> &U, const geometry &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
|
if(U.format != mat_row_major || Ut.format != mat_row_major)
|
|
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
|
else if(Ut.len > 0 && Ut.len != U.len)
|
|
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
|
else if(b.len != A.num_emitters * A.num_receivers)
|
|
throw tval3_exception(string("Argument error: b.len must be equal to A.num_emitters * A.num_receivers!"));
|
|
}
|
|
|
|
template<class T_comp, class T_scal, class T_A>
|
|
const tval3_info<T_scal> tval3(mat<T_comp> &U, const T_A &A, const mat<T_comp> &b, const tval3_options<T_scal> &opts, const mat<T_comp> &Ut,
|
|
int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception) {
|
|
|
|
double start, stop;
|
|
#ifdef PROFILING
|
|
double start_loop, stop_loop;
|
|
t_mult = 0; t_d = 0; t_dt = 0; rec_time = false;
|
|
#endif
|
|
tval3_info<T_scal> info;
|
|
start = get_timestamp();
|
|
try {
|
|
|
|
check_params (U, A, b, Ut);
|
|
|
|
omp_set_num_threads(omp_get_num_procs() / virtual_procs_per_core);
|
|
|
|
mat<T_comp> bs = b; // create a copy of b (scaling...)
|
|
tval3_data<T_comp, T_scal> data(U, bs, opts.mu0, opts.beta0);
|
|
T_scal muf=opts.mu;
|
|
T_scal betaf=opts.beta, beta0=0;
|
|
mat<T_comp> DtsAtd(data.n);
|
|
T_scal gam = opts.gam;
|
|
T_scal scl = scaleb<T_comp, T_scal>(bs);
|
|
gemv(CblasConjTrans, A, bs, data.Atb); // Atb = A'*bs
|
|
U = data.Atb; // initial guess: U=A'*b
|
|
if(data.mu > muf) data.mu = muf;
|
|
if(data.beta > betaf) data.beta = betaf;
|
|
data.muDbeta = data.mu/data.beta;
|
|
mat <T_comp> rcdU(U), UrcdU(data.n);
|
|
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
|
|
shrinkage(data);
|
|
get_g<T_comp, T_scal>(data, A, U, bs, submat_mb);
|
|
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
|
T_scal Q=1, Qp;
|
|
T_comp C=data.f;
|
|
int count_outer=0, count_total=0;
|
|
bool fst_iter;
|
|
T_scal RelChg, RelChgOut=0, nrmup=0;
|
|
|
|
#ifdef PROFILING
|
|
rec_time = true;
|
|
start_loop = get_timestamp();
|
|
#endif
|
|
|
|
while((count_outer < opts.maxcnt) && (count_total < opts.maxit)) {
|
|
|
|
fst_iter = true;
|
|
while(count_total < opts.maxit) {
|
|
// u-subproblem...
|
|
min_u<T_comp, T_scal>(data, U, gam, A, bs, opts, C, fst_iter, submat_mb);
|
|
|
|
// shrinkage like step...
|
|
update_W<T_comp, T_scal>(data);
|
|
|
|
// update reference values...
|
|
Qp = Q, Q = gam*Qp + 1; C = (gam*Qp*C + data.f)/Q;
|
|
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.uup.data());
|
|
|
|
// compute gradient...
|
|
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
|
|
|
count_total++;
|
|
|
|
// calculate relative change...
|
|
nrmup = cblas_nrm2(data.n, data.Up.data(), 1);
|
|
RelChg = cblas_nrm2(data.n, data.uup.data(), 1) / nrmup;
|
|
|
|
if((RelChg < opts.tol_inn) && (!fst_iter))
|
|
break;
|
|
|
|
fst_iter = false;
|
|
|
|
}
|
|
count_outer++;
|
|
|
|
// calculate relative change...
|
|
vec_add(data.n, 1, U.data(), -1, rcdU.data(), UrcdU.data()); // UrcdU = U - rcdU
|
|
RelChgOut = cblas_nrm2(data.n, UrcdU.data(), 1) / nrmup;
|
|
rcdU = U;
|
|
|
|
if(RelChgOut < opts.tol)
|
|
break;
|
|
|
|
// update multipliers...
|
|
update_mlp<T_comp, T_scal>(data, bs);
|
|
|
|
// update penalty parameters for continuation scheme...
|
|
beta0 = data.beta;
|
|
data.beta *= opts.rate_cnt;
|
|
data.mu *= opts.rate_cnt;
|
|
if(data.beta > betaf) data.beta = betaf;
|
|
if(data.mu > muf) data.mu = muf;
|
|
data.muDbeta = data.mu/data.beta;
|
|
|
|
// update f...
|
|
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
|
|
|
|
// DtsAtd = beta0/beta*d
|
|
DtsAtd = data.d;
|
|
cblas_scal(data.n, beta0/data.beta, DtsAtd.data(), 1);
|
|
|
|
// compute gradient...
|
|
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
|
|
|
// reset reference values...
|
|
gam=opts.gam; Q=1; C=data.f;
|
|
}
|
|
#ifdef PROFILING
|
|
stop_loop = get_timestamp();
|
|
rec_time = false;
|
|
|
|
ofstream file;
|
|
file.open("profile.txt");
|
|
file << "loop [ms]: " << (stop_loop - start_loop)*1000 << "\n";
|
|
file << "mult [ms]: " << t_mult*1000 << "\n";
|
|
file << "d [ms]: " << t_d*1000 << "\n";
|
|
file << "dt [ms]: " << t_dt*1000 << "\n";
|
|
file.close();
|
|
#endif
|
|
|
|
// scale U. Take real part only, if opts.isreal==true
|
|
for(int i=0; i < data.n; i++) {
|
|
if(opts.isreal)
|
|
U[i] = (T_comp)real(U[i]);
|
|
U[i] = U[i]/scl;
|
|
}
|
|
|
|
stop = get_timestamp();
|
|
|
|
// generate return value...
|
|
info.secs = stop - start;
|
|
info.outer_iters = count_outer;
|
|
info.total_iters = count_total;
|
|
info.rel_chg = RelChgOut;
|
|
if(Ut.len == U.len) {
|
|
mat<T_comp> deltaU = Ut;
|
|
cblas_axpy(Ut.len, (T_comp)-1, U.data(), 1, deltaU.data(), 1);
|
|
T_scal nrm_deltaU = cblas_nrm2(deltaU.len, deltaU.data(), 1);
|
|
T_scal nrm_Ut = cblas_nrm2(Ut.len, Ut.data(), 1);
|
|
info.rel_error = nrm_deltaU/nrm_Ut;
|
|
} else {
|
|
info.rel_error = 0;
|
|
}
|
|
} catch (tval3_exception) {
|
|
throw;
|
|
} catch (const std::exception &ex) {
|
|
string msg = "Internal error: ";
|
|
msg += ex.what();
|
|
throw tval3_exception(msg);
|
|
}
|
|
|
|
return info;
|
|
}
|
|
|
|
|
|
|
|
|
|
const tval3_info<float> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
|
|
const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, float, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b,
|
|
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, double, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<double> &U, const mat<double> &A, const mat<double> &b,
|
|
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
|
|
return tval3<double, double, mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<float> tval3(mat<complex<float> > &U, const mat<complex<float> > &A, const mat<complex<float> > &b,
|
|
const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<float>, float, mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<complex<double> > &U, const mat<complex<double> > &A, const mat<complex<double> > &b,
|
|
const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<double>, double, mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
|
|
const tval3_info<float> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
|
|
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, float, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
|
|
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, double, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<double> &U, const sparse_mat<double> &A, const mat<double> &b,
|
|
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<double, double, sparse_mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<float> tval3(mat<complex<float> > &U, const sparse_mat<complex<float> > &A,
|
|
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<float>, float, sparse_mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<complex<double> > &U, const sparse_mat<complex<double> > &A,
|
|
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<double>, double, sparse_mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
const tval3_info<float> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
|
|
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
|
|
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<float, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<double> &U, const geometry &A, const mat<double> &b,
|
|
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<double, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<float> tval3(mat<complex<float> > &U, const geometry &A,
|
|
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5,
|
|
int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<float>, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|
|
|
|
const tval3_info<double> tval3(mat<complex<double> > &U, const geometry &A,
|
|
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5,
|
|
int virtual_procs_per_core=2) throw(tval3_exception)
|
|
{
|
|
return tval3<std::complex<double>, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
|
}
|