Commit source

This commit is contained in:
kradchen
2023-05-18 16:04:27 +08:00
parent 88cf81e4ea
commit c6cd188732
83 changed files with 39921 additions and 0 deletions

237
TVALCPU/src/blas_wrap.h Normal file
View File

@@ -0,0 +1,237 @@
/*
* blas_wrap.h
*
* Created on: Oct 7, 2011
* Author: ditlevsen
*/
#ifndef BLAS_WRAP_H_
#define BLAS_WRAP_H_
#include <complex>
#include <mkl_cblas.h>
#include <mkl_spblas.h>
// overloaded wrapper functions for blas / mkl sparse blas calls...
inline void cblas_scal(int N, float alpha, float *X, int incX) {
cblas_sscal(N, alpha, X, incX);
}
inline void cblas_scal(int N, double alpha, double *X, int incX) {
cblas_dscal(N, alpha, X, incX);
}
inline void cblas_scal(int N, std::complex<float> alpha, std::complex<float> *X, int incX) {
cblas_cscal(N, &alpha, X, incX);
}
inline void cblas_scal(int N, std::complex<double> alpha, std::complex<double> *X, int incX) {
cblas_zscal(N, &alpha, X, incX);
}
inline void cblas_scal(int N, float alpha, std::complex<float> *X, int incX) {
cblas_csscal(N, alpha, X, incX);
}
inline void cblas_scal(int N, double alpha, std::complex<double> *X, int incX) {
cblas_zdscal(N, alpha, X, incX);
}
inline void cblas_axpy(int N, float alpha, const float *X, int incX, float *Y, int incY) {
cblas_saxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_axpy(int N, double alpha, const double *X, int incX, double *Y, int incY) {
cblas_daxpy(N, alpha, X, incX, Y, incY);
}
inline void cblas_axpy(int N, std::complex<float> alpha, const std::complex<float> *X, int incX, std::complex<float> *Y, int incY) {
cblas_caxpy(N, &alpha, X, incX, Y, incY);
}
inline void cblas_axpy(int N, std::complex<double> alpha, const std::complex<double> *X, int incX, std::complex<double> *Y, int incY) {
cblas_zaxpy(N, &alpha, X, incX, Y, incY);
}
inline void cblas_copy(int N, const float *X, int incX, float *Y, int incY) {
cblas_scopy(N, X, incX, Y, incY);
}
inline void cblas_copy(int N, const double *X, int incX, double *Y, int incY) {
cblas_dcopy(N, X, incX, Y, incY);
}
inline void cblas_copy(int N, const std::complex<float> *X, int incX, std::complex<float> *Y, int incY) {
cblas_ccopy(N,X, incX, Y, incY);
}
inline void cblas_copy(int N, const std::complex<double> *X, int incX, std::complex<double> *Y, int incY) {
cblas_zcopy(N, X, incX, Y, incY);
}
inline float cblas_dotc(int N, const float *X, int incX, const float *Y, int incY) {
return cblas_sdot(N, X, incX, Y, incY);
}
inline double cblas_dotc(int N, const double *X, int incX, const double *Y, int incY) {
return cblas_ddot(N, X, incX, Y, incY);
}
inline std::complex<float> cblas_dotc(int N, const std::complex<float> *X, int incX, const std::complex<float> *Y, int incY) {
std::complex<float> result;
cblas_cdotc_sub(N, X, incX, Y, incY, &result);
return result;
}
inline std::complex<double> cblas_dotc(int N, const std::complex<double> *X, int incX, const std::complex<double> *Y, int incY) {
std::complex<double> result;
cblas_zdotc_sub(N, X, incX, Y, incY, &result);
return result;
}
inline float cblas_nrm2(int N, const float *X, int incX) {
return cblas_snrm2(N, X, incX);
}
inline double cblas_nrm2(int N, const double *X, int incX) {
return cblas_dnrm2(N, X, incX);
}
inline float cblas_nrm2(int N, const std::complex<float> *X, int incX) {
return cblas_scnrm2(N, X, incX);
}
inline double cblas_nrm2(int N, const std::complex<double> *X, int incX) {
return cblas_dznrm2(N, X, incX);
}
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
float alpha, const float *A, int lda, const float *X, int incX, float beta, float *Y, int incY) {
cblas_sgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
}
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
double alpha, const double *A, int lda, const double *X, int incX, double beta, double *Y, int incY) {
cblas_dgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
}
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
std::complex<float> alpha, const std::complex<float> *A, int lda, const std::complex<float> *X, int incX,
std::complex<float> beta, std::complex<float> *Y, int incY)
{
cblas_cgemv(Order, TransA, M, N, &alpha, A, lda, X, incX, &beta, Y, incY);
}
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
std::complex<double> alpha, const std::complex<double> *A, int lda, const std::complex<double> *X, int incX,
std::complex<double> beta, std::complex<double> *Y, int incY)
{
cblas_zgemv(Order, TransA, M, N, &alpha, A, lda, X, incX, &beta, Y, incY);
}
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const float alpha, const float *A,
const int lda, const float *B, const int ldb,
const float beta, float *C, const int ldc)
{
cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc)
{
cblas_dgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
}
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const std::complex<float> alpha, const std::complex<float> *A,
const int lda, const std::complex<float> *B, const int ldb,
const std::complex<float> beta, std::complex<float> *C, const int ldc)
{
cblas_cgemm(Order, TransA, TransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
}
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const std::complex<double> alpha, const std::complex<double> *A,
const int lda, const std::complex<double> *B, const int ldb,
const std::complex<double> beta, std::complex<double> *C, const int ldc)
{
cblas_zgemm(Order, TransA, TransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
}
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *indx,
MKL_INT *pntrb, MKL_INT *pntre, float *x, float *beta, float *y)
{
mkl_scscmv(transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
}
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx,
MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y)
{
mkl_dcscmv(transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
}
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, std::complex<float> *alpha, char *matdescra,
std::complex<float> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<float> *x,
std::complex<float> *beta, std::complex<float> *y)
{
mkl_ccscmv(transa, m, k, (MKL_Complex8 *)alpha, matdescra, (MKL_Complex8 *)val, indx, pntrb, pntre,
(MKL_Complex8 *)x, (MKL_Complex8 *)beta, (MKL_Complex8 *)y);
}
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, std::complex<double> *alpha, char *matdescra,
std::complex<double> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<double> *x,
std::complex<double> *beta, std::complex<double> *y)
{
mkl_zcscmv(transa, m, k, (MKL_Complex16 *)alpha, matdescra, (MKL_Complex16 *)val, indx, pntrb, pntre,
(MKL_Complex16 *)x, (MKL_Complex16 *)beta, (MKL_Complex16 *)y);
}
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, float *alpha, char *matdescra, float *val,
MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, float *b, MKL_INT *ldb, float *beta, float *c, MKL_INT *ldc)
{
mkl_scscmm(transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc);
}
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra,
double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c,
MKL_INT *ldc)
{
mkl_dcscmm(transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc);
}
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, std::complex<float> *alpha, char *matdescra,
std::complex<float> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<float> *b, MKL_INT *ldb,
std::complex<float> *beta, std::complex<float> *c, MKL_INT *ldc)
{
mkl_ccscmm(transa, m, n, k, (MKL_Complex8 *)alpha, matdescra, (MKL_Complex8 *)val, indx, pntrb, pntre,
(MKL_Complex8 *)b, ldb, (MKL_Complex8 *)beta, (MKL_Complex8 *)c, ldc);
}
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, std::complex<double> *alpha, char *matdescra,
std::complex<double> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<double> *b, MKL_INT *ldb,
std::complex<double> *beta, std::complex<double> *c, MKL_INT *ldc)
{
mkl_zcscmm(transa, m, n, k, (MKL_Complex16 *)alpha, matdescra, (MKL_Complex16 *)val, indx, pntrb, pntre,
(MKL_Complex16 *)b, ldb, (MKL_Complex16 *)beta, (MKL_Complex16 *)c, ldc);
}
#endif /* BLAS_WRAP_H_ */

291
TVALCPU/src/container.h Normal file
View File

@@ -0,0 +1,291 @@
/*
* Classes for handling matrices and vectors:
*
* Class mat<Type>: dense matrices/vectors
* Class sparse_mat<Type>: sparse matrices
* Class geometry: data for dynamic calculation of the measurement matrix
*
*/
#ifndef CONTAINER_H_
#define CONTAINER_H_
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <exception>
// enumerations...
enum T_alloc {allocated_in_constructor, preallocated_buffer};
enum T_mat_format {mat_col_major, mat_row_major};
enum T_sparse_mat_format {sparse_mat_csc, sparse_mat_csr};
// exceptions...
class illegal_mat_assignment: public std::exception
{
virtual const char* what() const throw()
{
return "Illegale Zuweisung von mat-Objekten.";
}
};
class illegal_sparse_mat_assignment: public std::exception
{
virtual const char* what() const throw()
{
return "Illegale Zuweisung von sparse_mat-Objekten.";
}
};
//---------------------------------------
// class mat...
//---------------------------------------
template <class Type>
class mat {
private:
Type *pbuf;
T_alloc alloc_info;
public:
const int dim_y;
const int dim_x;
const int dim_z;
const int len;
const T_mat_format format;
mat(int l_y, int l_x, int l_z, bool init=true, T_mat_format storage=mat_row_major);
mat(int num_elements, bool init=true);
mat(int l_y, int l_x, int l_z, Type *buffer, T_mat_format storage=mat_row_major);
mat(int num_elements, Type *buffer);
mat(const mat &m);
~mat() { if(pbuf && (alloc_info == allocated_in_constructor)) delete[] pbuf; };
Type *data() { return pbuf; }
const Type *data() const { return pbuf; }
Type &operator[](int i) { return pbuf[i]; }
const Type &operator[](int i) const { return pbuf[i]; }
mat &operator=(const mat<Type> &m) throw(illegal_mat_assignment);
void randomFill(Type scl);
};
//---------------------------------------
// class sparse_mat...
//---------------------------------------
template<class Type>
class sparse_mat {
private:
int *p_ptr;
int *p_ind;
Type *p_val;
T_alloc alloc_info;
public:
const int dim_y;
const int dim_x;
const int nnz;
const T_sparse_mat_format format;
sparse_mat(int l_y, int l_x, int n_nonzero, T_sparse_mat_format storage_format=sparse_mat_csc, bool init=true);
sparse_mat(int l_y, int l_x, int n_nonzero, Type *buff_val, int *buff_ptr, int *buff_ind,
T_sparse_mat_format storage_format=sparse_mat_csc);
sparse_mat(const sparse_mat &m);
~sparse_mat();
Type *val() { return p_val; }
const Type *val() const { return p_val; }
int *ptr() { return p_ptr; }
const int *ptr() const { return p_ptr; }
int *ind() { return p_ind; }
const int *ind() const { return p_ind; }
sparse_mat<Type> &operator=(const mat<Type> &m) throw(illegal_sparse_mat_assignment);
};
//---------------------------------------
// Struct geomety
//---------------------------------------
struct geometry {
int *x_emitters;
int *y_emitters;
int *z_emitters;
int *x_receivers;
int *y_receivers;
int *z_receivers;
int num_emitters;
int num_receivers;
int rv_x;
int rv_y;
int rv_z;
float scale_factor;
};
// ------------------------------------------------------------------------
// member functions, etc. class mat
// ------------------------------------------------------------------------
template<class Type>
mat<Type>::mat(int l_y, int l_x, int l_z, bool init, T_mat_format storage): pbuf(NULL), alloc_info(allocated_in_constructor),
dim_y(l_y), dim_x(l_x), dim_z(l_z), len(l_y*l_x*l_z), format(storage) {
if(len > 0) {
pbuf = new Type[len];
if(init)
memset(pbuf, 0, len*sizeof(Type));
}
};
template<class Type>
mat<Type>::mat(int num_elements, bool init): pbuf(NULL), alloc_info(allocated_in_constructor), dim_y(num_elements), dim_x(1),
dim_z(1), len(num_elements), format(mat_row_major) {
if(len > 0) {
pbuf = new Type[len];
if(init)
memset(pbuf, 0, len*sizeof(Type));
}
};
template<class Type>
mat<Type>::mat(int l_y, int l_x, int l_z, Type *buffer, T_mat_format storage): pbuf(buffer), dim_y(l_y), dim_x(l_x),
dim_z(l_z), len(l_y*l_x*l_z), alloc_info(preallocated_buffer), format(storage) {};
template<class Type>
mat<Type>::mat(int num_elements, Type *buffer): pbuf(buffer), dim_y(num_elements), dim_x(1),
dim_z(1), len(num_elements), alloc_info(preallocated_buffer), format(mat_row_major) {};
template<class Type>
mat<Type>::mat(const mat &m): pbuf(NULL), alloc_info(allocated_in_constructor), dim_y(m.dim_y), dim_x(m.dim_x),
dim_z(m.dim_z), len(m.len), format(m.format) {
if(len > 0) {
pbuf = new Type[len];
memcpy(pbuf, m.pbuf, len*sizeof(Type));
}
};
template<class Type>
mat<Type> &mat<Type>::operator=(const mat<Type> &m) throw(illegal_mat_assignment) {
if(m.len == len)
memcpy(pbuf, m.pbuf, len*sizeof(Type));
else
throw illegal_mat_assignment();
return *this;
}
template<class Type>
void mat<Type>::randomFill(Type scl) {
for(int i=0; i<len; i++)
pbuf[i] = ((Type)rand() / RAND_MAX - 0.5)/scl;
}
template<class Type>
std::ostream &operator<<(std::ostream &stream, const mat<Type> &m) {
for(int z=0; z < m.dim_z; z++) {
stream << "\nz=" << z+1 << ":\n\n";
for(int i=0; i<m.dim_y; i++) {
for(int j=0; j<m.dim_x; j++) {
if(m.format == mat_row_major)
stream << m[z*m.dim_x*m.dim_y + i*m.dim_x + j] << " ";
else
stream << m[z*m.dim_x*m.dim_y + i + j*m.dim_y] << " ";
}
stream << "\n";
}
return stream;
}
}
// ------------------------------------------------------------------------
// member functions, etc. class sparse_mat
// ------------------------------------------------------------------------
template<class Type>
sparse_mat<Type>::sparse_mat(int l_y, int l_x, int n_nonzero, T_sparse_mat_format storage_format, bool init):
p_val(NULL), p_ptr(NULL), p_ind(NULL), dim_y(l_y), dim_x(l_x), nnz(n_nonzero), format(storage_format),
alloc_info(allocated_in_constructor)
{
if(nnz > 0) {
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
p_val = new Type[nnz];
p_ptr = new int[len_ptr];
p_ind = new int[nnz];
if(init) {
memset(p_val, 0, nnz*sizeof(Type));
memset(p_ptr, 0, (len_ptr)*sizeof(int));
memset(p_ind, 0, nnz*sizeof(int));
}
}
};
template<class Type>
sparse_mat<Type>::sparse_mat(int l_y, int l_x, int n_nonzero, Type *buff_val, int *buff_ptr, int *buff_ind,
T_sparse_mat_format storage_format): p_val(buff_val), p_ptr(buff_ptr), p_ind(buff_ind), dim_y(l_y), dim_x(l_x),
nnz(n_nonzero), format(storage_format), alloc_info(preallocated_buffer) {}
template<class Type>
sparse_mat<Type>::sparse_mat(const sparse_mat &m): p_val(NULL), p_ptr(NULL), p_ind(NULL), dim_y(m.dim_y),
dim_x(m.dim_x), nnz(m.nnz), format(m.format), alloc_info(allocated_in_constructor)
{
if(nnz > 0) {
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
p_val = new Type[nnz];
memcpy(p_val, m.p_val, nnz*sizeof(Type));
p_ptr = new int[len_ptr];
memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
p_ind = new int[nnz];
memcpy(p_ind, m.p_ind, nnz*sizeof(int));
}
};
template<class Type>
sparse_mat<Type>::~sparse_mat() {
if(alloc_info == allocated_in_constructor) {
if(p_val) delete[] p_val;
if(p_ptr) delete[] p_ptr;
if(p_ind) delete[] p_ind;
}
}
template<class Type>
sparse_mat<Type> &sparse_mat<Type>::operator=(const mat<Type> &m) throw(illegal_sparse_mat_assignment){
if(m.nnz == nnz && m.dim_y == dim_y && m.dim_x == dim_x && m.format == format) {
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
memcpy(p_val, m.p_val, nnz*sizeof(Type));
memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
memcpy(p_ind, m.p_ind, nnz*sizeof(int));
} else {
throw illegal_sparse_mat_assignment();
}
return *this;
}
// output operator ...
template<class Type>
std::ostream &operator<<(std::ostream &stream, const sparse_mat<Type> &m) {
mat<Type> tmp(m.dim_y, m.dim_x);
if(m.format == sparse_mat_csc) {
for(int c=0; c < m.dim_x; c++)
for(int i = m.ptr()[c]; i < m.ptr()[c+1]; i++)
tmp[c*m.dim_y + m.ind()[i]] = m.val()[i];
} else {
for(int r=0; r < m.dim_y; r++)
for(int i = m.ptr()[r]; i < m.ptr()[r+1]; i++)
tmp[m.ind()[i]*m.dim_y + r] = m.val()[i];
}
stream << tmp;
return stream;
}
#endif /* CONTAINER_H_ */

260
TVALCPU/src/mat_mult.h Normal file
View File

@@ -0,0 +1,260 @@
/*
* mat_mult.h
*
* Created on: Oct 7, 2011
* Author: ditlevsen
*/
#ifndef MAT_MULT_H_
#define MAT_MULT_H_
//#include <omp.h>
#include <cmath>
#include <complex>
#include "container.h"
#include "blas_wrap.h"
// overloaded function for matrix-vector multiplication (dense / sparse / dynamically calculated)
template <class Type>
// y = Op(A)*x
// A Row Major, x, y column Major!
inline void gemv(CBLAS_TRANSPOSE TransA, const mat<Type> &A, const mat<Type> &x, mat<Type> &y) {
cblas_gemv(CblasRowMajor, TransA, A.dim_y, A.dim_x, (Type)1, A.data(), A.dim_x, x.data(), 1, (Type)0, y.data(), 1);
}
template <class Type>
inline void gemv(CBLAS_TRANSPOSE TransA, const sparse_mat<Type> &A, const mat<Type> &x, mat<Type> &y) {
char transpose;
if(TransA == CblasTrans)
transpose = 't';
else if(TransA == CblasConjTrans)
transpose = 'c';
else
transpose = 'n';
char *matdescra = (char *)"GLLC";
Type alpha = 1;
Type beta = 0;
mkl_cscmv(&transpose, (int *)&A.dim_y, (int *)&A.dim_x, &alpha, matdescra, (Type *)A.val(), (int *)A.ind(),
(int *)A.ptr(), (int *)(A.ptr() + 1), (Type *)x.data(), &beta, y.data());
}
// functions for the dynamic calculation of the measurement matrix...
template <class Type>
inline void mult_element(CBLAS_TRANSPOSE TransA, int ray, int x_pixel, int y_pixel, int z_pixel, int rv_x, int rv_y, float scale_factor, const mat<Type> &x, mat<Type> &y) {
int lin_index = x_pixel + y_pixel*rv_x + z_pixel*rv_x*rv_y;
if(TransA == CblasNoTrans) {
y[ray] += x[lin_index] * (Type) scale_factor;
} else {
y[lin_index] += x[ray] * (Type) scale_factor;
}
}
template <class Type>
void gemv(CBLAS_TRANSPOSE TransA, const geometry &A, const mat<Type> &x, mat<Type> &y) {
memset(y.data(), 0, y.len*sizeof(Type));
mat<Type> **vecs;
int numthreads;
#pragma omp parallel
{
int dx, dy, dz, x_inc, mx, y_inc, my, z_inc, mz, x_pixel, y_pixel, z_pixel, ray, err_1, err_2;
int threadnum = omp_get_thread_num();
#pragma omp single
{
numthreads = omp_get_num_threads();
vecs = new mat<Type>* [numthreads];
}
if(threadnum==0)
vecs[0] = &y;
else
vecs[threadnum] = new mat<Type>(y.len);
#pragma omp for
for(int emitter=0; emitter < A.num_emitters; emitter++) {
for(int receiver=0; receiver < A.num_receivers; receiver++) {
ray = emitter*A.num_receivers + receiver;
// trace path using Bresenheimer algorithm...
dx = A.x_receivers[receiver] - A.x_emitters[emitter];
dy = A.y_receivers[receiver] - A.y_emitters[emitter];
dz = A.z_receivers[receiver] - A.z_emitters[emitter];
x_inc = (dx < 0) ? -1 : 1;
mx = abs(dx);
y_inc = (dy < 0) ? -1 : 1;
my = abs(dy);
z_inc = (dz < 0) ? -1 : 1;
mz = abs(dz);
x_pixel = A.x_emitters[emitter];
y_pixel = A.y_emitters[emitter];
z_pixel = A.z_emitters[emitter];
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
if (mx >= my && mx >= mz) {
err_1 = 2*my - mx;
err_2 = 2*mz - mx;
for(int j = 1; j < mx+1; j++) {
if (err_1 > 0) {
y_pixel += y_inc;
err_1 -= 2*mx;
}
if (err_2 > 0) {
z_pixel += z_inc;
err_2 -= 2*mx;
}
err_1 += 2*my;
err_2 += 2*mz;
x_pixel += x_inc;
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
}
} else if(my >= mx && my >= mz) {
err_1 = 2*mx - my;
err_2 = 2*mz - my;
for (int j = 1; j < my+1; j++) {
if (err_1 > 0) {
x_pixel += x_inc;
err_1 -= 2*my;
}
if (err_2 > 0) {
z_pixel += z_inc;
err_2 -= 2*my;
}
err_1 += 2*mx;
err_2 += 2*mz;
y_pixel += y_inc;
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
}
} else {
err_1 = 2*mx - mz;
err_2 = 2*my - mz;
for (int j = 1; j < mz+1; j++) {
if (err_1 > 0) {
x_pixel += x_inc;
err_1 -= 2*mz;
}
if (err_2 > 0) {
y_pixel += y_inc;
err_2 -= 2*mz;
}
err_1 += 2*mx;
err_2 += 2*my;
z_pixel += z_inc;
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
}
}
}
}
}
for(int i=1; i<numthreads; i++) {
cblas_axpy(y.len, 1, vecs[i]->data(), 1, y.data(), 1);
delete vecs[i];
}
delete vecs;
}
/*
// Version mit nur einem Loop für alle Richtungen (-> "Prototyp" für GraKa-Version), auf CPU etwas langsamer
template <class Type>
void gemv(CBLAS_TRANSPOSE TransA, const geometry &A, const mat<Type> &x, mat<Type> &y) {
memset(y.data(), 0, y.len*sizeof(Type));
mat<Type> **vecs;
int numthreads;
#pragma omp parallel
{
int dx, dy, dz, x_inc, mx, y_inc, my, z_inc, mz, x_pixel, y_pixel, z_pixel, ray, err_1, err_2;
int *m1, *m2, *m3, *inc1, *inc2, *inc3, *pixel1, *pixel2, *pixel3;
int threadnum = omp_get_thread_num();
#pragma omp single
{
numthreads = omp_get_num_threads();
vecs = new mat<Type>* [numthreads];
}
if(threadnum==0)
vecs[0] = &y;
else
vecs[threadnum] = new mat<Type>(y.len);
#pragma omp for
for(int emitter=0; emitter < A.num_emitters; emitter++) {
for(int receiver=0; receiver < A.num_receivers; receiver++) {
ray = emitter*A.num_receivers + receiver;
// trace path using Bresenheimer algorithm...
dx = A.x_receivers[receiver] - A.x_emitters[emitter];
dy = A.y_receivers[receiver] - A.y_emitters[emitter];
dz = A.z_receivers[receiver] - A.z_emitters[emitter];
x_inc = (dx < 0) ? -1 : 1;
mx = abs(dx);
y_inc = (dy < 0) ? -1 : 1;
my = abs(dy);
z_inc = (dz < 0) ? -1 : 1;
mz = abs(dz);
x_pixel = A.x_emitters[emitter];
y_pixel = A.y_emitters[emitter];
z_pixel = A.z_emitters[emitter];
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
if (mx >= my && mx >= mz) {
m1 = &mx; pixel1 = &x_pixel; inc1 = &x_inc;
m2 = &my; pixel2 = &y_pixel; inc2 = &y_inc;
m3 = &mz; pixel3 = &z_pixel; inc3 = &z_inc;
} else if(my >= mx && my >= mz) {
m1 = &my; pixel1 = &y_pixel; inc1 = &y_inc;
m2 = &mx; pixel2 = &x_pixel; inc2 = &x_inc;
m3 = &mz; pixel3 = &z_pixel; inc3 = &z_inc;
} else {
m1 = &mz; pixel1 = &z_pixel; inc1 = &z_inc;
m2 = &mx; pixel2 = &x_pixel; inc2 = &x_inc;
m3 = &my; pixel3 = &y_pixel; inc3 = &y_inc;
}
err_1 = 2 * *m2 - *m1;
err_2 = 2 * *m3 - *m1;
for(int j = 1; j < *m1+1; j++) {
if (err_1 > 0) {
*pixel2 += *inc2;
err_1 -= 2 * *m1;
}
if (err_2 > 0) {
*pixel3 += *inc3;
err_2 -= 2 * *m1;
}
err_1 += 2 * *m2;
err_2 += 2 * *m3;
*pixel1 += *inc1;
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
}
}
}
}
for(int i=1; i<numthreads; i++) {
cblas_axpy(y.len, 1, vecs[i]->data(), 1, y.data(), 1);
delete vecs[i];
}
delete vecs;
}*/
#endif /* MAT_MULT_H_ */

21
TVALCPU/src/timestamp.cpp Normal file
View File

@@ -0,0 +1,21 @@
/*
* utility.cpp
*
* Created on: Mar 22, 2011
* Author: ditlevsen
*/
#include <sys/time.h>
#include <sys/types.h>
#include <cstdlib>
// get time stamp in seconds using Native Linux Time Measurement
double get_timestamp() {
struct timeval tp;
double sec, usec;
gettimeofday(&tp, NULL);
sec = static_cast<double>( tp.tv_sec );
usec = static_cast<double>( tp.tv_usec )/1E6;
return sec + usec;
}

7
TVALCPU/src/timestamp.h Normal file
View File

@@ -0,0 +1,7 @@
#ifndef TIMESTAMP_H_
#define TIMESTAMP_H_
// returns a time stamp in seconds, using native Linux time measurement
extern double get_timestamp();
#endif

861
TVALCPU/src/tval3.cpp Normal file
View File

@@ -0,0 +1,861 @@
/*
* tval3.cpp
*
* Created on: Mar 21, 2011d
* Author: ditlevsen
*/
#include <cstdlib>
#include <cmath>
#include <omp.h>
#include <complex>
#include <string>
#include "tval3_types.h"
#include "utility.h"
#include "timestamp.h"
#include "blas_wrap.h"
#include "mat_mult.h"
#include <mkl.h>
#include <fstream>
#include <cfloat>
using namespace std;
//#define PROFILING
#ifdef PROFILING
double t_mult, t_d, t_dt;
bool rec_time;
#endif
// this struct holds various values, accessed by the functions get_g(), update_g(), update_W() and update_mlp()
// the member names correspond to the variable names in the MATLAB-version
template<class T_comp, class T_scal>
struct tval3_data { // parameters and intermediate results...
// dimensions...
int p; // y-dimension of reconstruction volume
int q; // x-dimension of reconstruction volume
int r; // z-dimension of reconstruction volume
int n; // number of pixels (n = p*q*r)
int m; // number of measurements
// penalty parameters...
T_scal mu;
T_scal beta;
T_scal muDbeta; // mu/beta
// multiplier...
mat<T_comp> delta;
mat<T_comp> sigmaX;
mat<T_comp> sigmaY;
mat<T_comp> sigmaZ;
// lagrangian function and sub terms...
T_comp f;
T_scal lam1;
T_scal lam2;
T_scal lam3;
T_comp lam4;
T_comp lam5;
mat<T_comp> Up;
mat<T_comp> dU; // U-Up, after steepest descent
mat<T_comp> uup; // U-Up, after "backtracking"
mat<T_comp> Ux; // gradients in x-direction
mat<T_comp> Uxp;
mat<T_comp> dUx;
mat<T_comp> Uy; // gradients in y-direction
mat<T_comp> Uyp;
mat<T_comp> dUy;
mat<T_comp> Uz; // gradients in y-direction
mat<T_comp> Uzp;
mat<T_comp> dUz;
mat<T_comp> Wx;
mat<T_comp> Wy;
mat<T_comp> Wz;
mat<T_comp> Atb; // A'*b
mat<T_comp> Au; // A*u
mat<T_comp> Aup;
mat<T_comp> dAu;
mat<T_comp> d; // gradient of objective function (2.28)
mat<T_comp> g; // sub term of (2.28)
mat<T_comp> gp;
mat<T_comp> dg;
mat<T_comp> g2; // sub term of (2.28)
mat<T_comp> g2p;
mat<T_comp> dg2;
tval3_data<T_comp, T_scal>(const mat<T_comp> &U, const mat<T_comp> &b, T_scal mu0, T_scal beta0):
p(U.dim_y), q(U.dim_x), r(U.dim_z), n(U.len), m(b.len), mu(mu0), beta(beta0), delta(b.len), sigmaX(U.dim_y, U.dim_x, U.dim_z),
sigmaY(U.dim_y, U.dim_x, U.dim_z), sigmaZ(U.dim_y, U.dim_x, U.dim_z), Up(U.dim_y, U.dim_x, U.dim_z), dU(U.dim_y, U.dim_x, U.dim_z),
uup(U.len), Ux(U.dim_y, U.dim_x, U.dim_z), Uxp(U.dim_y, U.dim_x, U.dim_z), dUx(U.dim_y, U.dim_x, U.dim_z), Uy(U.dim_y, U.dim_x, U.dim_z),
Uyp(U.dim_y, U.dim_x, U.dim_z), dUy(U.dim_y, U.dim_x, U.dim_z), Uz(U.dim_y, U.dim_x, U.dim_z), Uzp(U.dim_y, U.dim_x, U.dim_z),
dUz(U.dim_y, U.dim_x, U.dim_z), Wx(U.dim_y, U.dim_x, U.dim_z), Wy(U.dim_y, U.dim_x, U.dim_z), Wz(U.dim_y, U.dim_x, U.dim_z), Atb(U.len),
Au(b.len), Aup(b.len), dAu(b.len), d(U.dim_y, U.dim_x, U.dim_z), g(U.len), gp(U.len), dg(U.len), g2(U.len), g2p(U.len), dg2(U.len) {}
};
// z <- x + alpha*y
// N: number of elements
template<class T_mat, class T_scal>
void vec_add(const int N, const int stride, const T_mat *x, const T_scal alpha, const T_mat *y, T_mat *z) {
#pragma vector always
#pragma parallel always
for(int i=0; i<N; i++)
z[i*stride] = x[i*stride] + alpha*y[i*stride];
}
template<class T_mat, class T_scal>
void vec_add(const int N, const int stride, const std::complex<T_mat> *x, const T_scal alpha, const std::complex<T_mat> *y, std::complex<T_mat> *z) {
T_mat *data_x = (T_mat *)x;
T_mat *data_y = (T_mat *)y;
T_mat *data_z = (T_mat *)z;
#pragma vector always
#pragma parallel always
for(int i=0; i<N; i++) {
data_z[2*i*stride] = data_x[2*i*stride] + alpha*data_y[2*i*stride];
data_z[2*i*stride + 1] = data_x[2*i*stride + 1] + alpha*data_y[2*i*stride + 1];
}
}
// Scales the measurement vector. Returns the scale factor.
template<class T_comp, class T_scal>
T_scal scaleb(mat<T_comp> &b) {
T_scal scl=1;
if(b.len > 0) {
T_scal threshold1 = 0.5;
T_scal threshold2 = 1.5;
scl=1;
T_comp val;
T_scal val_abs;
T_comp bmin = b[0];
T_comp bmax = bmin;
T_scal bmin_abs = abs(bmin);
T_scal bmax_abs = bmin_abs;
for(int i=0; i < b.len; i++) {
val = b[i];
val_abs = abs(val);
if(val_abs < bmin_abs) {
bmin = val;
bmin_abs = val_abs;
}
if(val_abs > bmax_abs) {
bmax = val;
bmax_abs = val_abs;
}
}
T_scal b_dif = abs(bmax - bmin);
if(b_dif < threshold1) {
scl = threshold1/b_dif;
cblas_scal(b.len, scl, b.data(), 1);
} else if(b_dif > threshold2) {
scl = threshold2/b_dif;
cblas_scal(b.len, scl, b.data(), 1);
}
}
return scl;
}
template<class T_comp, class T_scal>
void D(mat<T_comp> &Ux, mat<T_comp> &Uy, mat<T_comp> &Uz, const mat<T_comp> &U) {
#ifdef PROFILING
double start = get_timestamp();
#endif
vec_add(U.dim_x*U.dim_y*U.dim_z - 1, 1, U.data()+1, -1, U.data(), Ux.data());
vec_add(U.dim_y*U.dim_z, U.dim_x, U.data(), -1, U.data()+U.dim_x-1, Ux.data()+U.dim_x-1);
vec_add((U.dim_y*U.dim_z-1)*U.dim_x, 1, U.data() + U.dim_x, -1, U.data(), Uy.data());
for(int z = 0; z < U.dim_z; z++)
vec_add(U.dim_x, 1, U.data() + z*U.dim_x*U.dim_y, -1, U.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y,
Uy.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y);
vec_add(U.dim_x*U.dim_y*(U.dim_z - 1), 1, U.data() + U.dim_x*U.dim_y, -1, U.data(), Uz.data());
vec_add(U.dim_x*U.dim_y, 1, U.data(), -1, U.data() + (U.dim_z - 1) * U.dim_x * U.dim_y,
Uz.data() + (U.dim_z - 1) * U.dim_x * U.dim_y);
#ifdef PROFILING
double stop = get_timestamp();
if(rec_time) t_d += stop - start;
#endif
}
template<class T_comp, class T_scal>
void Dt(mat<T_comp> &res, const mat<T_comp> &X, const mat<T_comp> &Y, const mat<T_comp> &Z) {
#ifdef PROFILING
double start = get_timestamp();
#endif
vec_add(X.len-1, 1, X.data(), -1, X.data()+1, res.data()+1);
vec_add(X.dim_y * X.dim_z, X.dim_x, X.data()+X.dim_x-1, -1, X.data(), res.data());
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)1, Y.data(), 1, res.data()+Y.dim_x, 1);
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)-1, Y.data()+Y.dim_x, 1, res.data()+Y.dim_x, 1);
for(int z = 0; z < Y.dim_z; z++) {
cblas_axpy(Y.dim_x, (T_comp)1, Y.data() + (Y.dim_y-1)*Y.dim_x + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
cblas_axpy(Y.dim_x, (T_comp)-1, Y.data() + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
}
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)1, Z.data(), 1, res.data()+Z.dim_y*Z.dim_x, 1);
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)-1, Z.data()+Z.dim_y*Z.dim_x, 1, res.data()+Z.dim_y*Z.dim_x, 1);
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)1, Z.data() + (Z.dim_z-1)*Z.dim_x*Z.dim_y, 1, res.data(), 1);
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)-1, Z.data(), 1, res.data(), 1);
#ifdef PROFILING
double stop = get_timestamp();
if(rec_time) t_dt += stop - start;
#endif
}
// overloaded function get_g_mult calculates:
// Au = A*U...
// g = A'*Au...
// standrad version (parameter submat_mb not used)
template<class T_comp, class T_scal, class T_A>
void get_g_mult(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, int submat_mb) {
gemv(CblasNoTrans, A, U, data.Au);
gemv(CblasConjTrans, A, data.Au, data.g);
}
// special version for dense matrices (cache blocking for single layer reconstructions....)
// submat_mb: size of submatrices in MB
template<class T_comp, class T_scal>
void get_g_mult(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &A, const mat<T_comp> &U, int submat_mb) {
int submat_elements = submat_mb*1024*1024 / sizeof(T_comp);
mat<T_comp> g_tmp(data.n);
int submat_dim_y = max(submat_elements / data.n, 1);
int n_y = (data.m + submat_dim_y - 1) / submat_dim_y;
int r;
memset(data.g.data(), 0, data.n * sizeof(T_comp));
for(int y=0; y < n_y; y++) {
r = min(submat_dim_y, data.m - y * submat_dim_y);
cblas_gemv(CblasRowMajor, CblasNoTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, U.data(), 1, 0, data.Au.data() + y * submat_dim_y, 1);
cblas_gemv(CblasRowMajor, CblasConjTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, data.Au.data() + y * submat_dim_y, 1, 0, g_tmp.data(), 1);
cblas_axpy(data.n, 1, g_tmp.data(), 1, data.g.data(), 1);
}
}
template<class T_comp, class T_scal, class T_A>
void get_g(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, const mat<T_comp> &b, int submat_mb) {
#ifdef PROFILING
double start = get_timestamp();
#endif
get_g_mult(data, A, U, submat_mb);
#ifdef PROFILING
double stop = get_timestamp();
if(rec_time) t_mult += stop - start;
#endif
cblas_axpy(data.n, (T_comp)-1, data.Atb.data(), 1, data.g.data(), 1);
// update g2, lam2, lam4...
mat<T_comp> Vx(data.p, data.q, data.r);
mat<T_comp> Vy(data.p, data.q, data.r);
mat<T_comp> Vz(data.p, data.q, data.r);
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
//update lam3, lam5...
mat<T_comp> Aub(data.m);
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
data.lam3=real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
data.f = (data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3) - data.lam4 - data.lam5;
}
template<class T_comp, class T_scal, class T_A>
T_scal get_tau(tval3_data<T_comp, T_scal> &data, const T_A &A, bool fst_iter) {
T_scal tau;
// calculate tau...
if(fst_iter) {
mat<T_comp> dx(data.p, data.q, data.r);
mat<T_comp> dy(data.p, data.q, data.r);
mat<T_comp> dz(data.p, data.q, data.r);
D<T_comp, T_scal>(dx, dy, dz, data.d);
T_comp dDd = cblas_dotc(data.n, dx.data(), 1, dx.data(), 1) +
cblas_dotc(data.n, dy.data(), 1, dy.data(), 1) +
cblas_dotc(data.n, dz.data(), 1, dz.data(), 1);
T_comp dd = cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1);
mat<T_comp> Ad(data.m);
// Ad=A*d...
#ifdef PROFILING
double start = get_timestamp();
#endif
gemv(CblasNoTrans, A, data.d, Ad);
#ifdef PROFILING
double stop = get_timestamp();
if(rec_time) t_mult += stop - start;
#endif
T_comp Add = cblas_dotc(data.m, Ad.data(), 1, Ad.data(), 1);
tau = abs(dd/(dDd + data.muDbeta*Add));
} else {
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data());
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data());
T_comp ss = cblas_dotc(data.n, data.uup.data(), 1, data.uup.data(), 1);
mat<T_comp> tmp(data.dg2);
cblas_axpy(data.n, data.muDbeta, data.dg.data(), 1, tmp.data(), 1);
T_comp sy = cblas_dotc(data.n, data.uup.data(), 1, tmp.data(), 1);
tau = abs(ss/sy);
}
return tau;
}
template<class T_mat>
void nonneg(mat<T_mat> &U) {
T_mat val;
#pragma parallel always
#pragma vector always
for(int i=0; i < U.len; i++) {
val = U[i];
U[i] = (val < 0) ? 0 : val;
}
}
template<class T_mat>
void nonneg(mat<complex<T_mat> > &U) {
T_mat val;
T_mat *data = (T_mat *) U.data();
#pragma parallel always
#pragma vector always
for(int i=0; i < U.len; i++) {
val = data[2*i];
data[2*i] = (val < 0) ? 0 : val;
data[2*i + 1] = 0;
}
}
template<class T_comp, class T_scal, class T_A>
void descend(mat<T_comp> &U, tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &b, T_scal tau, bool non_neg,
int submat_mb) {
cblas_axpy(data.n, -1*tau, data.d.data(), 1, U.data(),1); // U = U - tau*d
if(non_neg) {
nonneg(U);
}
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
get_g<T_comp, T_scal>(data, A, U, b, submat_mb);
}
template<class T_comp, class T_scal, class T_A>
void update_g(tval3_data<T_comp, T_scal> &data, const T_scal alpha, const T_A &A, mat<T_comp> &U, const mat<T_comp> &b) {
vec_add(data.n, 1, data.gp.data(), alpha, data.dg.data(), data.g.data());
vec_add(data.n, 1, data.g2p.data(), alpha, data.dg2.data(), data.g2.data());
vec_add(data.n, 1, data.Up.data(), alpha, data.dU.data(), U.data());
vec_add(data.m, 1, data.Aup.data(), alpha, data.dAu.data(), data.Au.data());
vec_add(data.n, 1, data.Uxp.data(), alpha, data.dUx.data(), data.Ux.data());
vec_add(data.n, 1, data.Uyp.data(), alpha, data.dUy.data(), data.Uy.data());
vec_add(data.n, 1, data.Uzp.data(), alpha, data.dUz.data(), data.Uz.data());
// update lam2, lam4...
mat<T_comp> Vx(data.p, data.q, data.r);
mat<T_comp> Vy(data.p, data.q, data.r);
mat<T_comp> Vz(data.p, data.q, data.r);
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
//update lam3, lam5...
mat<T_comp> Aub(data.m);
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
data.lam3 =real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
}
template<class T_comp, class T_scal, class T_A>
void min_u(tval3_data<T_comp, T_scal> &data, mat<T_comp> &U, T_scal &gam, const T_A &A, const mat<T_comp> &b,
const tval3_options<T_scal> &opts, T_comp C, bool fst_iter, int submat_mb) {
T_scal tau, alpha, c_armin;
tau = get_tau<T_comp, T_scal, T_A>(data, A, fst_iter);
// keep previous values...
data.Up = U; data.gp = data.g; data.g2p = data.g2;
data.Aup = data.Au; data.Uxp = data.Ux; data.Uyp = data.Uy;
data.Uzp = data.Uz;
// one step steepest descend...
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
// NMLA...
alpha=1;
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.dU.data()); // Ud = U - Up
c_armin = real(cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1) * tau * opts.c * data.beta); // c_armin = d'*d*tau*c*beta
if(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data()); // dg=g-gp
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data()); // dg2=g2-g2p
vec_add(data.m, 1, data.Au.data(), -1, data.Aup.data(), data.dAu.data()); // dAu=Au-Aup
vec_add(data.n, 1, data.Ux.data(), -1, data.Uxp.data(), data.dUx.data()); // dUx=Ux-Uxp
vec_add(data.n, 1, data.Uy.data(), -1, data.Uyp.data(), data.dUy.data()); // Uy = Uy-Uyp
vec_add(data.n, 1, data.Uz.data(), -1, data.Uzp.data(), data.dUz.data()); // Uz = Uz-Uzp
int cnt=0;
while(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
if(cnt==5) { // "backtracking" not successful...
gam *= opts.rate_gam;
tau = get_tau<T_comp, T_scal, T_A>(data, A, true);
U = data.Up;
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
break;
}
alpha *= opts.gamma;
update_g<T_comp, T_scal, T_A>(data, alpha, A, U, b);
cnt++;
}
}
}
template<class T_comp, class T_scal>
void get_gradient(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &DtsAtd) {
// d = g2 + muDbeta*g + DtsAtd... (DtsAtd has opposite sign, compared to the MATLAB-version!)
data.d = DtsAtd;
cblas_axpy(data.n, 1, data.g2.data(), 1, data.d.data(), 1);
cblas_axpy(data.n, data.muDbeta, data.g.data(), 1, data.d.data(), 1);
}
// scalar
template<class T_mat, class T_scal>
void shrinkage(tval3_data<T_mat, T_scal> &data) {
T_scal sum=0;
T_scal temp_wx, temp_wy, temp_wz;
T_mat Uxbar, Uybar, Uzbar;
#pragma parallel always
#pragma vector always
for(int i=0; i < data.n; i++) {
Uxbar = data.Ux[i] - data.sigmaX[i]/data.beta;
Uybar = data.Uy[i] - data.sigmaY[i]/data.beta;
Uzbar = data.Uz[i] - data.sigmaZ[i]/data.beta;
temp_wx = abs(Uxbar) - 1/data.beta;
temp_wy = abs(Uybar) - 1/data.beta;
temp_wz = abs(Uzbar) - 1/data.beta;
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
sum += temp_wx + temp_wy + temp_wz;
data.Wx[i] = (Uxbar >= 0) ? temp_wx : -1 * temp_wx;
data.Wy[i] = (Uybar >= 0) ? temp_wy : -1 * temp_wy;
data.Wz[i] = (Uzbar >= 0) ? temp_wz : -1 * temp_wz;
}
data.lam1 = sum;
}
// complex
template<class T_mat, class T_scal>
void shrinkage(tval3_data<complex<T_mat>, T_scal> &data) {
T_mat sum=0;
T_mat temp_wx, temp_wy, temp_wz;
T_mat Uxbar_real, Uxbar_imag, Uxbar_abs, Uybar_real, Uybar_imag, Uybar_abs, Uzbar_real, Uzbar_imag, Uzbar_abs;
T_mat *Ux_data = (T_mat *) data.Ux.data();
T_mat *Uy_data = (T_mat *) data.Uy.data();
T_mat *Uz_data = (T_mat *) data.Uz.data();
T_mat *sigmaX_data = (T_mat *) data.sigmaX.data();
T_mat *sigmaY_data = (T_mat *) data.sigmaY.data();
T_mat *sigmaZ_data = (T_mat *) data.sigmaZ.data();
T_mat *Wx_data = (T_mat *) data.Wx.data();
T_mat *Wy_data = (T_mat *) data.Wy.data();
T_mat *Wz_data = (T_mat *) data.Wz.data();
const int len = data.n;
#pragma parallel always
#pragma vector always
for(int i=0; i < len; i++) {
Uxbar_real = Ux_data[2*i] - sigmaX_data[2*i]/data.beta;
Uxbar_imag = Ux_data[2*i + 1] - sigmaX_data[2*i + 1]/data.beta;
Uybar_real = Uy_data[2*i] - sigmaY_data[2*i]/data.beta;
Uybar_imag = Uy_data[2*i + 1] - sigmaY_data[2*i + 1]/data.beta;
Uzbar_real = Uz_data[2*i] - sigmaZ_data[2*i]/data.beta;
Uzbar_imag = Uz_data[2*i + 1] - sigmaZ_data[2*i + 1]/data.beta;
Uxbar_abs = sqrt(Uxbar_real*Uxbar_real + Uxbar_imag*Uxbar_imag);
Uybar_abs = sqrt(Uybar_real*Uybar_real + Uybar_imag*Uybar_imag);
Uzbar_abs = sqrt(Uzbar_real*Uzbar_real + Uzbar_imag*Uzbar_imag);
temp_wx = Uxbar_abs - 1/data.beta;
temp_wy = Uybar_abs - 1/data.beta;
temp_wz = Uzbar_abs - 1/data.beta;
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
sum += temp_wx + temp_wy + temp_wz;
Wx_data[2*i] = (Uxbar_abs > 0) ? temp_wx*Uxbar_real/Uxbar_abs : 0;
Wx_data[2*i+1] = (Uxbar_abs > 0) ? temp_wx*Uxbar_imag/Uxbar_abs : 0;
Wy_data[2*i] = (Uybar_abs > 0) ? temp_wy*Uybar_real/Uybar_abs : 0;
Wy_data[2*i+1] = (Uybar_abs > 0) ? temp_wy*Uybar_imag/Uybar_abs : 0;
Wz_data[2*i] = (Uzbar_abs > 0) ? temp_wz*Uybar_real/Uzbar_abs : 0;
Wz_data[2*i+1] = (Uzbar_abs > 0) ? temp_wz*Uybar_imag/Uzbar_abs : 0;
}
data.lam1 = sum;
}
template<class T_comp, class T_scal>
void update_W(tval3_data<T_comp, T_scal> &data) {
data.f -= (data.lam1 + data.beta/2*data.lam2 - data.lam4);
shrinkage(data);
// update g2, lam2, lam4...
mat<T_comp> Vx(data.p, data.q, data.r);
mat<T_comp> Vy(data.p, data.q, data.r);
mat<T_comp> Vz(data.p, data.q, data.r);
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
data.f += (data.lam1 + data.beta/2*data.lam2 - data.lam4);
}
template<class T_comp, class T_scal>
void update_mlp(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &b) {
data.f += (data.lam4 + data.lam5);
mat<T_comp> Vx(data.p, data.q, data.r);
mat<T_comp> Vy(data.p, data.q, data.r);
mat<T_comp> Vz(data.p, data.q, data.r);
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
cblas_axpy(data.n, -1*data.beta, Vx.data(), 1, data.sigmaX.data(), 1); // sigmaX -= beta*Vx
cblas_axpy(data.n, -1*data.beta, Vy.data(), 1, data.sigmaY.data(), 1); // sigmaY -= beta*Vy
cblas_axpy(data.n, -1*data.beta, Vz.data(), 1, data.sigmaZ.data(), 1); // sigmaz -= beta*Vz
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
mat<T_comp> Aub(data.m);
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
cblas_axpy(data.m, -1*data.mu, Aub.data(), 1, data.delta.data(), 1); // delta -= mu*Aub
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
data.f -= (data.lam4 + data.lam5);
}
template<class T_comp>
void check_params(mat<T_comp> &U, const mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
if(U.format != mat_row_major || Ut.format != mat_row_major || A.format != mat_row_major)
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
else if(Ut.len > 0 && Ut.len != U.len)
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
else if(U.len != A.dim_x)
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
else if(b.len != A.dim_y)
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
}
template<class T_comp>
void check_params(mat<T_comp> &U, const sparse_mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
if(U.format != mat_row_major || Ut.format != mat_row_major)
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
if(A.format != sparse_mat_csc)
throw tval3_exception(string("Argument error: A must be in CSC format!"));
if(Ut.len > 0 && Ut.len != U.len)
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
else if(U.len != A.dim_x)
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
else if(b.len != A.dim_y)
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
}
template<class T_comp>
void check_params(mat<T_comp> &U, const geometry &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
if(U.format != mat_row_major || Ut.format != mat_row_major)
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
else if(Ut.len > 0 && Ut.len != U.len)
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
else if(b.len != A.num_emitters * A.num_receivers)
throw tval3_exception(string("Argument error: b.len must be equal to A.num_emitters * A.num_receivers!"));
}
template<class T_comp, class T_scal, class T_A>
const tval3_info<T_scal> tval3(mat<T_comp> &U, const T_A &A, const mat<T_comp> &b, const tval3_options<T_scal> &opts, const mat<T_comp> &Ut,
int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception) {
double start, stop;
#ifdef PROFILING
double start_loop, stop_loop;
t_mult = 0; t_d = 0; t_dt = 0; rec_time = false;
#endif
tval3_info<T_scal> info;
start = get_timestamp();
try {
check_params (U, A, b, Ut);
omp_set_num_threads(omp_get_num_procs() / virtual_procs_per_core);
mat<T_comp> bs = b; // create a copy of b (scaling...)
tval3_data<T_comp, T_scal> data(U, bs, opts.mu0, opts.beta0);
T_scal muf=opts.mu;
T_scal betaf=opts.beta, beta0=0;
mat<T_comp> DtsAtd(data.n);
T_scal gam = opts.gam;
T_scal scl = scaleb<T_comp, T_scal>(bs);
gemv(CblasConjTrans, A, bs, data.Atb); // Atb = A'*bs
U = data.Atb; // initial guess: U=A'*b
if(data.mu > muf) data.mu = muf;
if(data.beta > betaf) data.beta = betaf;
data.muDbeta = data.mu/data.beta;
mat <T_comp> rcdU(U), UrcdU(data.n);
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
shrinkage(data);
get_g<T_comp, T_scal>(data, A, U, bs, submat_mb);
get_gradient<T_comp, T_scal>(data, DtsAtd);
T_scal Q=1, Qp;
T_comp C=data.f;
int count_outer=0, count_total=0;
bool fst_iter;
T_scal RelChg, RelChgOut=0, nrmup=0;
#ifdef PROFILING
rec_time = true;
start_loop = get_timestamp();
#endif
while((count_outer < opts.maxcnt) && (count_total < opts.maxit)) {
fst_iter = true;
while(count_total < opts.maxit) {
// u-subproblem...
min_u<T_comp, T_scal>(data, U, gam, A, bs, opts, C, fst_iter, submat_mb);
// shrinkage like step...
update_W<T_comp, T_scal>(data);
// update reference values...
Qp = Q, Q = gam*Qp + 1; C = (gam*Qp*C + data.f)/Q;
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.uup.data());
// compute gradient...
get_gradient<T_comp, T_scal>(data, DtsAtd);
count_total++;
// calculate relative change...
nrmup = cblas_nrm2(data.n, data.Up.data(), 1);
RelChg = cblas_nrm2(data.n, data.uup.data(), 1) / nrmup;
if((RelChg < opts.tol_inn) && (!fst_iter))
break;
fst_iter = false;
}
count_outer++;
// calculate relative change...
vec_add(data.n, 1, U.data(), -1, rcdU.data(), UrcdU.data()); // UrcdU = U - rcdU
RelChgOut = cblas_nrm2(data.n, UrcdU.data(), 1) / nrmup;
rcdU = U;
if(RelChgOut < opts.tol)
break;
// update multipliers...
update_mlp<T_comp, T_scal>(data, bs);
// update penalty parameters for continuation scheme...
beta0 = data.beta;
data.beta *= opts.rate_cnt;
data.mu *= opts.rate_cnt;
if(data.beta > betaf) data.beta = betaf;
if(data.mu > muf) data.mu = muf;
data.muDbeta = data.mu/data.beta;
// update f...
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
// DtsAtd = beta0/beta*d
DtsAtd = data.d;
cblas_scal(data.n, beta0/data.beta, DtsAtd.data(), 1);
// compute gradient...
get_gradient<T_comp, T_scal>(data, DtsAtd);
// reset reference values...
gam=opts.gam; Q=1; C=data.f;
}
#ifdef PROFILING
stop_loop = get_timestamp();
rec_time = false;
ofstream file;
file.open("profile.txt");
file << "loop [ms]: " << (stop_loop - start_loop)*1000 << "\n";
file << "mult [ms]: " << t_mult*1000 << "\n";
file << "d [ms]: " << t_d*1000 << "\n";
file << "dt [ms]: " << t_dt*1000 << "\n";
file.close();
#endif
// scale U. Take real part only, if opts.isreal==true
for(int i=0; i < data.n; i++) {
if(opts.isreal)
U[i] = (T_comp)real(U[i]);
U[i] = U[i]/scl;
}
stop = get_timestamp();
// generate return value...
info.secs = stop - start;
info.outer_iters = count_outer;
info.total_iters = count_total;
info.rel_chg = RelChgOut;
if(Ut.len == U.len) {
mat<T_comp> deltaU = Ut;
cblas_axpy(Ut.len, (T_comp)-1, U.data(), 1, deltaU.data(), 1);
T_scal nrm_deltaU = cblas_nrm2(deltaU.len, deltaU.data(), 1);
T_scal nrm_Ut = cblas_nrm2(Ut.len, Ut.data(), 1);
info.rel_error = nrm_deltaU/nrm_Ut;
} else {
info.rel_error = 0;
}
} catch (tval3_exception) {
throw;
} catch (const std::exception &ex) {
string msg = "Internal error: ";
msg += ex.what();
throw tval3_exception(msg);
}
return info;
}
const tval3_info<float> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, float, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b,
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, double, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<double> &U, const mat<double> &A, const mat<double> &b,
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<double, double, mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<float> tval3(mat<complex<float> > &U, const mat<complex<float> > &A, const mat<complex<float> > &b,
const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<float>, float, mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<complex<double> > &U, const mat<complex<double> > &A, const mat<complex<double> > &b,
const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<double>, double, mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<float> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, float, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, double, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<double> &U, const sparse_mat<double> &A, const mat<double> &b,
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<double, double, sparse_mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<float> tval3(mat<complex<float> > &U, const sparse_mat<complex<float> > &A,
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<float>, float, sparse_mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<complex<double> > &U, const sparse_mat<complex<double> > &A,
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<double>, double, sparse_mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<float> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<float, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<double> &U, const geometry &A, const mat<double> &b,
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<double, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<float> tval3(mat<complex<float> > &U, const geometry &A,
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5,
int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<float>, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}
const tval3_info<double> tval3(mat<complex<double> > &U, const geometry &A,
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5,
int virtual_procs_per_core=2) throw(tval3_exception)
{
return tval3<std::complex<double>, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
}

67
TVALCPU/src/tval3.h Normal file
View File

@@ -0,0 +1,67 @@
/*
* tval3.h
*
* Created on: Mar 14, 2011
* Author: ditlevsen
*/
#ifndef TVAL3_H_
#define TVAL3_H_
#include "container.h"
#include <complex>
#include "tval3_types.h"
extern const tval3_info<float> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<double> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<double> &U, const mat<double> &A, const mat<double> &b, const tval3_options<double> &opts,
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const mat<std::complex<float> > &A, const mat<std::complex<float> > &b,
const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
throw(tval3_exception);
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const mat<std::complex<double> > &A,
const mat<std::complex<double> > &b, const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5,
int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<float> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b, const tval3_options<double> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<double> &U, const sparse_mat<double> &A, const mat<double> &b, const tval3_options<double> &opts,
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const sparse_mat<std::complex<float> > &A,
const mat<std::complex<float> > &b, const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5,
int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const sparse_mat<std::complex<double> > &A,
const mat<std::complex<double> > &b, const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5,
int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<float> tval3(mat<float> &U, const geometry &A, const mat<float> &b, const tval3_options<float> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<float> &U, const geometry &A, const mat<float> &b, const tval3_options<double> &opts,
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<double> tval3(mat<double> &U, const geometry &A, const mat<double> &b, const tval3_options<double> &opts,
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const geometry &A, const mat<std::complex<float> > &b,
const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
throw(tval3_exception);
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const geometry &A, const mat<std::complex<double> > &b,
const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
throw(tval3_exception);
#endif /* TVAL3_H_ */

49
TVALCPU/src/tval3_types.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef TVAL3_TYPES_H_
#define TVAL3_TYPES_H_
#include <stdexcept>
#include <string>
template<class T_scal>
struct tval3_options {
T_scal mu; // maximum value for penalty parameter mu, see (2.12)
// mu is mainly decided by noise level. Set mu big when b is noise-free,
// set mu small when b is very noisy
// suggested values for mu: 2⁴..2¹³
T_scal mu0; // initial value for mu
T_scal beta; // maximum value for penalty parameter beta, see (2.12)
T_scal beta0; // initial value for beta
T_scal rate_cnt; // continuation parameter for both mu and beta
T_scal tol; // outer loop tolerance
T_scal tol_inn; // inner loop tolerance
int maxcnt; // maximum number of outer iterations
int maxit; // maximum number of iterations (total)
T_scal c; // corresponds to the parameter delta in (2.33) (arminjo condition)
T_scal gamma; // corresponds to rho "Algorithm 2" on page 29
T_scal gam; // corresponds to parameter eta in (2.34)
// Control the degree of nonmonotonicity. 0 corresponds to monotone line search.
// The best convergence is obtained by using values closer to 1 when the iterates
// are far from the optimum, and using values closer to 0 when near an optimum.
T_scal rate_gam; // shrinkage rate of gam
bool isreal;
bool nonneg;
tval3_options(): mu(256.0f), mu0(256.0f), beta(32.0f), beta0(32.0f), rate_cnt(2.0f),
tol(1e-6f), tol_inn(1e-3f), maxcnt(10), maxit(1025), c(1e-5f), gamma(0.6f),
gam(0.9995f), rate_gam(0.9f), isreal(true), nonneg(true) {};
};
template<class T_scal>
struct tval3_info {
int total_iters;
int outer_iters;
T_scal rel_chg;
T_scal rel_error;
double secs;
};
class tval3_exception: public std::runtime_error {
public:
tval3_exception(const std::string& message) : std::runtime_error(message) {};
};
#endif

653
TVALCPU/src/tval3cpp3d.cpp Normal file
View File

@@ -0,0 +1,653 @@
#include "tval3cpp3d.h"
#include <matrix.h>
#include <mex.h>
#include <tval3.h>
#include <complex>
#include <exception>
#include <mkl_cblas.h>
using namespace std;
// create mat-object from mxArray (row major storage)
template<class Type>
mat<Type> *getMatrix(const mxArray *mi) {
if(mi != NULL) {
const mwSize *dims = mxGetDimensions(mi);
int dim_y = dims[0];
int dim_x = dims[1];
int dim_z = (mxGetNumberOfDimensions(mi) > 2) ? dims[2] : 1;
Type *mi_data = (Type *)mxGetData(mi);
mat<Type> *mo = new mat<Type>(dim_y, dim_x, dim_z);
for(int z=0; z < dim_z; z++) {
for(int y=0; y < dim_y; y++) {
for(int x=0; x < dim_x; x++) {
(*mo)[x + y*dim_x + z*dim_x*dim_y] =
mi_data[x*dim_y + y + z*dim_x*dim_y];
}
}
}
return mo;
} else {
mat<Type> *mo = new mat<Type>(0);
return mo;
}
}
// create mat-object from complex mxArray (row major)
template<class Type>
mat<complex<Type> > *getCompMatrix(const mxArray *mi) {
if(mi != NULL) {
const mwSize *dims = mxGetDimensions(mi);
int dim_y = dims[0];
int dim_x = dims[1];
int dim_z = (mxGetNumberOfDimensions(mi) > 2) ? dims[2] : 1;
Type *mi_real = (Type *)mxGetData(mi);
Type *mi_imag = (Type *)mxGetImagData(mi);
mat<complex<Type> > *mo = new mat<complex<Type> >(dim_y, dim_x, dim_z);
for(int z=0; z < dim_z; z++) {
for(int y=0; y < dim_y; y++) {
for(int x=0; x < dim_x; x++) {
(*mo)[x + y*dim_x].real(mi_real[x*dim_y + y]);
if(mxIsComplex(mi))
(*mo)[x + y*dim_x + z*dim_x*dim_y].imag(mi_imag[x*dim_y + y + z*dim_x*dim_y]);
}
}
}
return mo;
} else {
mat<complex<Type> > *mo = new mat<complex<Type> >(0);
return mo;
}
}
// create sparse_mat-object from mxArray
template<class Type>
sparse_mat<Type> *getSparseMatrix(const mxArray *mi) {
const mwSize *dims = mxGetDimensions(mi);
int dim_y = dims[0];
int dim_x = dims[1];
mwIndex *mi_row_ind = mxGetIr(mi);
mwIndex *mi_col_ptr = mxGetJc(mi);
int nnz = (int)mxGetNzmax(mi);
double *mi_data = (double *)mxGetData(mi);
sparse_mat<Type> *mo;
mo = new sparse_mat<Type>(dim_y, dim_x, nnz);
for(int i=0; i < (mo->dim_x + 1); i++) {
mo->ptr()[i] = (int) mi_col_ptr[i];
}
for(int i=0; i < mo->nnz; i++) {
mo->val()[i] = (Type) mi_data[i];
mo->ind()[i] = (int) mi_row_ind[i];
}
return mo;
}
// create sparse_mat-object from complex mxArray (row major)
template<class Type>
sparse_mat<complex<Type> > *getSparseCompMatrix(const mxArray *mi) {
const mwSize *dims = mxGetDimensions(mi);
int dim_y = dims[0];
int dim_x = dims[1];
double *mi_real = (double *)mxGetData(mi);
double *mi_imag = (double *)mxGetImagData(mi);
mwIndex *mi_row_ind = mxGetIr(mi);
mwIndex *mi_col_ptr = mxGetJc(mi);
int nnz = (int)mxGetNzmax(mi);
sparse_mat<complex<Type> > *mo = new sparse_mat<complex<Type> >(dim_y,
dim_x, nnz);
for(int i=0; i < mo->nnz; i++) {
mo->ind()[i] = mi_row_ind[i];
mo->val()[i].real((Type)mi_real[i]);
if(mxIsComplex(mi))
mo->val()[i].imag((Type)mi_imag[i]);
}
for(int i=0; i < dim_x + 1; i++)
mo->ptr()[i] = mi_col_ptr[i];
return mo;
}
// get geometry structure from mxArray
geometry *get_geometry(const mxArray *A) {
float scale_factor;
int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r;
int ne, nr, rv_x, rv_y, rv_z;
mxArray *field;
field = mxGetField(A, 0, "rv_x");
if(field == NULL) {
mexErrMsgTxt("no field rv_x in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.rv_x must be of type Int32!");
}
rv_x = *( (int *)mxGetData(field) );
}
field = mxGetField(A, 0, "rv_y");
if(field == NULL) {
mexErrMsgTxt("no field rv_y in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.rv_y must be of type Int32!");
}
rv_y = *( (int *)mxGetData(field) );
}
field = mxGetField(A, 0, "rv_z");
if(field == NULL) {
mexErrMsgTxt("no field rv_z in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.rv_z must be of type Int32!");
}
rv_z = *( (int *)mxGetData(field) );
}
field = mxGetField(A, 0, "scale_factor");
if(field == NULL) {
mexErrMsgTxt("no field scale_factor in struct A!");
} else {
if(!mxIsSingle(field)) {
mexErrMsgTxt("A.scale_factor must be of type single!");
}
scale_factor = *( (float *)mxGetData(field) );
}
field = mxGetField(A, 0, "x_emitters");
if(field == NULL) {
mexErrMsgTxt("no field x_emitters in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.x_emitters must be of type int32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.x_emitters must be a column-vector!");
ne = dims[1];
x_e = (int *)mxGetData(field);
}
field = mxGetField(A, 0, "y_emitters");
if(field == NULL) {
mexErrMsgTxt("no field y_emitters in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.y_emitters must be of type in32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.y_emitters must be a column-vector!");
if(dims[1] != ne)
mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!");
y_e = (int *)mxGetData(field);
}
field = mxGetField(A, 0, "z_emitters");
if(field == NULL) {
mexErrMsgTxt("no field z_emitters in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.z_emitters must be of type in32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.z_emitters must be a column-vector!");
if(dims[1] != ne)
mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!");
z_e = (int *)mxGetData(field);
}
field = mxGetField(A, 0, "x_receivers");
if(field == NULL) {
mexErrMsgTxt("no field x_receivers in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.x_receivers must be of type in32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.x_receivers must be a column-vector!");
nr = dims[1];
x_r = (int *)mxGetData(field);
}
field = mxGetField(A, 0, "y_receivers");
if(field == NULL) {
mexErrMsgTxt("no field y_receivers in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.y_receivers must be of type in32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.y_receivers must be a column-vector!");
if(dims[1] != nr)
mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!");
y_r = (int *)mxGetData(field);
}
field = mxGetField(A, 0, "z_receivers");
if(field == NULL) {
mexErrMsgTxt("no field z_receivers in struct A!");
} else {
if(!mxIsInt32(field)) {
mexErrMsgTxt("A.z_receivers must be of type int32!");
}
const mwSize *dims = mxGetDimensions(field);
if(dims[0] > 1)
mexErrMsgTxt("A.z_receivers must be a column-vector!");
if(dims[1] != nr)
mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!");
z_r = (int *)mxGetData(field);
}
geometry *geom = new geometry;
geom->num_emitters = ne;
geom->num_receivers = nr;
geom->x_emitters = x_e;
geom->y_emitters = y_e;
geom->z_emitters = z_e;
geom->x_receivers = x_r;
geom->y_receivers = y_r;
geom->z_receivers = z_r;
geom->scale_factor = scale_factor;
geom->rv_x = rv_x;
geom->rv_y = rv_y;
geom->rv_z = rv_z;
return geom;
}
// create tval3_options struct
template<class Type>
tval3_options<Type> *getOptions(const mxArray *optsi) {
tval3_options<Type> *optso = new tval3_options<Type>;
if(!mxIsStruct(optsi)) {
mexPrintf("Warning: opts is not a structure.");
return optso;
}
mxArray *field;
field = mxGetField(optsi, 0, "mu");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.mu is not of type double (ignored).");
else
optso->mu = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "mu0");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.mu0 is not of type double (ignored).");
else
optso->mu0 = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "beta");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.beta is not of type double (ignored).");
else
optso->beta = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "beta0");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.beta0 is not of type double (ignored).");
else
optso->beta0 = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "rate_cnt");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.rate_cnt is not of type double (ignored).");
else
optso->rate_cnt = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "tol");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.tol is not of type double (ignored).");
else
optso->tol = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "tol_inn");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.tol_inn is not of type double (ignored).");
else
optso->tol_inn = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "maxcnt");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.maxcnt is not of type double (ignored).");
else
optso->maxcnt = (int) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "maxit");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.maxit is not of type double (ignored).");
else
optso->maxit = (int) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "c");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.c is not of type double (ignored).");
else
optso->c = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "gamma");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.gamma is not of type double (ignored).");
else
optso->gamma = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "gam");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.gam is not of type double (ignored).");
else
optso->gam = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "rate_gam");
if(field != NULL) {
if(!mxIsDouble(field))
mexPrintf("Warning: opts.rate_gam is not of type double (ignored).");
else
optso->rate_gam = (Type) *mxGetPr(field);
}
field = mxGetField(optsi, 0, "isreal");
if(field != NULL) {
if(!mxIsLogical(field))
mexPrintf("Warning: opts.isreal is not of type logical (ignored).");
else
optso->isreal = (bool) *mxGetLogicals(field);
}
field = mxGetField(optsi, 0, "nonneg");
if(field != NULL) {
if(!mxIsLogical(field))
mexPrintf("Warning: opts.nonneg is not of type logical (ignored).");
else
optso->nonneg = (bool) *mxGetLogicals(field);
}
return optso;
}
// create mxArray from struct tval3_info
template<class Type>
mxArray *setInfo(tval3_info<Type> &info) {
const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"};
mxArray *output = mxCreateStructMatrix(1, 1, 5, field_names);
mxArray *value;
value = mxCreateDoubleMatrix(1,1,mxREAL);
*mxGetPr(value) = info.total_iters;
mxSetField(output, 0, "total_iters", value);
value = mxCreateDoubleMatrix(1,1,mxREAL);
*mxGetPr(value) = info.outer_iters;
mxSetField(output, 0, "outer_iters", value);
value = mxCreateDoubleMatrix(1,1,mxREAL);
*mxGetPr(value) = info.rel_chg;
mxSetField(output, 0, "rel_chg", value);
value = mxCreateDoubleMatrix(1,1,mxREAL);
*mxGetPr(value) = info.rel_error;
mxSetField(output, 0, "rel_error", value);
value = mxCreateDoubleMatrix(1,1,mxREAL);
*mxGetPr(value) = info.secs;
mxSetField(output, 0, "secs", value);
return output;
}
// create mxArray from mat-object (row major)
template<class Type>
mxArray *setMatrix(const mat<Type> &mi) {
mwSize dims[] = {mi.dim_y, mi.dim_x, mi.dim_z};
mxArray *mo = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxREAL);
double *mo_data = mxGetPr(mo);
for(int z=0; z < mi.dim_z; z++) {
for(int y=0; y < mi.dim_y; y++) {
for(int x=0; x < mi.dim_x; x++) {
mo_data[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y];
}
}
}
return mo;
}
// create complex mxArray from mat-object (row major)
template<class Type>
mxArray *setCompMatrix(const mat<complex<Type> > &mi) {
mwSize dims[] = {mi.dim_y, mi.dim_x, mi.dim_z};
mxArray *mo = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxCOMPLEX);
double *mo_real = mxGetPr(mo);
double *mo_imag = mxGetPi(mo);
for(int z=0; z < mi.dim_z; z++) {
for(int y=0; y < mi.dim_y; y++) {
for(int x=0; x < mi.dim_x; x++) {
mo_real[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y].real();
mo_imag[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y].imag();
}
}
}
return mo;
}
template<class T_mat, class T_scal>
void call_tval3(const mxArray *A, const mxArray *b, const mxArray *Ut,
int ip, int iq, int ir, const mxArray *opts, mxArray **U, mxArray **info,
geometry *geo, int submat_mb, int virtual_procs_per_core)
{
mat<T_mat> *mb = getMatrix<T_mat>(b);
mat<T_mat> mU(ip, iq, ir);
mat<T_mat> *mUt = getMatrix<T_mat>(Ut);
tval3_options<T_scal> *options = getOptions<T_scal>(opts);
tval3_info<T_scal> ti_info;
if(geo != NULL) {
ti_info = tval3(mU, *geo, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
} else {
if(mxIsSparse(A)) {
sparse_mat<T_mat> *mA = getSparseMatrix<T_mat>(A);
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
delete mA;
} else {
mat<T_mat> *mA = getMatrix<T_mat>(A);
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
delete mA;
}
}
*U = setMatrix(mU);
if(info != NULL) *info = setInfo(ti_info);
delete mb; delete mUt; delete options;
}
template<class Type>
void call_tval3_comp(const mxArray *A, const mxArray *b, const mxArray *Ut,
int ip, int iq, int ir, const mxArray *opts, mxArray **U, mxArray **info,
geometry *geo, int submat_mb, int virtual_procs_per_core)
{
mat<complex<Type> > *mb = getCompMatrix<Type>(b);
mat<complex<Type> > mU(ip, iq, ir);
mat<complex<Type> > *mUt = getCompMatrix<Type>(Ut);
tval3_options<Type> *options = getOptions<Type>(opts);
tval3_info<Type> ti_info;
if(geo != NULL) {
ti_info = tval3(mU, *geo, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
} else {
if(mxIsSparse(A)) {
sparse_mat<complex<Type> > *mA = getSparseCompMatrix<Type>(A);
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
delete mA;
} else {
mat<complex<Type> > *mA = getCompMatrix<Type>(A);
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
virtual_procs_per_core);
delete mA;
}
}
*U = setCompMatrix(mU);
if(info != NULL) *info = setInfo(ti_info);
delete mb; delete mUt; delete options;
}
// [U, out] = tval3cpp(A,b,p,q,opts,use_dbl)
void TVALCPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
// check number of arguments...
if ((nrhs < 6) || (nrhs > 10))
mexErrMsgTxt("Wrong number of input arguments.");
if(nlhs > 2)
mexErrMsgTxt("Too many output arguments.");
// assign input arguments...
const mxArray *A = prhs[0];
const mxArray *b = prhs[1];
const mxArray *p = prhs[2];
const mxArray *q = prhs[3];
const mxArray *r = prhs[4];
const mxArray *opts = prhs[5];
const mxArray *use_dbl = (nrhs < 7) ? NULL : prhs [6];
const mxArray *Ut = mxGetField(opts, 0, "Ut");
const mxArray *submat_mb = (nrhs < 8) ? NULL : prhs[7];
const mxArray *virtual_procs_per_core = (nrhs < 9) ? NULL : prhs[8];
// assign output arguments...
mxArray **U = &plhs[0];
mxArray **info = (nlhs < 2) ? NULL : &plhs[1];
// check data types and assign variables...
geometry *geo = NULL;
if(!mxIsStruct(A) && !mxIsSingle(A) && !mxIsDouble(A))
mexErrMsgTxt("A must be single, double or struct.");
if(!mxIsSparse(A) && !mxIsStruct(A) && mxGetClassID(A) != mxGetClassID(b))
mexErrMsgTxt("A and b must be of the same type when using dense matrices.");
if(!mxIsSingle(b) && !mxIsDouble(b))
mexErrMsgTxt("b must be single or double.");
if((Ut != NULL) && (mxGetClassID(Ut) != mxGetClassID(b)))
mexErrMsgTxt("opts.Ut must be of same type as b.");
if(!mxIsDouble(p))
mexErrMsgTxt("p must be of type double.");
if(!mxIsDouble(q))
mexErrMsgTxt("q must be of type double.");
if(!mxIsDouble(r))
mexErrMsgTxt("r must be of type double.");
if((use_dbl != NULL) && (!mxIsLogical(use_dbl)))
mexErrMsgTxt("use_dbl is not a logical value.");
if((submat_mb != NULL) && (!mxIsDouble(submat_mb)))
mexErrMsgTxt("submat_mb must be of type double.");
if((virtual_procs_per_core != NULL) && (!mxIsDouble(virtual_procs_per_core)))
mexErrMsgTxt("virtual_procs_per_core must be of type double.");
bool b_use_dbl = (use_dbl == NULL) ? false : (bool) mxGetLogicals(use_dbl)[0];
int ip = (int)(*mxGetPr(p));
int iq = (int)(*mxGetPr(q));
int ir = (int)(*mxGetPr(r));
int isubmat_mb = (submat_mb == NULL) ? 5 : (int)(*mxGetPr(submat_mb));
int ivirtual_procs_per_core = (virtual_procs_per_core == NULL) ? 2 :
(int)(*mxGetPr(virtual_procs_per_core));
if(mxIsStruct(A)) {
geo = get_geometry(A);
}
// call tval3()...
try{
if(!mxIsComplex(A) && !mxIsComplex(b)) { // scalar matrices...
if(mxIsSingle(b)) {
if(b_use_dbl) {
call_tval3<float, double>(A, b, Ut, ip, iq, ir, opts, U,
info, geo, isubmat_mb, ivirtual_procs_per_core);
} else {
call_tval3<float, float>(A, b, Ut, ip, iq, ir, opts, U,
info, geo, isubmat_mb, ivirtual_procs_per_core);
}
} else {
call_tval3<double, double>(A, b, Ut, ip, iq, ir, opts, U,
info, geo, isubmat_mb, ivirtual_procs_per_core);
}
} else { // complex matrices...
if(mxIsSingle(b)) {
call_tval3_comp<float>(A, b, Ut, ip, iq, ir, opts, U, info,
geo, isubmat_mb, ivirtual_procs_per_core);
} else {
call_tval3_comp<double>(A, b, Ut, ip, iq, ir, opts, U, info,
geo, isubmat_mb, ivirtual_procs_per_core);
}
}
} catch(const std::exception &ex) {
mexErrMsgTxt(ex.what());
}
if(geo != NULL) delete geo;
}

4
TVALCPU/src/tval3cpp3d.h Normal file
View File

@@ -0,0 +1,4 @@
#include <mex.h>
extern "C"{
void TVALCPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) ;
}

45
TVALCPU/src/utility.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* utility.h
*
* Created on: Mar 22, 2011
* Author: ditlevsen
*/
#ifndef UTILITY_H_
#define UTILITY_H_
/**********************************************************************************/
/* functions, etc. for handling different data types in mathematical expressions */
/**********************************************************************************/
// overloaded versions of conj() and real() for scalar values...
// (not needed when compiled with gcc and -std_c++0x)
template <class Type>
inline Type conj(Type f) {
return f;
}
template<class Type>
inline Type real(Type f) {
return f;
}
template<class Type>
inline Type imag(Type f) {
return 0;
}
// sign function
template<class T_comp, class T_scal>
inline T_comp sign(T_comp x) {
T_scal ax = std::abs(x);
if(ax > 0)
return x/ax;
else
return 0;
}
#endif /* UTILITY_H_ */