Commit source
This commit is contained in:
237
TVALCPU/src/blas_wrap.h
Normal file
237
TVALCPU/src/blas_wrap.h
Normal file
@@ -0,0 +1,237 @@
|
||||
/*
|
||||
* blas_wrap.h
|
||||
*
|
||||
* Created on: Oct 7, 2011
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#ifndef BLAS_WRAP_H_
|
||||
#define BLAS_WRAP_H_
|
||||
|
||||
#include <complex>
|
||||
#include <mkl_cblas.h>
|
||||
#include <mkl_spblas.h>
|
||||
|
||||
// overloaded wrapper functions for blas / mkl sparse blas calls...
|
||||
inline void cblas_scal(int N, float alpha, float *X, int incX) {
|
||||
cblas_sscal(N, alpha, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_scal(int N, double alpha, double *X, int incX) {
|
||||
cblas_dscal(N, alpha, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_scal(int N, std::complex<float> alpha, std::complex<float> *X, int incX) {
|
||||
cblas_cscal(N, &alpha, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_scal(int N, std::complex<double> alpha, std::complex<double> *X, int incX) {
|
||||
cblas_zscal(N, &alpha, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_scal(int N, float alpha, std::complex<float> *X, int incX) {
|
||||
cblas_csscal(N, alpha, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_scal(int N, double alpha, std::complex<double> *X, int incX) {
|
||||
cblas_zdscal(N, alpha, X, incX);
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void cblas_axpy(int N, float alpha, const float *X, int incX, float *Y, int incY) {
|
||||
cblas_saxpy(N, alpha, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_axpy(int N, double alpha, const double *X, int incX, double *Y, int incY) {
|
||||
cblas_daxpy(N, alpha, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_axpy(int N, std::complex<float> alpha, const std::complex<float> *X, int incX, std::complex<float> *Y, int incY) {
|
||||
cblas_caxpy(N, &alpha, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_axpy(int N, std::complex<double> alpha, const std::complex<double> *X, int incX, std::complex<double> *Y, int incY) {
|
||||
cblas_zaxpy(N, &alpha, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
|
||||
inline void cblas_copy(int N, const float *X, int incX, float *Y, int incY) {
|
||||
cblas_scopy(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_copy(int N, const double *X, int incX, double *Y, int incY) {
|
||||
cblas_dcopy(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_copy(int N, const std::complex<float> *X, int incX, std::complex<float> *Y, int incY) {
|
||||
cblas_ccopy(N,X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_copy(int N, const std::complex<double> *X, int incX, std::complex<double> *Y, int incY) {
|
||||
cblas_zcopy(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
|
||||
inline float cblas_dotc(int N, const float *X, int incX, const float *Y, int incY) {
|
||||
return cblas_sdot(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline double cblas_dotc(int N, const double *X, int incX, const double *Y, int incY) {
|
||||
return cblas_ddot(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline std::complex<float> cblas_dotc(int N, const std::complex<float> *X, int incX, const std::complex<float> *Y, int incY) {
|
||||
std::complex<float> result;
|
||||
cblas_cdotc_sub(N, X, incX, Y, incY, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline std::complex<double> cblas_dotc(int N, const std::complex<double> *X, int incX, const std::complex<double> *Y, int incY) {
|
||||
std::complex<double> result;
|
||||
cblas_zdotc_sub(N, X, incX, Y, incY, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline float cblas_nrm2(int N, const float *X, int incX) {
|
||||
return cblas_snrm2(N, X, incX);
|
||||
}
|
||||
|
||||
inline double cblas_nrm2(int N, const double *X, int incX) {
|
||||
return cblas_dnrm2(N, X, incX);
|
||||
}
|
||||
|
||||
inline float cblas_nrm2(int N, const std::complex<float> *X, int incX) {
|
||||
return cblas_scnrm2(N, X, incX);
|
||||
}
|
||||
|
||||
inline double cblas_nrm2(int N, const std::complex<double> *X, int incX) {
|
||||
return cblas_dznrm2(N, X, incX);
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
|
||||
float alpha, const float *A, int lda, const float *X, int incX, float beta, float *Y, int incY) {
|
||||
cblas_sgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
|
||||
double alpha, const double *A, int lda, const double *X, int incX, double beta, double *Y, int incY) {
|
||||
cblas_dgemv(Order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
|
||||
std::complex<float> alpha, const std::complex<float> *A, int lda, const std::complex<float> *X, int incX,
|
||||
std::complex<float> beta, std::complex<float> *Y, int incY)
|
||||
{
|
||||
cblas_cgemv(Order, TransA, M, N, &alpha, A, lda, X, incX, &beta, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_gemv(CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, int M, int N,
|
||||
std::complex<double> alpha, const std::complex<double> *A, int lda, const std::complex<double> *X, int incX,
|
||||
std::complex<double> beta, std::complex<double> *Y, int incY)
|
||||
{
|
||||
cblas_zgemv(Order, TransA, M, N, &alpha, A, lda, X, incX, &beta, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
|
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const float alpha, const float *A,
|
||||
const int lda, const float *B, const int ldb,
|
||||
const float beta, float *C, const int ldc)
|
||||
{
|
||||
cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
||||
}
|
||||
|
||||
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
|
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const double alpha, const double *A,
|
||||
const int lda, const double *B, const int ldb,
|
||||
const double beta, double *C, const int ldc)
|
||||
{
|
||||
cblas_dgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
|
||||
}
|
||||
|
||||
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
|
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const std::complex<float> alpha, const std::complex<float> *A,
|
||||
const int lda, const std::complex<float> *B, const int ldb,
|
||||
const std::complex<float> beta, std::complex<float> *C, const int ldc)
|
||||
{
|
||||
cblas_cgemm(Order, TransA, TransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
}
|
||||
|
||||
inline void cblas_gemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA,
|
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const std::complex<double> alpha, const std::complex<double> *A,
|
||||
const int lda, const std::complex<double> *B, const int ldb,
|
||||
const std::complex<double> beta, std::complex<double> *C, const int ldc)
|
||||
{
|
||||
cblas_zgemm(Order, TransA, TransB, M, N, K, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *indx,
|
||||
MKL_INT *pntrb, MKL_INT *pntre, float *x, float *beta, float *y)
|
||||
{
|
||||
mkl_scscmv(transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
|
||||
}
|
||||
|
||||
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx,
|
||||
MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y)
|
||||
{
|
||||
mkl_dcscmv(transa, m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
|
||||
}
|
||||
|
||||
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, std::complex<float> *alpha, char *matdescra,
|
||||
std::complex<float> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<float> *x,
|
||||
std::complex<float> *beta, std::complex<float> *y)
|
||||
{
|
||||
mkl_ccscmv(transa, m, k, (MKL_Complex8 *)alpha, matdescra, (MKL_Complex8 *)val, indx, pntrb, pntre,
|
||||
(MKL_Complex8 *)x, (MKL_Complex8 *)beta, (MKL_Complex8 *)y);
|
||||
}
|
||||
|
||||
inline void mkl_cscmv(char *transa, MKL_INT *m, MKL_INT *k, std::complex<double> *alpha, char *matdescra,
|
||||
std::complex<double> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<double> *x,
|
||||
std::complex<double> *beta, std::complex<double> *y)
|
||||
{
|
||||
mkl_zcscmv(transa, m, k, (MKL_Complex16 *)alpha, matdescra, (MKL_Complex16 *)val, indx, pntrb, pntre,
|
||||
(MKL_Complex16 *)x, (MKL_Complex16 *)beta, (MKL_Complex16 *)y);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, float *alpha, char *matdescra, float *val,
|
||||
MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, float *b, MKL_INT *ldb, float *beta, float *c, MKL_INT *ldc)
|
||||
{
|
||||
mkl_scscmm(transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc);
|
||||
}
|
||||
|
||||
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra,
|
||||
double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c,
|
||||
MKL_INT *ldc)
|
||||
{
|
||||
mkl_dcscmm(transa, m, n, k, alpha, matdescra, val, indx, pntrb, pntre, b, ldb, beta, c, ldc);
|
||||
}
|
||||
|
||||
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, std::complex<float> *alpha, char *matdescra,
|
||||
std::complex<float> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<float> *b, MKL_INT *ldb,
|
||||
std::complex<float> *beta, std::complex<float> *c, MKL_INT *ldc)
|
||||
{
|
||||
mkl_ccscmm(transa, m, n, k, (MKL_Complex8 *)alpha, matdescra, (MKL_Complex8 *)val, indx, pntrb, pntre,
|
||||
(MKL_Complex8 *)b, ldb, (MKL_Complex8 *)beta, (MKL_Complex8 *)c, ldc);
|
||||
}
|
||||
|
||||
void mkl_cscmm(char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, std::complex<double> *alpha, char *matdescra,
|
||||
std::complex<double> *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, std::complex<double> *b, MKL_INT *ldb,
|
||||
std::complex<double> *beta, std::complex<double> *c, MKL_INT *ldc)
|
||||
{
|
||||
mkl_zcscmm(transa, m, n, k, (MKL_Complex16 *)alpha, matdescra, (MKL_Complex16 *)val, indx, pntrb, pntre,
|
||||
(MKL_Complex16 *)b, ldb, (MKL_Complex16 *)beta, (MKL_Complex16 *)c, ldc);
|
||||
}
|
||||
|
||||
#endif /* BLAS_WRAP_H_ */
|
||||
291
TVALCPU/src/container.h
Normal file
291
TVALCPU/src/container.h
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Classes for handling matrices and vectors:
|
||||
*
|
||||
* Class mat<Type>: dense matrices/vectors
|
||||
* Class sparse_mat<Type>: sparse matrices
|
||||
* Class geometry: data for dynamic calculation of the measurement matrix
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef CONTAINER_H_
|
||||
#define CONTAINER_H_
|
||||
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <exception>
|
||||
|
||||
// enumerations...
|
||||
enum T_alloc {allocated_in_constructor, preallocated_buffer};
|
||||
enum T_mat_format {mat_col_major, mat_row_major};
|
||||
enum T_sparse_mat_format {sparse_mat_csc, sparse_mat_csr};
|
||||
|
||||
// exceptions...
|
||||
class illegal_mat_assignment: public std::exception
|
||||
{
|
||||
virtual const char* what() const throw()
|
||||
{
|
||||
return "Illegale Zuweisung von mat-Objekten.";
|
||||
}
|
||||
};
|
||||
|
||||
class illegal_sparse_mat_assignment: public std::exception
|
||||
{
|
||||
virtual const char* what() const throw()
|
||||
{
|
||||
return "Illegale Zuweisung von sparse_mat-Objekten.";
|
||||
}
|
||||
};
|
||||
|
||||
//---------------------------------------
|
||||
// class mat...
|
||||
//---------------------------------------
|
||||
|
||||
template <class Type>
|
||||
class mat {
|
||||
private:
|
||||
Type *pbuf;
|
||||
T_alloc alloc_info;
|
||||
public:
|
||||
const int dim_y;
|
||||
const int dim_x;
|
||||
const int dim_z;
|
||||
const int len;
|
||||
const T_mat_format format;
|
||||
|
||||
mat(int l_y, int l_x, int l_z, bool init=true, T_mat_format storage=mat_row_major);
|
||||
mat(int num_elements, bool init=true);
|
||||
mat(int l_y, int l_x, int l_z, Type *buffer, T_mat_format storage=mat_row_major);
|
||||
mat(int num_elements, Type *buffer);
|
||||
mat(const mat &m);
|
||||
~mat() { if(pbuf && (alloc_info == allocated_in_constructor)) delete[] pbuf; };
|
||||
|
||||
Type *data() { return pbuf; }
|
||||
const Type *data() const { return pbuf; }
|
||||
|
||||
Type &operator[](int i) { return pbuf[i]; }
|
||||
const Type &operator[](int i) const { return pbuf[i]; }
|
||||
|
||||
mat &operator=(const mat<Type> &m) throw(illegal_mat_assignment);
|
||||
|
||||
void randomFill(Type scl);
|
||||
};
|
||||
|
||||
|
||||
//---------------------------------------
|
||||
// class sparse_mat...
|
||||
//---------------------------------------
|
||||
|
||||
template<class Type>
|
||||
class sparse_mat {
|
||||
private:
|
||||
int *p_ptr;
|
||||
int *p_ind;
|
||||
Type *p_val;
|
||||
T_alloc alloc_info;
|
||||
public:
|
||||
const int dim_y;
|
||||
const int dim_x;
|
||||
const int nnz;
|
||||
const T_sparse_mat_format format;
|
||||
|
||||
sparse_mat(int l_y, int l_x, int n_nonzero, T_sparse_mat_format storage_format=sparse_mat_csc, bool init=true);
|
||||
sparse_mat(int l_y, int l_x, int n_nonzero, Type *buff_val, int *buff_ptr, int *buff_ind,
|
||||
T_sparse_mat_format storage_format=sparse_mat_csc);
|
||||
sparse_mat(const sparse_mat &m);
|
||||
~sparse_mat();
|
||||
|
||||
Type *val() { return p_val; }
|
||||
const Type *val() const { return p_val; }
|
||||
|
||||
int *ptr() { return p_ptr; }
|
||||
const int *ptr() const { return p_ptr; }
|
||||
|
||||
int *ind() { return p_ind; }
|
||||
const int *ind() const { return p_ind; }
|
||||
|
||||
sparse_mat<Type> &operator=(const mat<Type> &m) throw(illegal_sparse_mat_assignment);
|
||||
};
|
||||
|
||||
//---------------------------------------
|
||||
// Struct geomety
|
||||
//---------------------------------------
|
||||
|
||||
struct geometry {
|
||||
int *x_emitters;
|
||||
int *y_emitters;
|
||||
int *z_emitters;
|
||||
int *x_receivers;
|
||||
int *y_receivers;
|
||||
int *z_receivers;
|
||||
int num_emitters;
|
||||
int num_receivers;
|
||||
int rv_x;
|
||||
int rv_y;
|
||||
int rv_z;
|
||||
float scale_factor;
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// member functions, etc. class mat
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
template<class Type>
|
||||
mat<Type>::mat(int l_y, int l_x, int l_z, bool init, T_mat_format storage): pbuf(NULL), alloc_info(allocated_in_constructor),
|
||||
dim_y(l_y), dim_x(l_x), dim_z(l_z), len(l_y*l_x*l_z), format(storage) {
|
||||
if(len > 0) {
|
||||
pbuf = new Type[len];
|
||||
if(init)
|
||||
memset(pbuf, 0, len*sizeof(Type));
|
||||
}
|
||||
};
|
||||
|
||||
template<class Type>
|
||||
mat<Type>::mat(int num_elements, bool init): pbuf(NULL), alloc_info(allocated_in_constructor), dim_y(num_elements), dim_x(1),
|
||||
dim_z(1), len(num_elements), format(mat_row_major) {
|
||||
if(len > 0) {
|
||||
pbuf = new Type[len];
|
||||
if(init)
|
||||
memset(pbuf, 0, len*sizeof(Type));
|
||||
}
|
||||
};
|
||||
|
||||
template<class Type>
|
||||
mat<Type>::mat(int l_y, int l_x, int l_z, Type *buffer, T_mat_format storage): pbuf(buffer), dim_y(l_y), dim_x(l_x),
|
||||
dim_z(l_z), len(l_y*l_x*l_z), alloc_info(preallocated_buffer), format(storage) {};
|
||||
|
||||
template<class Type>
|
||||
mat<Type>::mat(int num_elements, Type *buffer): pbuf(buffer), dim_y(num_elements), dim_x(1),
|
||||
dim_z(1), len(num_elements), alloc_info(preallocated_buffer), format(mat_row_major) {};
|
||||
|
||||
template<class Type>
|
||||
mat<Type>::mat(const mat &m): pbuf(NULL), alloc_info(allocated_in_constructor), dim_y(m.dim_y), dim_x(m.dim_x),
|
||||
dim_z(m.dim_z), len(m.len), format(m.format) {
|
||||
if(len > 0) {
|
||||
pbuf = new Type[len];
|
||||
memcpy(pbuf, m.pbuf, len*sizeof(Type));
|
||||
}
|
||||
};
|
||||
|
||||
template<class Type>
|
||||
mat<Type> &mat<Type>::operator=(const mat<Type> &m) throw(illegal_mat_assignment) {
|
||||
if(m.len == len)
|
||||
memcpy(pbuf, m.pbuf, len*sizeof(Type));
|
||||
else
|
||||
throw illegal_mat_assignment();
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
void mat<Type>::randomFill(Type scl) {
|
||||
for(int i=0; i<len; i++)
|
||||
pbuf[i] = ((Type)rand() / RAND_MAX - 0.5)/scl;
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
std::ostream &operator<<(std::ostream &stream, const mat<Type> &m) {
|
||||
for(int z=0; z < m.dim_z; z++) {
|
||||
stream << "\nz=" << z+1 << ":\n\n";
|
||||
for(int i=0; i<m.dim_y; i++) {
|
||||
for(int j=0; j<m.dim_x; j++) {
|
||||
if(m.format == mat_row_major)
|
||||
stream << m[z*m.dim_x*m.dim_y + i*m.dim_x + j] << " ";
|
||||
else
|
||||
stream << m[z*m.dim_x*m.dim_y + i + j*m.dim_y] << " ";
|
||||
}
|
||||
stream << "\n";
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// member functions, etc. class sparse_mat
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
template<class Type>
|
||||
sparse_mat<Type>::sparse_mat(int l_y, int l_x, int n_nonzero, T_sparse_mat_format storage_format, bool init):
|
||||
p_val(NULL), p_ptr(NULL), p_ind(NULL), dim_y(l_y), dim_x(l_x), nnz(n_nonzero), format(storage_format),
|
||||
alloc_info(allocated_in_constructor)
|
||||
{
|
||||
if(nnz > 0) {
|
||||
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
|
||||
|
||||
p_val = new Type[nnz];
|
||||
p_ptr = new int[len_ptr];
|
||||
p_ind = new int[nnz];
|
||||
if(init) {
|
||||
memset(p_val, 0, nnz*sizeof(Type));
|
||||
memset(p_ptr, 0, (len_ptr)*sizeof(int));
|
||||
memset(p_ind, 0, nnz*sizeof(int));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class Type>
|
||||
sparse_mat<Type>::sparse_mat(int l_y, int l_x, int n_nonzero, Type *buff_val, int *buff_ptr, int *buff_ind,
|
||||
T_sparse_mat_format storage_format): p_val(buff_val), p_ptr(buff_ptr), p_ind(buff_ind), dim_y(l_y), dim_x(l_x),
|
||||
nnz(n_nonzero), format(storage_format), alloc_info(preallocated_buffer) {}
|
||||
|
||||
|
||||
template<class Type>
|
||||
sparse_mat<Type>::sparse_mat(const sparse_mat &m): p_val(NULL), p_ptr(NULL), p_ind(NULL), dim_y(m.dim_y),
|
||||
dim_x(m.dim_x), nnz(m.nnz), format(m.format), alloc_info(allocated_in_constructor)
|
||||
{
|
||||
if(nnz > 0) {
|
||||
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
|
||||
|
||||
p_val = new Type[nnz];
|
||||
memcpy(p_val, m.p_val, nnz*sizeof(Type));
|
||||
p_ptr = new int[len_ptr];
|
||||
memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
|
||||
p_ind = new int[nnz];
|
||||
memcpy(p_ind, m.p_ind, nnz*sizeof(int));
|
||||
}
|
||||
};
|
||||
|
||||
template<class Type>
|
||||
sparse_mat<Type>::~sparse_mat() {
|
||||
if(alloc_info == allocated_in_constructor) {
|
||||
if(p_val) delete[] p_val;
|
||||
if(p_ptr) delete[] p_ptr;
|
||||
if(p_ind) delete[] p_ind;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
sparse_mat<Type> &sparse_mat<Type>::operator=(const mat<Type> &m) throw(illegal_sparse_mat_assignment){
|
||||
|
||||
if(m.nnz == nnz && m.dim_y == dim_y && m.dim_x == dim_x && m.format == format) {
|
||||
int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
|
||||
|
||||
memcpy(p_val, m.p_val, nnz*sizeof(Type));
|
||||
memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
|
||||
memcpy(p_ind, m.p_ind, nnz*sizeof(int));
|
||||
} else {
|
||||
throw illegal_sparse_mat_assignment();
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// output operator ...
|
||||
template<class Type>
|
||||
std::ostream &operator<<(std::ostream &stream, const sparse_mat<Type> &m) {
|
||||
mat<Type> tmp(m.dim_y, m.dim_x);
|
||||
if(m.format == sparse_mat_csc) {
|
||||
for(int c=0; c < m.dim_x; c++)
|
||||
for(int i = m.ptr()[c]; i < m.ptr()[c+1]; i++)
|
||||
tmp[c*m.dim_y + m.ind()[i]] = m.val()[i];
|
||||
} else {
|
||||
for(int r=0; r < m.dim_y; r++)
|
||||
for(int i = m.ptr()[r]; i < m.ptr()[r+1]; i++)
|
||||
tmp[m.ind()[i]*m.dim_y + r] = m.val()[i];
|
||||
}
|
||||
stream << tmp;
|
||||
return stream;
|
||||
}
|
||||
|
||||
#endif /* CONTAINER_H_ */
|
||||
260
TVALCPU/src/mat_mult.h
Normal file
260
TVALCPU/src/mat_mult.h
Normal file
@@ -0,0 +1,260 @@
|
||||
/*
|
||||
* mat_mult.h
|
||||
*
|
||||
* Created on: Oct 7, 2011
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#ifndef MAT_MULT_H_
|
||||
#define MAT_MULT_H_
|
||||
|
||||
//#include <omp.h>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include "container.h"
|
||||
#include "blas_wrap.h"
|
||||
|
||||
// overloaded function for matrix-vector multiplication (dense / sparse / dynamically calculated)
|
||||
|
||||
template <class Type>
|
||||
// y = Op(A)*x
|
||||
// A Row Major, x, y column Major!
|
||||
inline void gemv(CBLAS_TRANSPOSE TransA, const mat<Type> &A, const mat<Type> &x, mat<Type> &y) {
|
||||
cblas_gemv(CblasRowMajor, TransA, A.dim_y, A.dim_x, (Type)1, A.data(), A.dim_x, x.data(), 1, (Type)0, y.data(), 1);
|
||||
}
|
||||
|
||||
template <class Type>
|
||||
inline void gemv(CBLAS_TRANSPOSE TransA, const sparse_mat<Type> &A, const mat<Type> &x, mat<Type> &y) {
|
||||
char transpose;
|
||||
if(TransA == CblasTrans)
|
||||
transpose = 't';
|
||||
else if(TransA == CblasConjTrans)
|
||||
transpose = 'c';
|
||||
else
|
||||
transpose = 'n';
|
||||
char *matdescra = (char *)"GLLC";
|
||||
Type alpha = 1;
|
||||
Type beta = 0;
|
||||
|
||||
mkl_cscmv(&transpose, (int *)&A.dim_y, (int *)&A.dim_x, &alpha, matdescra, (Type *)A.val(), (int *)A.ind(),
|
||||
(int *)A.ptr(), (int *)(A.ptr() + 1), (Type *)x.data(), &beta, y.data());
|
||||
}
|
||||
|
||||
// functions for the dynamic calculation of the measurement matrix...
|
||||
|
||||
template <class Type>
|
||||
inline void mult_element(CBLAS_TRANSPOSE TransA, int ray, int x_pixel, int y_pixel, int z_pixel, int rv_x, int rv_y, float scale_factor, const mat<Type> &x, mat<Type> &y) {
|
||||
int lin_index = x_pixel + y_pixel*rv_x + z_pixel*rv_x*rv_y;
|
||||
if(TransA == CblasNoTrans) {
|
||||
y[ray] += x[lin_index] * (Type) scale_factor;
|
||||
} else {
|
||||
y[lin_index] += x[ray] * (Type) scale_factor;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Type>
|
||||
void gemv(CBLAS_TRANSPOSE TransA, const geometry &A, const mat<Type> &x, mat<Type> &y) {
|
||||
memset(y.data(), 0, y.len*sizeof(Type));
|
||||
|
||||
mat<Type> **vecs;
|
||||
int numthreads;
|
||||
#pragma omp parallel
|
||||
{
|
||||
int dx, dy, dz, x_inc, mx, y_inc, my, z_inc, mz, x_pixel, y_pixel, z_pixel, ray, err_1, err_2;
|
||||
|
||||
int threadnum = omp_get_thread_num();
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
numthreads = omp_get_num_threads();
|
||||
vecs = new mat<Type>* [numthreads];
|
||||
}
|
||||
|
||||
if(threadnum==0)
|
||||
vecs[0] = &y;
|
||||
else
|
||||
vecs[threadnum] = new mat<Type>(y.len);
|
||||
|
||||
#pragma omp for
|
||||
for(int emitter=0; emitter < A.num_emitters; emitter++) {
|
||||
for(int receiver=0; receiver < A.num_receivers; receiver++) {
|
||||
|
||||
ray = emitter*A.num_receivers + receiver;
|
||||
|
||||
// trace path using Bresenheimer algorithm...
|
||||
|
||||
dx = A.x_receivers[receiver] - A.x_emitters[emitter];
|
||||
dy = A.y_receivers[receiver] - A.y_emitters[emitter];
|
||||
dz = A.z_receivers[receiver] - A.z_emitters[emitter];
|
||||
|
||||
x_inc = (dx < 0) ? -1 : 1;
|
||||
mx = abs(dx);
|
||||
|
||||
y_inc = (dy < 0) ? -1 : 1;
|
||||
my = abs(dy);
|
||||
|
||||
z_inc = (dz < 0) ? -1 : 1;
|
||||
mz = abs(dz);
|
||||
|
||||
x_pixel = A.x_emitters[emitter];
|
||||
y_pixel = A.y_emitters[emitter];
|
||||
z_pixel = A.z_emitters[emitter];
|
||||
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
|
||||
if (mx >= my && mx >= mz) {
|
||||
err_1 = 2*my - mx;
|
||||
err_2 = 2*mz - mx;
|
||||
for(int j = 1; j < mx+1; j++) {
|
||||
if (err_1 > 0) {
|
||||
y_pixel += y_inc;
|
||||
err_1 -= 2*mx;
|
||||
}
|
||||
if (err_2 > 0) {
|
||||
z_pixel += z_inc;
|
||||
err_2 -= 2*mx;
|
||||
}
|
||||
err_1 += 2*my;
|
||||
err_2 += 2*mz;
|
||||
x_pixel += x_inc;
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
}
|
||||
} else if(my >= mx && my >= mz) {
|
||||
err_1 = 2*mx - my;
|
||||
err_2 = 2*mz - my;
|
||||
for (int j = 1; j < my+1; j++) {
|
||||
if (err_1 > 0) {
|
||||
x_pixel += x_inc;
|
||||
err_1 -= 2*my;
|
||||
}
|
||||
if (err_2 > 0) {
|
||||
z_pixel += z_inc;
|
||||
err_2 -= 2*my;
|
||||
}
|
||||
err_1 += 2*mx;
|
||||
err_2 += 2*mz;
|
||||
y_pixel += y_inc;
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
}
|
||||
} else {
|
||||
err_1 = 2*mx - mz;
|
||||
err_2 = 2*my - mz;
|
||||
for (int j = 1; j < mz+1; j++) {
|
||||
if (err_1 > 0) {
|
||||
x_pixel += x_inc;
|
||||
err_1 -= 2*mz;
|
||||
}
|
||||
if (err_2 > 0) {
|
||||
y_pixel += y_inc;
|
||||
err_2 -= 2*mz;
|
||||
}
|
||||
err_1 += 2*mx;
|
||||
err_2 += 2*my;
|
||||
z_pixel += z_inc;
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int i=1; i<numthreads; i++) {
|
||||
cblas_axpy(y.len, 1, vecs[i]->data(), 1, y.data(), 1);
|
||||
delete vecs[i];
|
||||
}
|
||||
delete vecs;
|
||||
}
|
||||
|
||||
/*
|
||||
// Version mit nur einem Loop für alle Richtungen (-> "Prototyp" für GraKa-Version), auf CPU etwas langsamer
|
||||
template <class Type>
|
||||
void gemv(CBLAS_TRANSPOSE TransA, const geometry &A, const mat<Type> &x, mat<Type> &y) {
|
||||
memset(y.data(), 0, y.len*sizeof(Type));
|
||||
|
||||
mat<Type> **vecs;
|
||||
int numthreads;
|
||||
#pragma omp parallel
|
||||
{
|
||||
int dx, dy, dz, x_inc, mx, y_inc, my, z_inc, mz, x_pixel, y_pixel, z_pixel, ray, err_1, err_2;
|
||||
int *m1, *m2, *m3, *inc1, *inc2, *inc3, *pixel1, *pixel2, *pixel3;
|
||||
|
||||
int threadnum = omp_get_thread_num();
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
numthreads = omp_get_num_threads();
|
||||
vecs = new mat<Type>* [numthreads];
|
||||
}
|
||||
|
||||
if(threadnum==0)
|
||||
vecs[0] = &y;
|
||||
else
|
||||
vecs[threadnum] = new mat<Type>(y.len);
|
||||
|
||||
#pragma omp for
|
||||
for(int emitter=0; emitter < A.num_emitters; emitter++) {
|
||||
for(int receiver=0; receiver < A.num_receivers; receiver++) {
|
||||
|
||||
ray = emitter*A.num_receivers + receiver;
|
||||
|
||||
// trace path using Bresenheimer algorithm...
|
||||
|
||||
dx = A.x_receivers[receiver] - A.x_emitters[emitter];
|
||||
dy = A.y_receivers[receiver] - A.y_emitters[emitter];
|
||||
dz = A.z_receivers[receiver] - A.z_emitters[emitter];
|
||||
|
||||
x_inc = (dx < 0) ? -1 : 1;
|
||||
mx = abs(dx);
|
||||
|
||||
y_inc = (dy < 0) ? -1 : 1;
|
||||
my = abs(dy);
|
||||
|
||||
z_inc = (dz < 0) ? -1 : 1;
|
||||
mz = abs(dz);
|
||||
|
||||
x_pixel = A.x_emitters[emitter];
|
||||
y_pixel = A.y_emitters[emitter];
|
||||
z_pixel = A.z_emitters[emitter];
|
||||
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
|
||||
if (mx >= my && mx >= mz) {
|
||||
m1 = &mx; pixel1 = &x_pixel; inc1 = &x_inc;
|
||||
m2 = &my; pixel2 = &y_pixel; inc2 = &y_inc;
|
||||
m3 = &mz; pixel3 = &z_pixel; inc3 = &z_inc;
|
||||
} else if(my >= mx && my >= mz) {
|
||||
m1 = &my; pixel1 = &y_pixel; inc1 = &y_inc;
|
||||
m2 = &mx; pixel2 = &x_pixel; inc2 = &x_inc;
|
||||
m3 = &mz; pixel3 = &z_pixel; inc3 = &z_inc;
|
||||
} else {
|
||||
m1 = &mz; pixel1 = &z_pixel; inc1 = &z_inc;
|
||||
m2 = &mx; pixel2 = &x_pixel; inc2 = &x_inc;
|
||||
m3 = &my; pixel3 = &y_pixel; inc3 = &y_inc;
|
||||
}
|
||||
|
||||
err_1 = 2 * *m2 - *m1;
|
||||
err_2 = 2 * *m3 - *m1;
|
||||
for(int j = 1; j < *m1+1; j++) {
|
||||
if (err_1 > 0) {
|
||||
*pixel2 += *inc2;
|
||||
err_1 -= 2 * *m1;
|
||||
}
|
||||
if (err_2 > 0) {
|
||||
*pixel3 += *inc3;
|
||||
err_2 -= 2 * *m1;
|
||||
}
|
||||
err_1 += 2 * *m2;
|
||||
err_2 += 2 * *m3;
|
||||
*pixel1 += *inc1;
|
||||
mult_element(TransA, ray, x_pixel, y_pixel, z_pixel, A.rv_x, A.rv_y, A.scale_factor, x, *vecs[threadnum]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int i=1; i<numthreads; i++) {
|
||||
cblas_axpy(y.len, 1, vecs[i]->data(), 1, y.data(), 1);
|
||||
delete vecs[i];
|
||||
}
|
||||
delete vecs;
|
||||
}*/
|
||||
|
||||
#endif /* MAT_MULT_H_ */
|
||||
21
TVALCPU/src/timestamp.cpp
Normal file
21
TVALCPU/src/timestamp.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* utility.cpp
|
||||
*
|
||||
* Created on: Mar 22, 2011
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <cstdlib>
|
||||
|
||||
// get time stamp in seconds using Native Linux Time Measurement
|
||||
double get_timestamp() {
|
||||
struct timeval tp;
|
||||
double sec, usec;
|
||||
gettimeofday(&tp, NULL);
|
||||
sec = static_cast<double>( tp.tv_sec );
|
||||
usec = static_cast<double>( tp.tv_usec )/1E6;
|
||||
return sec + usec;
|
||||
}
|
||||
|
||||
7
TVALCPU/src/timestamp.h
Normal file
7
TVALCPU/src/timestamp.h
Normal file
@@ -0,0 +1,7 @@
|
||||
#ifndef TIMESTAMP_H_
|
||||
#define TIMESTAMP_H_
|
||||
|
||||
// returns a time stamp in seconds, using native Linux time measurement
|
||||
extern double get_timestamp();
|
||||
|
||||
#endif
|
||||
861
TVALCPU/src/tval3.cpp
Normal file
861
TVALCPU/src/tval3.cpp
Normal file
@@ -0,0 +1,861 @@
|
||||
/*
|
||||
* tval3.cpp
|
||||
*
|
||||
* Created on: Mar 21, 2011d
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <omp.h>
|
||||
#include <complex>
|
||||
#include <string>
|
||||
#include "tval3_types.h"
|
||||
#include "utility.h"
|
||||
#include "timestamp.h"
|
||||
#include "blas_wrap.h"
|
||||
#include "mat_mult.h"
|
||||
#include <mkl.h>
|
||||
#include <fstream>
|
||||
#include <cfloat>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//#define PROFILING
|
||||
|
||||
#ifdef PROFILING
|
||||
double t_mult, t_d, t_dt;
|
||||
bool rec_time;
|
||||
#endif
|
||||
|
||||
// this struct holds various values, accessed by the functions get_g(), update_g(), update_W() and update_mlp()
|
||||
// the member names correspond to the variable names in the MATLAB-version
|
||||
template<class T_comp, class T_scal>
|
||||
struct tval3_data { // parameters and intermediate results...
|
||||
// dimensions...
|
||||
int p; // y-dimension of reconstruction volume
|
||||
int q; // x-dimension of reconstruction volume
|
||||
int r; // z-dimension of reconstruction volume
|
||||
int n; // number of pixels (n = p*q*r)
|
||||
int m; // number of measurements
|
||||
|
||||
// penalty parameters...
|
||||
T_scal mu;
|
||||
T_scal beta;
|
||||
T_scal muDbeta; // mu/beta
|
||||
|
||||
// multiplier...
|
||||
mat<T_comp> delta;
|
||||
mat<T_comp> sigmaX;
|
||||
mat<T_comp> sigmaY;
|
||||
mat<T_comp> sigmaZ;
|
||||
|
||||
// lagrangian function and sub terms...
|
||||
T_comp f;
|
||||
T_scal lam1;
|
||||
T_scal lam2;
|
||||
T_scal lam3;
|
||||
T_comp lam4;
|
||||
T_comp lam5;
|
||||
|
||||
mat<T_comp> Up;
|
||||
mat<T_comp> dU; // U-Up, after steepest descent
|
||||
mat<T_comp> uup; // U-Up, after "backtracking"
|
||||
mat<T_comp> Ux; // gradients in x-direction
|
||||
mat<T_comp> Uxp;
|
||||
mat<T_comp> dUx;
|
||||
mat<T_comp> Uy; // gradients in y-direction
|
||||
mat<T_comp> Uyp;
|
||||
mat<T_comp> dUy;
|
||||
mat<T_comp> Uz; // gradients in y-direction
|
||||
mat<T_comp> Uzp;
|
||||
mat<T_comp> dUz;
|
||||
mat<T_comp> Wx;
|
||||
mat<T_comp> Wy;
|
||||
mat<T_comp> Wz;
|
||||
mat<T_comp> Atb; // A'*b
|
||||
mat<T_comp> Au; // A*u
|
||||
mat<T_comp> Aup;
|
||||
mat<T_comp> dAu;
|
||||
mat<T_comp> d; // gradient of objective function (2.28)
|
||||
mat<T_comp> g; // sub term of (2.28)
|
||||
mat<T_comp> gp;
|
||||
mat<T_comp> dg;
|
||||
mat<T_comp> g2; // sub term of (2.28)
|
||||
mat<T_comp> g2p;
|
||||
mat<T_comp> dg2;
|
||||
tval3_data<T_comp, T_scal>(const mat<T_comp> &U, const mat<T_comp> &b, T_scal mu0, T_scal beta0):
|
||||
p(U.dim_y), q(U.dim_x), r(U.dim_z), n(U.len), m(b.len), mu(mu0), beta(beta0), delta(b.len), sigmaX(U.dim_y, U.dim_x, U.dim_z),
|
||||
sigmaY(U.dim_y, U.dim_x, U.dim_z), sigmaZ(U.dim_y, U.dim_x, U.dim_z), Up(U.dim_y, U.dim_x, U.dim_z), dU(U.dim_y, U.dim_x, U.dim_z),
|
||||
uup(U.len), Ux(U.dim_y, U.dim_x, U.dim_z), Uxp(U.dim_y, U.dim_x, U.dim_z), dUx(U.dim_y, U.dim_x, U.dim_z), Uy(U.dim_y, U.dim_x, U.dim_z),
|
||||
Uyp(U.dim_y, U.dim_x, U.dim_z), dUy(U.dim_y, U.dim_x, U.dim_z), Uz(U.dim_y, U.dim_x, U.dim_z), Uzp(U.dim_y, U.dim_x, U.dim_z),
|
||||
dUz(U.dim_y, U.dim_x, U.dim_z), Wx(U.dim_y, U.dim_x, U.dim_z), Wy(U.dim_y, U.dim_x, U.dim_z), Wz(U.dim_y, U.dim_x, U.dim_z), Atb(U.len),
|
||||
Au(b.len), Aup(b.len), dAu(b.len), d(U.dim_y, U.dim_x, U.dim_z), g(U.len), gp(U.len), dg(U.len), g2(U.len), g2p(U.len), dg2(U.len) {}
|
||||
};
|
||||
|
||||
// z <- x + alpha*y
|
||||
// N: number of elements
|
||||
template<class T_mat, class T_scal>
|
||||
void vec_add(const int N, const int stride, const T_mat *x, const T_scal alpha, const T_mat *y, T_mat *z) {
|
||||
#pragma vector always
|
||||
#pragma parallel always
|
||||
for(int i=0; i<N; i++)
|
||||
z[i*stride] = x[i*stride] + alpha*y[i*stride];
|
||||
}
|
||||
|
||||
template<class T_mat, class T_scal>
|
||||
void vec_add(const int N, const int stride, const std::complex<T_mat> *x, const T_scal alpha, const std::complex<T_mat> *y, std::complex<T_mat> *z) {
|
||||
T_mat *data_x = (T_mat *)x;
|
||||
T_mat *data_y = (T_mat *)y;
|
||||
T_mat *data_z = (T_mat *)z;
|
||||
#pragma vector always
|
||||
#pragma parallel always
|
||||
for(int i=0; i<N; i++) {
|
||||
data_z[2*i*stride] = data_x[2*i*stride] + alpha*data_y[2*i*stride];
|
||||
data_z[2*i*stride + 1] = data_x[2*i*stride + 1] + alpha*data_y[2*i*stride + 1];
|
||||
}
|
||||
}
|
||||
|
||||
// Scales the measurement vector. Returns the scale factor.
|
||||
template<class T_comp, class T_scal>
|
||||
T_scal scaleb(mat<T_comp> &b) {
|
||||
T_scal scl=1;
|
||||
if(b.len > 0) {
|
||||
T_scal threshold1 = 0.5;
|
||||
T_scal threshold2 = 1.5;
|
||||
scl=1;
|
||||
T_comp val;
|
||||
T_scal val_abs;
|
||||
T_comp bmin = b[0];
|
||||
T_comp bmax = bmin;
|
||||
T_scal bmin_abs = abs(bmin);
|
||||
T_scal bmax_abs = bmin_abs;
|
||||
for(int i=0; i < b.len; i++) {
|
||||
val = b[i];
|
||||
val_abs = abs(val);
|
||||
if(val_abs < bmin_abs) {
|
||||
bmin = val;
|
||||
bmin_abs = val_abs;
|
||||
}
|
||||
if(val_abs > bmax_abs) {
|
||||
bmax = val;
|
||||
bmax_abs = val_abs;
|
||||
}
|
||||
}
|
||||
T_scal b_dif = abs(bmax - bmin);
|
||||
if(b_dif < threshold1) {
|
||||
scl = threshold1/b_dif;
|
||||
cblas_scal(b.len, scl, b.data(), 1);
|
||||
} else if(b_dif > threshold2) {
|
||||
scl = threshold2/b_dif;
|
||||
cblas_scal(b.len, scl, b.data(), 1);
|
||||
}
|
||||
}
|
||||
return scl;
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
void D(mat<T_comp> &Ux, mat<T_comp> &Uy, mat<T_comp> &Uz, const mat<T_comp> &U) {
|
||||
#ifdef PROFILING
|
||||
double start = get_timestamp();
|
||||
#endif
|
||||
vec_add(U.dim_x*U.dim_y*U.dim_z - 1, 1, U.data()+1, -1, U.data(), Ux.data());
|
||||
vec_add(U.dim_y*U.dim_z, U.dim_x, U.data(), -1, U.data()+U.dim_x-1, Ux.data()+U.dim_x-1);
|
||||
|
||||
vec_add((U.dim_y*U.dim_z-1)*U.dim_x, 1, U.data() + U.dim_x, -1, U.data(), Uy.data());
|
||||
for(int z = 0; z < U.dim_z; z++)
|
||||
vec_add(U.dim_x, 1, U.data() + z*U.dim_x*U.dim_y, -1, U.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y,
|
||||
Uy.data() + (U.dim_y-1)*U.dim_x + z*U.dim_x*U.dim_y);
|
||||
|
||||
vec_add(U.dim_x*U.dim_y*(U.dim_z - 1), 1, U.data() + U.dim_x*U.dim_y, -1, U.data(), Uz.data());
|
||||
vec_add(U.dim_x*U.dim_y, 1, U.data(), -1, U.data() + (U.dim_z - 1) * U.dim_x * U.dim_y,
|
||||
Uz.data() + (U.dim_z - 1) * U.dim_x * U.dim_y);
|
||||
#ifdef PROFILING
|
||||
double stop = get_timestamp();
|
||||
if(rec_time) t_d += stop - start;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
void Dt(mat<T_comp> &res, const mat<T_comp> &X, const mat<T_comp> &Y, const mat<T_comp> &Z) {
|
||||
#ifdef PROFILING
|
||||
double start = get_timestamp();
|
||||
#endif
|
||||
vec_add(X.len-1, 1, X.data(), -1, X.data()+1, res.data()+1);
|
||||
vec_add(X.dim_y * X.dim_z, X.dim_x, X.data()+X.dim_x-1, -1, X.data(), res.data());
|
||||
|
||||
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)1, Y.data(), 1, res.data()+Y.dim_x, 1);
|
||||
cblas_axpy((Y.dim_y*Y.dim_z-1)*Y.dim_x, (T_comp)-1, Y.data()+Y.dim_x, 1, res.data()+Y.dim_x, 1);
|
||||
for(int z = 0; z < Y.dim_z; z++) {
|
||||
cblas_axpy(Y.dim_x, (T_comp)1, Y.data() + (Y.dim_y-1)*Y.dim_x + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
|
||||
cblas_axpy(Y.dim_x, (T_comp)-1, Y.data() + z*Y.dim_x*Y.dim_y, 1, res.data() + z*Y.dim_x*Y.dim_y, 1);
|
||||
}
|
||||
|
||||
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)1, Z.data(), 1, res.data()+Z.dim_y*Z.dim_x, 1);
|
||||
cblas_axpy(Z.dim_y*Z.dim_x*(Z.dim_z-1), (T_comp)-1, Z.data()+Z.dim_y*Z.dim_x, 1, res.data()+Z.dim_y*Z.dim_x, 1);
|
||||
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)1, Z.data() + (Z.dim_z-1)*Z.dim_x*Z.dim_y, 1, res.data(), 1);
|
||||
cblas_axpy(Z.dim_y*Z.dim_x, (T_comp)-1, Z.data(), 1, res.data(), 1);
|
||||
#ifdef PROFILING
|
||||
double stop = get_timestamp();
|
||||
if(rec_time) t_dt += stop - start;
|
||||
#endif
|
||||
}
|
||||
|
||||
// overloaded function get_g_mult calculates:
|
||||
// Au = A*U...
|
||||
// g = A'*Au...
|
||||
|
||||
// standrad version (parameter submat_mb not used)
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
void get_g_mult(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, int submat_mb) {
|
||||
gemv(CblasNoTrans, A, U, data.Au);
|
||||
gemv(CblasConjTrans, A, data.Au, data.g);
|
||||
}
|
||||
|
||||
// special version for dense matrices (cache blocking for single layer reconstructions....)
|
||||
// submat_mb: size of submatrices in MB
|
||||
template<class T_comp, class T_scal>
|
||||
void get_g_mult(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &A, const mat<T_comp> &U, int submat_mb) {
|
||||
|
||||
int submat_elements = submat_mb*1024*1024 / sizeof(T_comp);
|
||||
|
||||
mat<T_comp> g_tmp(data.n);
|
||||
|
||||
int submat_dim_y = max(submat_elements / data.n, 1);
|
||||
|
||||
int n_y = (data.m + submat_dim_y - 1) / submat_dim_y;
|
||||
|
||||
int r;
|
||||
memset(data.g.data(), 0, data.n * sizeof(T_comp));
|
||||
for(int y=0; y < n_y; y++) {
|
||||
r = min(submat_dim_y, data.m - y * submat_dim_y);
|
||||
|
||||
cblas_gemv(CblasRowMajor, CblasNoTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, U.data(), 1, 0, data.Au.data() + y * submat_dim_y, 1);
|
||||
|
||||
cblas_gemv(CblasRowMajor, CblasConjTrans, r, data.n, 1, A.data() + y * submat_dim_y * data.n, data.n, data.Au.data() + y * submat_dim_y, 1, 0, g_tmp.data(), 1);
|
||||
|
||||
cblas_axpy(data.n, 1, g_tmp.data(), 1, data.g.data(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
void get_g(tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &U, const mat<T_comp> &b, int submat_mb) {
|
||||
#ifdef PROFILING
|
||||
double start = get_timestamp();
|
||||
#endif
|
||||
get_g_mult(data, A, U, submat_mb);
|
||||
#ifdef PROFILING
|
||||
double stop = get_timestamp();
|
||||
if(rec_time) t_mult += stop - start;
|
||||
#endif
|
||||
|
||||
cblas_axpy(data.n, (T_comp)-1, data.Atb.data(), 1, data.g.data(), 1);
|
||||
|
||||
// update g2, lam2, lam4...
|
||||
mat<T_comp> Vx(data.p, data.q, data.r);
|
||||
mat<T_comp> Vy(data.p, data.q, data.r);
|
||||
mat<T_comp> Vz(data.p, data.q, data.r);
|
||||
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
||||
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
||||
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
||||
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
|
||||
|
||||
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
||||
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
||||
|
||||
//update lam3, lam5...
|
||||
mat<T_comp> Aub(data.m);
|
||||
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
||||
data.lam3=real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
|
||||
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
||||
|
||||
data.f = (data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3) - data.lam4 - data.lam5;
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
T_scal get_tau(tval3_data<T_comp, T_scal> &data, const T_A &A, bool fst_iter) {
|
||||
T_scal tau;
|
||||
|
||||
// calculate tau...
|
||||
if(fst_iter) {
|
||||
mat<T_comp> dx(data.p, data.q, data.r);
|
||||
mat<T_comp> dy(data.p, data.q, data.r);
|
||||
mat<T_comp> dz(data.p, data.q, data.r);
|
||||
D<T_comp, T_scal>(dx, dy, dz, data.d);
|
||||
T_comp dDd = cblas_dotc(data.n, dx.data(), 1, dx.data(), 1) +
|
||||
cblas_dotc(data.n, dy.data(), 1, dy.data(), 1) +
|
||||
cblas_dotc(data.n, dz.data(), 1, dz.data(), 1);
|
||||
T_comp dd = cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1);
|
||||
mat<T_comp> Ad(data.m);
|
||||
// Ad=A*d...
|
||||
#ifdef PROFILING
|
||||
double start = get_timestamp();
|
||||
#endif
|
||||
gemv(CblasNoTrans, A, data.d, Ad);
|
||||
#ifdef PROFILING
|
||||
double stop = get_timestamp();
|
||||
if(rec_time) t_mult += stop - start;
|
||||
#endif
|
||||
T_comp Add = cblas_dotc(data.m, Ad.data(), 1, Ad.data(), 1);
|
||||
|
||||
tau = abs(dd/(dDd + data.muDbeta*Add));
|
||||
} else {
|
||||
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data());
|
||||
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data());
|
||||
T_comp ss = cblas_dotc(data.n, data.uup.data(), 1, data.uup.data(), 1);
|
||||
mat<T_comp> tmp(data.dg2);
|
||||
cblas_axpy(data.n, data.muDbeta, data.dg.data(), 1, tmp.data(), 1);
|
||||
T_comp sy = cblas_dotc(data.n, data.uup.data(), 1, tmp.data(), 1);
|
||||
tau = abs(ss/sy);
|
||||
}
|
||||
return tau;
|
||||
}
|
||||
|
||||
template<class T_mat>
|
||||
void nonneg(mat<T_mat> &U) {
|
||||
T_mat val;
|
||||
#pragma parallel always
|
||||
#pragma vector always
|
||||
for(int i=0; i < U.len; i++) {
|
||||
val = U[i];
|
||||
U[i] = (val < 0) ? 0 : val;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T_mat>
|
||||
void nonneg(mat<complex<T_mat> > &U) {
|
||||
T_mat val;
|
||||
T_mat *data = (T_mat *) U.data();
|
||||
#pragma parallel always
|
||||
#pragma vector always
|
||||
for(int i=0; i < U.len; i++) {
|
||||
val = data[2*i];
|
||||
data[2*i] = (val < 0) ? 0 : val;
|
||||
data[2*i + 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
void descend(mat<T_comp> &U, tval3_data<T_comp, T_scal> &data, const T_A &A, const mat<T_comp> &b, T_scal tau, bool non_neg,
|
||||
int submat_mb) {
|
||||
cblas_axpy(data.n, -1*tau, data.d.data(), 1, U.data(),1); // U = U - tau*d
|
||||
if(non_neg) {
|
||||
nonneg(U);
|
||||
}
|
||||
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
|
||||
get_g<T_comp, T_scal>(data, A, U, b, submat_mb);
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
void update_g(tval3_data<T_comp, T_scal> &data, const T_scal alpha, const T_A &A, mat<T_comp> &U, const mat<T_comp> &b) {
|
||||
vec_add(data.n, 1, data.gp.data(), alpha, data.dg.data(), data.g.data());
|
||||
vec_add(data.n, 1, data.g2p.data(), alpha, data.dg2.data(), data.g2.data());
|
||||
vec_add(data.n, 1, data.Up.data(), alpha, data.dU.data(), U.data());
|
||||
vec_add(data.m, 1, data.Aup.data(), alpha, data.dAu.data(), data.Au.data());
|
||||
vec_add(data.n, 1, data.Uxp.data(), alpha, data.dUx.data(), data.Ux.data());
|
||||
vec_add(data.n, 1, data.Uyp.data(), alpha, data.dUy.data(), data.Uy.data());
|
||||
vec_add(data.n, 1, data.Uzp.data(), alpha, data.dUz.data(), data.Uz.data());
|
||||
|
||||
// update lam2, lam4...
|
||||
mat<T_comp> Vx(data.p, data.q, data.r);
|
||||
mat<T_comp> Vy(data.p, data.q, data.r);
|
||||
mat<T_comp> Vz(data.p, data.q, data.r);
|
||||
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
||||
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
||||
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
||||
|
||||
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
||||
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
||||
|
||||
//update lam3, lam5...
|
||||
mat<T_comp> Aub(data.m);
|
||||
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
||||
data.lam3 =real(cblas_dotc(data.m, Aub.data(), 1, Aub.data(), 1));
|
||||
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
||||
|
||||
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
void min_u(tval3_data<T_comp, T_scal> &data, mat<T_comp> &U, T_scal &gam, const T_A &A, const mat<T_comp> &b,
|
||||
const tval3_options<T_scal> &opts, T_comp C, bool fst_iter, int submat_mb) {
|
||||
T_scal tau, alpha, c_armin;
|
||||
|
||||
tau = get_tau<T_comp, T_scal, T_A>(data, A, fst_iter);
|
||||
|
||||
// keep previous values...
|
||||
data.Up = U; data.gp = data.g; data.g2p = data.g2;
|
||||
data.Aup = data.Au; data.Uxp = data.Ux; data.Uyp = data.Uy;
|
||||
data.Uzp = data.Uz;
|
||||
|
||||
// one step steepest descend...
|
||||
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
|
||||
|
||||
// NMLA...
|
||||
alpha=1;
|
||||
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.dU.data()); // Ud = U - Up
|
||||
c_armin = real(cblas_dotc(data.n, data.d.data(), 1, data.d.data(), 1) * tau * opts.c * data.beta); // c_armin = d'*d*tau*c*beta
|
||||
if(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
|
||||
|
||||
vec_add(data.n, 1, data.g.data(), -1, data.gp.data(), data.dg.data()); // dg=g-gp
|
||||
vec_add(data.n, 1, data.g2.data(), -1, data.g2p.data(), data.dg2.data()); // dg2=g2-g2p
|
||||
vec_add(data.m, 1, data.Au.data(), -1, data.Aup.data(), data.dAu.data()); // dAu=Au-Aup
|
||||
vec_add(data.n, 1, data.Ux.data(), -1, data.Uxp.data(), data.dUx.data()); // dUx=Ux-Uxp
|
||||
vec_add(data.n, 1, data.Uy.data(), -1, data.Uyp.data(), data.dUy.data()); // Uy = Uy-Uyp
|
||||
vec_add(data.n, 1, data.Uz.data(), -1, data.Uzp.data(), data.dUz.data()); // Uz = Uz-Uzp
|
||||
|
||||
int cnt=0;
|
||||
|
||||
while(abs(data.f) > abs(C - alpha*c_armin)) { // Arminjo condition...
|
||||
|
||||
if(cnt==5) { // "backtracking" not successful...
|
||||
gam *= opts.rate_gam;
|
||||
tau = get_tau<T_comp, T_scal, T_A>(data, A, true);
|
||||
U = data.Up;
|
||||
descend<T_comp, T_scal, T_A>(U, data, A, b, tau, opts.nonneg, submat_mb);
|
||||
break;
|
||||
}
|
||||
alpha *= opts.gamma;
|
||||
|
||||
update_g<T_comp, T_scal, T_A>(data, alpha, A, U, b);
|
||||
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
void get_gradient(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &DtsAtd) {
|
||||
// d = g2 + muDbeta*g + DtsAtd... (DtsAtd has opposite sign, compared to the MATLAB-version!)
|
||||
data.d = DtsAtd;
|
||||
cblas_axpy(data.n, 1, data.g2.data(), 1, data.d.data(), 1);
|
||||
cblas_axpy(data.n, data.muDbeta, data.g.data(), 1, data.d.data(), 1);
|
||||
}
|
||||
|
||||
// scalar
|
||||
template<class T_mat, class T_scal>
|
||||
void shrinkage(tval3_data<T_mat, T_scal> &data) {
|
||||
T_scal sum=0;
|
||||
T_scal temp_wx, temp_wy, temp_wz;
|
||||
T_mat Uxbar, Uybar, Uzbar;
|
||||
#pragma parallel always
|
||||
#pragma vector always
|
||||
for(int i=0; i < data.n; i++) {
|
||||
Uxbar = data.Ux[i] - data.sigmaX[i]/data.beta;
|
||||
Uybar = data.Uy[i] - data.sigmaY[i]/data.beta;
|
||||
Uzbar = data.Uz[i] - data.sigmaZ[i]/data.beta;
|
||||
|
||||
temp_wx = abs(Uxbar) - 1/data.beta;
|
||||
temp_wy = abs(Uybar) - 1/data.beta;
|
||||
temp_wz = abs(Uzbar) - 1/data.beta;
|
||||
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
|
||||
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
|
||||
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
|
||||
sum += temp_wx + temp_wy + temp_wz;
|
||||
data.Wx[i] = (Uxbar >= 0) ? temp_wx : -1 * temp_wx;
|
||||
data.Wy[i] = (Uybar >= 0) ? temp_wy : -1 * temp_wy;
|
||||
data.Wz[i] = (Uzbar >= 0) ? temp_wz : -1 * temp_wz;
|
||||
}
|
||||
data.lam1 = sum;
|
||||
}
|
||||
|
||||
// complex
|
||||
template<class T_mat, class T_scal>
|
||||
void shrinkage(tval3_data<complex<T_mat>, T_scal> &data) {
|
||||
T_mat sum=0;
|
||||
T_mat temp_wx, temp_wy, temp_wz;
|
||||
T_mat Uxbar_real, Uxbar_imag, Uxbar_abs, Uybar_real, Uybar_imag, Uybar_abs, Uzbar_real, Uzbar_imag, Uzbar_abs;
|
||||
T_mat *Ux_data = (T_mat *) data.Ux.data();
|
||||
T_mat *Uy_data = (T_mat *) data.Uy.data();
|
||||
T_mat *Uz_data = (T_mat *) data.Uz.data();
|
||||
T_mat *sigmaX_data = (T_mat *) data.sigmaX.data();
|
||||
T_mat *sigmaY_data = (T_mat *) data.sigmaY.data();
|
||||
T_mat *sigmaZ_data = (T_mat *) data.sigmaZ.data();
|
||||
T_mat *Wx_data = (T_mat *) data.Wx.data();
|
||||
T_mat *Wy_data = (T_mat *) data.Wy.data();
|
||||
T_mat *Wz_data = (T_mat *) data.Wz.data();
|
||||
const int len = data.n;
|
||||
#pragma parallel always
|
||||
#pragma vector always
|
||||
for(int i=0; i < len; i++) {
|
||||
Uxbar_real = Ux_data[2*i] - sigmaX_data[2*i]/data.beta;
|
||||
Uxbar_imag = Ux_data[2*i + 1] - sigmaX_data[2*i + 1]/data.beta;
|
||||
Uybar_real = Uy_data[2*i] - sigmaY_data[2*i]/data.beta;
|
||||
Uybar_imag = Uy_data[2*i + 1] - sigmaY_data[2*i + 1]/data.beta;
|
||||
Uzbar_real = Uz_data[2*i] - sigmaZ_data[2*i]/data.beta;
|
||||
Uzbar_imag = Uz_data[2*i + 1] - sigmaZ_data[2*i + 1]/data.beta;
|
||||
|
||||
Uxbar_abs = sqrt(Uxbar_real*Uxbar_real + Uxbar_imag*Uxbar_imag);
|
||||
Uybar_abs = sqrt(Uybar_real*Uybar_real + Uybar_imag*Uybar_imag);
|
||||
Uzbar_abs = sqrt(Uzbar_real*Uzbar_real + Uzbar_imag*Uzbar_imag);
|
||||
|
||||
temp_wx = Uxbar_abs - 1/data.beta;
|
||||
temp_wy = Uybar_abs - 1/data.beta;
|
||||
temp_wz = Uzbar_abs - 1/data.beta;
|
||||
temp_wx = (temp_wx >= 0) ? temp_wx : 0;
|
||||
temp_wy = (temp_wy >= 0) ? temp_wy : 0;
|
||||
temp_wz = (temp_wz >= 0) ? temp_wz : 0;
|
||||
sum += temp_wx + temp_wy + temp_wz;
|
||||
|
||||
Wx_data[2*i] = (Uxbar_abs > 0) ? temp_wx*Uxbar_real/Uxbar_abs : 0;
|
||||
Wx_data[2*i+1] = (Uxbar_abs > 0) ? temp_wx*Uxbar_imag/Uxbar_abs : 0;
|
||||
Wy_data[2*i] = (Uybar_abs > 0) ? temp_wy*Uybar_real/Uybar_abs : 0;
|
||||
Wy_data[2*i+1] = (Uybar_abs > 0) ? temp_wy*Uybar_imag/Uybar_abs : 0;
|
||||
Wz_data[2*i] = (Uzbar_abs > 0) ? temp_wz*Uybar_real/Uzbar_abs : 0;
|
||||
Wz_data[2*i+1] = (Uzbar_abs > 0) ? temp_wz*Uybar_imag/Uzbar_abs : 0;
|
||||
}
|
||||
data.lam1 = sum;
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
void update_W(tval3_data<T_comp, T_scal> &data) {
|
||||
|
||||
data.f -= (data.lam1 + data.beta/2*data.lam2 - data.lam4);
|
||||
|
||||
shrinkage(data);
|
||||
|
||||
// update g2, lam2, lam4...
|
||||
mat<T_comp> Vx(data.p, data.q, data.r);
|
||||
mat<T_comp> Vy(data.p, data.q, data.r);
|
||||
mat<T_comp> Vz(data.p, data.q, data.r);
|
||||
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
||||
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
||||
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
||||
Dt<T_comp, T_scal>(data.g2, Vx, Vy, Vz);
|
||||
|
||||
data.lam2=real(cblas_dotc(data.n, Vx.data(), 1, Vx.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vy.data(), 1, Vy.data(), 1));
|
||||
data.lam2 += real(cblas_dotc(data.n, Vz.data(), 1, Vz.data(), 1));
|
||||
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
||||
|
||||
data.f += (data.lam1 + data.beta/2*data.lam2 - data.lam4);
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
void update_mlp(tval3_data<T_comp, T_scal> &data, const mat<T_comp> &b) {
|
||||
data.f += (data.lam4 + data.lam5);
|
||||
|
||||
mat<T_comp> Vx(data.p, data.q, data.r);
|
||||
mat<T_comp> Vy(data.p, data.q, data.r);
|
||||
mat<T_comp> Vz(data.p, data.q, data.r);
|
||||
vec_add(data.n, 1, data.Ux.data(), (T_scal)-1, data.Wx.data(), Vx.data()); // Vx = Ux - Wx
|
||||
vec_add(data.n, 1, data.Uy.data(), (T_scal)-1, data.Wy.data(), Vy.data()); // Vy = Uy - Wy
|
||||
vec_add(data.n, 1, data.Uz.data(), (T_scal)-1, data.Wz.data(), Vz.data()); // Vz = Uz - Wz
|
||||
|
||||
cblas_axpy(data.n, -1*data.beta, Vx.data(), 1, data.sigmaX.data(), 1); // sigmaX -= beta*Vx
|
||||
cblas_axpy(data.n, -1*data.beta, Vy.data(), 1, data.sigmaY.data(), 1); // sigmaY -= beta*Vy
|
||||
cblas_axpy(data.n, -1*data.beta, Vz.data(), 1, data.sigmaZ.data(), 1); // sigmaz -= beta*Vz
|
||||
|
||||
data.lam4=real(cblas_dotc(data.n, data.sigmaX.data(), 1, Vx.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaY.data(), 1, Vy.data(), 1));
|
||||
data.lam4 += real(cblas_dotc(data.n, data.sigmaZ.data(), 1, Vz.data(), 1));
|
||||
|
||||
mat<T_comp> Aub(data.m);
|
||||
vec_add(data.m, 1, data.Au.data(), (T_scal)-1, b.data(), Aub.data()); // Aub = Au - b
|
||||
cblas_axpy(data.m, -1*data.mu, Aub.data(), 1, data.delta.data(), 1); // delta -= mu*Aub
|
||||
data.lam5=real(cblas_dotc(data.m, data.delta.data(), 1, Aub.data(), 1));
|
||||
|
||||
data.f -= (data.lam4 + data.lam5);
|
||||
}
|
||||
|
||||
template<class T_comp>
|
||||
void check_params(mat<T_comp> &U, const mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
||||
if(U.format != mat_row_major || Ut.format != mat_row_major || A.format != mat_row_major)
|
||||
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
||||
else if(Ut.len > 0 && Ut.len != U.len)
|
||||
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
||||
else if(U.len != A.dim_x)
|
||||
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
|
||||
else if(b.len != A.dim_y)
|
||||
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
|
||||
}
|
||||
|
||||
template<class T_comp>
|
||||
void check_params(mat<T_comp> &U, const sparse_mat<T_comp> &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
||||
if(U.format != mat_row_major || Ut.format != mat_row_major)
|
||||
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
||||
if(A.format != sparse_mat_csc)
|
||||
throw tval3_exception(string("Argument error: A must be in CSC format!"));
|
||||
if(Ut.len > 0 && Ut.len != U.len)
|
||||
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
||||
else if(U.len != A.dim_x)
|
||||
throw tval3_exception("Argument error: the length of U must be equal to A.dim_x!");
|
||||
else if(b.len != A.dim_y)
|
||||
throw tval3_exception(string("Argument error: b.len must be equal to A.dim_y!"));
|
||||
}
|
||||
|
||||
template<class T_comp>
|
||||
void check_params(mat<T_comp> &U, const geometry &A, const mat<T_comp> &b, const mat<T_comp> &Ut) throw(tval3_exception) {
|
||||
if(U.format != mat_row_major || Ut.format != mat_row_major)
|
||||
throw tval3_exception(string("Argument error: Matrices must be in row major format!"));
|
||||
else if(Ut.len > 0 && Ut.len != U.len)
|
||||
throw tval3_exception(string("Argument error: U and Ut must have the same size!"));
|
||||
else if(b.len != A.num_emitters * A.num_receivers)
|
||||
throw tval3_exception(string("Argument error: b.len must be equal to A.num_emitters * A.num_receivers!"));
|
||||
}
|
||||
|
||||
template<class T_comp, class T_scal, class T_A>
|
||||
const tval3_info<T_scal> tval3(mat<T_comp> &U, const T_A &A, const mat<T_comp> &b, const tval3_options<T_scal> &opts, const mat<T_comp> &Ut,
|
||||
int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception) {
|
||||
|
||||
double start, stop;
|
||||
#ifdef PROFILING
|
||||
double start_loop, stop_loop;
|
||||
t_mult = 0; t_d = 0; t_dt = 0; rec_time = false;
|
||||
#endif
|
||||
tval3_info<T_scal> info;
|
||||
start = get_timestamp();
|
||||
try {
|
||||
|
||||
check_params (U, A, b, Ut);
|
||||
|
||||
omp_set_num_threads(omp_get_num_procs() / virtual_procs_per_core);
|
||||
|
||||
mat<T_comp> bs = b; // create a copy of b (scaling...)
|
||||
tval3_data<T_comp, T_scal> data(U, bs, opts.mu0, opts.beta0);
|
||||
T_scal muf=opts.mu;
|
||||
T_scal betaf=opts.beta, beta0=0;
|
||||
mat<T_comp> DtsAtd(data.n);
|
||||
T_scal gam = opts.gam;
|
||||
T_scal scl = scaleb<T_comp, T_scal>(bs);
|
||||
gemv(CblasConjTrans, A, bs, data.Atb); // Atb = A'*bs
|
||||
U = data.Atb; // initial guess: U=A'*b
|
||||
if(data.mu > muf) data.mu = muf;
|
||||
if(data.beta > betaf) data.beta = betaf;
|
||||
data.muDbeta = data.mu/data.beta;
|
||||
mat <T_comp> rcdU(U), UrcdU(data.n);
|
||||
D<T_comp, T_scal>(data.Ux, data.Uy, data.Uz, U);
|
||||
shrinkage(data);
|
||||
get_g<T_comp, T_scal>(data, A, U, bs, submat_mb);
|
||||
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
||||
T_scal Q=1, Qp;
|
||||
T_comp C=data.f;
|
||||
int count_outer=0, count_total=0;
|
||||
bool fst_iter;
|
||||
T_scal RelChg, RelChgOut=0, nrmup=0;
|
||||
|
||||
#ifdef PROFILING
|
||||
rec_time = true;
|
||||
start_loop = get_timestamp();
|
||||
#endif
|
||||
|
||||
while((count_outer < opts.maxcnt) && (count_total < opts.maxit)) {
|
||||
|
||||
fst_iter = true;
|
||||
while(count_total < opts.maxit) {
|
||||
// u-subproblem...
|
||||
min_u<T_comp, T_scal>(data, U, gam, A, bs, opts, C, fst_iter, submat_mb);
|
||||
|
||||
// shrinkage like step...
|
||||
update_W<T_comp, T_scal>(data);
|
||||
|
||||
// update reference values...
|
||||
Qp = Q, Q = gam*Qp + 1; C = (gam*Qp*C + data.f)/Q;
|
||||
vec_add(data.n, 1, U.data(), -1, data.Up.data(), data.uup.data());
|
||||
|
||||
// compute gradient...
|
||||
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
||||
|
||||
count_total++;
|
||||
|
||||
// calculate relative change...
|
||||
nrmup = cblas_nrm2(data.n, data.Up.data(), 1);
|
||||
RelChg = cblas_nrm2(data.n, data.uup.data(), 1) / nrmup;
|
||||
|
||||
if((RelChg < opts.tol_inn) && (!fst_iter))
|
||||
break;
|
||||
|
||||
fst_iter = false;
|
||||
|
||||
}
|
||||
count_outer++;
|
||||
|
||||
// calculate relative change...
|
||||
vec_add(data.n, 1, U.data(), -1, rcdU.data(), UrcdU.data()); // UrcdU = U - rcdU
|
||||
RelChgOut = cblas_nrm2(data.n, UrcdU.data(), 1) / nrmup;
|
||||
rcdU = U;
|
||||
|
||||
if(RelChgOut < opts.tol)
|
||||
break;
|
||||
|
||||
// update multipliers...
|
||||
update_mlp<T_comp, T_scal>(data, bs);
|
||||
|
||||
// update penalty parameters for continuation scheme...
|
||||
beta0 = data.beta;
|
||||
data.beta *= opts.rate_cnt;
|
||||
data.mu *= opts.rate_cnt;
|
||||
if(data.beta > betaf) data.beta = betaf;
|
||||
if(data.mu > muf) data.mu = muf;
|
||||
data.muDbeta = data.mu/data.beta;
|
||||
|
||||
// update f...
|
||||
data.f = data.lam1 + data.beta/2*data.lam2 + data.mu/2*data.lam3 - data.lam4 - data.lam5;
|
||||
|
||||
// DtsAtd = beta0/beta*d
|
||||
DtsAtd = data.d;
|
||||
cblas_scal(data.n, beta0/data.beta, DtsAtd.data(), 1);
|
||||
|
||||
// compute gradient...
|
||||
get_gradient<T_comp, T_scal>(data, DtsAtd);
|
||||
|
||||
// reset reference values...
|
||||
gam=opts.gam; Q=1; C=data.f;
|
||||
}
|
||||
#ifdef PROFILING
|
||||
stop_loop = get_timestamp();
|
||||
rec_time = false;
|
||||
|
||||
ofstream file;
|
||||
file.open("profile.txt");
|
||||
file << "loop [ms]: " << (stop_loop - start_loop)*1000 << "\n";
|
||||
file << "mult [ms]: " << t_mult*1000 << "\n";
|
||||
file << "d [ms]: " << t_d*1000 << "\n";
|
||||
file << "dt [ms]: " << t_dt*1000 << "\n";
|
||||
file.close();
|
||||
#endif
|
||||
|
||||
// scale U. Take real part only, if opts.isreal==true
|
||||
for(int i=0; i < data.n; i++) {
|
||||
if(opts.isreal)
|
||||
U[i] = (T_comp)real(U[i]);
|
||||
U[i] = U[i]/scl;
|
||||
}
|
||||
|
||||
stop = get_timestamp();
|
||||
|
||||
// generate return value...
|
||||
info.secs = stop - start;
|
||||
info.outer_iters = count_outer;
|
||||
info.total_iters = count_total;
|
||||
info.rel_chg = RelChgOut;
|
||||
if(Ut.len == U.len) {
|
||||
mat<T_comp> deltaU = Ut;
|
||||
cblas_axpy(Ut.len, (T_comp)-1, U.data(), 1, deltaU.data(), 1);
|
||||
T_scal nrm_deltaU = cblas_nrm2(deltaU.len, deltaU.data(), 1);
|
||||
T_scal nrm_Ut = cblas_nrm2(Ut.len, Ut.data(), 1);
|
||||
info.rel_error = nrm_deltaU/nrm_Ut;
|
||||
} else {
|
||||
info.rel_error = 0;
|
||||
}
|
||||
} catch (tval3_exception) {
|
||||
throw;
|
||||
} catch (const std::exception &ex) {
|
||||
string msg = "Internal error: ";
|
||||
msg += ex.what();
|
||||
throw tval3_exception(msg);
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
const tval3_info<float> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
|
||||
const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, float, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b,
|
||||
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, double, mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<double> &U, const mat<double> &A, const mat<double> &b,
|
||||
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
|
||||
return tval3<double, double, mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<float> tval3(mat<complex<float> > &U, const mat<complex<float> > &A, const mat<complex<float> > &b,
|
||||
const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<float>, float, mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<complex<double> > &U, const mat<complex<double> > &A, const mat<complex<double> > &b,
|
||||
const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<double>, double, mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
|
||||
const tval3_info<float> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
|
||||
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, float, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b,
|
||||
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, double, sparse_mat<float> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<double> &U, const sparse_mat<double> &A, const mat<double> &b,
|
||||
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<double, double, sparse_mat<double> >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<float> tval3(mat<complex<float> > &U, const sparse_mat<complex<float> > &A,
|
||||
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<float>, float, sparse_mat<std::complex<float> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<complex<double> > &U, const sparse_mat<complex<double> > &A,
|
||||
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<double>, double, sparse_mat<std::complex<double> > >(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
const tval3_info<float> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
|
||||
const tval3_options<float> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<float> &U, const geometry &A, const mat<float> &b,
|
||||
const tval3_options<double> &opts, const mat<float> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<float, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<double> &U, const geometry &A, const mat<double> &b,
|
||||
const tval3_options<double> &opts, const mat<double> &Ut, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<double, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<float> tval3(mat<complex<float> > &U, const geometry &A,
|
||||
const mat<complex<float> > &b, const tval3_options<float> &opts, const mat<complex<float> > &Ut, int submat_mb=5,
|
||||
int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<float>, float, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
|
||||
const tval3_info<double> tval3(mat<complex<double> > &U, const geometry &A,
|
||||
const mat<complex<double> > &b, const tval3_options<double> &opts, const mat<complex<double> > &Ut, int submat_mb=5,
|
||||
int virtual_procs_per_core=2) throw(tval3_exception)
|
||||
{
|
||||
return tval3<std::complex<double>, double, geometry>(U, A, b, opts, Ut, submat_mb, virtual_procs_per_core);
|
||||
}
|
||||
67
TVALCPU/src/tval3.h
Normal file
67
TVALCPU/src/tval3.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* tval3.h
|
||||
*
|
||||
* Created on: Mar 14, 2011
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#ifndef TVAL3_H_
|
||||
#define TVAL3_H_
|
||||
|
||||
#include "container.h"
|
||||
#include <complex>
|
||||
#include "tval3_types.h"
|
||||
|
||||
extern const tval3_info<float> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<float> &U, const mat<float> &A, const mat<float> &b, const tval3_options<double> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<double> &U, const mat<double> &A, const mat<double> &b, const tval3_options<double> &opts,
|
||||
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const mat<std::complex<float> > &A, const mat<std::complex<float> > &b,
|
||||
const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
|
||||
throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const mat<std::complex<double> > &A,
|
||||
const mat<std::complex<double> > &b, const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5,
|
||||
int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<float> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b, const tval3_options<float> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<float> &U, const sparse_mat<float> &A, const mat<float> &b, const tval3_options<double> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<double> &U, const sparse_mat<double> &A, const mat<double> &b, const tval3_options<double> &opts,
|
||||
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const sparse_mat<std::complex<float> > &A,
|
||||
const mat<std::complex<float> > &b, const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5,
|
||||
int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const sparse_mat<std::complex<double> > &A,
|
||||
const mat<std::complex<double> > &b, const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5,
|
||||
int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<float> tval3(mat<float> &U, const geometry &A, const mat<float> &b, const tval3_options<float> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<float> &U, const geometry &A, const mat<float> &b, const tval3_options<double> &opts,
|
||||
const mat<float> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<double> &U, const geometry &A, const mat<double> &b, const tval3_options<double> &opts,
|
||||
const mat<double> &Ut=0, int submat_mb=5, int virtual_procs_per_core=2) throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<float> tval3(mat<std::complex<float> > &U, const geometry &A, const mat<std::complex<float> > &b,
|
||||
const tval3_options<float> &opts, const mat<std::complex<float> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
|
||||
throw(tval3_exception);
|
||||
|
||||
extern const tval3_info<double> tval3(mat<std::complex<double> > &U, const geometry &A, const mat<std::complex<double> > &b,
|
||||
const tval3_options<double> &opts, const mat<std::complex<double> > &Ut=0, int submat_mb=5, int virtual_procs_per_core=2)
|
||||
throw(tval3_exception);
|
||||
|
||||
|
||||
#endif /* TVAL3_H_ */
|
||||
49
TVALCPU/src/tval3_types.h
Normal file
49
TVALCPU/src/tval3_types.h
Normal file
@@ -0,0 +1,49 @@
|
||||
#ifndef TVAL3_TYPES_H_
|
||||
#define TVAL3_TYPES_H_
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
template<class T_scal>
|
||||
struct tval3_options {
|
||||
T_scal mu; // maximum value for penalty parameter mu, see (2.12)
|
||||
// mu is mainly decided by noise level. Set mu big when b is noise-free,
|
||||
// set mu small when b is very noisy
|
||||
// suggested values for mu: 2⁴..2¹³
|
||||
T_scal mu0; // initial value for mu
|
||||
T_scal beta; // maximum value for penalty parameter beta, see (2.12)
|
||||
T_scal beta0; // initial value for beta
|
||||
T_scal rate_cnt; // continuation parameter for both mu and beta
|
||||
T_scal tol; // outer loop tolerance
|
||||
T_scal tol_inn; // inner loop tolerance
|
||||
int maxcnt; // maximum number of outer iterations
|
||||
int maxit; // maximum number of iterations (total)
|
||||
T_scal c; // corresponds to the parameter delta in (2.33) (arminjo condition)
|
||||
T_scal gamma; // corresponds to rho "Algorithm 2" on page 29
|
||||
T_scal gam; // corresponds to parameter eta in (2.34)
|
||||
// Control the degree of nonmonotonicity. 0 corresponds to monotone line search.
|
||||
// The best convergence is obtained by using values closer to 1 when the iterates
|
||||
// are far from the optimum, and using values closer to 0 when near an optimum.
|
||||
T_scal rate_gam; // shrinkage rate of gam
|
||||
bool isreal;
|
||||
bool nonneg;
|
||||
tval3_options(): mu(256.0f), mu0(256.0f), beta(32.0f), beta0(32.0f), rate_cnt(2.0f),
|
||||
tol(1e-6f), tol_inn(1e-3f), maxcnt(10), maxit(1025), c(1e-5f), gamma(0.6f),
|
||||
gam(0.9995f), rate_gam(0.9f), isreal(true), nonneg(true) {};
|
||||
};
|
||||
|
||||
template<class T_scal>
|
||||
struct tval3_info {
|
||||
int total_iters;
|
||||
int outer_iters;
|
||||
T_scal rel_chg;
|
||||
T_scal rel_error;
|
||||
double secs;
|
||||
};
|
||||
|
||||
class tval3_exception: public std::runtime_error {
|
||||
public:
|
||||
tval3_exception(const std::string& message) : std::runtime_error(message) {};
|
||||
};
|
||||
|
||||
#endif
|
||||
653
TVALCPU/src/tval3cpp3d.cpp
Normal file
653
TVALCPU/src/tval3cpp3d.cpp
Normal file
@@ -0,0 +1,653 @@
|
||||
#include "tval3cpp3d.h"
|
||||
#include <matrix.h>
|
||||
#include <mex.h>
|
||||
#include <tval3.h>
|
||||
#include <complex>
|
||||
#include <exception>
|
||||
#include <mkl_cblas.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// create mat-object from mxArray (row major storage)
|
||||
template<class Type>
|
||||
mat<Type> *getMatrix(const mxArray *mi) {
|
||||
|
||||
if(mi != NULL) {
|
||||
const mwSize *dims = mxGetDimensions(mi);
|
||||
int dim_y = dims[0];
|
||||
int dim_x = dims[1];
|
||||
int dim_z = (mxGetNumberOfDimensions(mi) > 2) ? dims[2] : 1;
|
||||
|
||||
Type *mi_data = (Type *)mxGetData(mi);
|
||||
|
||||
mat<Type> *mo = new mat<Type>(dim_y, dim_x, dim_z);
|
||||
|
||||
for(int z=0; z < dim_z; z++) {
|
||||
for(int y=0; y < dim_y; y++) {
|
||||
for(int x=0; x < dim_x; x++) {
|
||||
(*mo)[x + y*dim_x + z*dim_x*dim_y] =
|
||||
mi_data[x*dim_y + y + z*dim_x*dim_y];
|
||||
}
|
||||
}
|
||||
}
|
||||
return mo;
|
||||
} else {
|
||||
mat<Type> *mo = new mat<Type>(0);
|
||||
return mo;
|
||||
}
|
||||
}
|
||||
|
||||
// create mat-object from complex mxArray (row major)
|
||||
template<class Type>
|
||||
mat<complex<Type> > *getCompMatrix(const mxArray *mi) {
|
||||
|
||||
if(mi != NULL) {
|
||||
const mwSize *dims = mxGetDimensions(mi);
|
||||
int dim_y = dims[0];
|
||||
int dim_x = dims[1];
|
||||
int dim_z = (mxGetNumberOfDimensions(mi) > 2) ? dims[2] : 1;
|
||||
|
||||
Type *mi_real = (Type *)mxGetData(mi);
|
||||
Type *mi_imag = (Type *)mxGetImagData(mi);
|
||||
|
||||
mat<complex<Type> > *mo = new mat<complex<Type> >(dim_y, dim_x, dim_z);
|
||||
|
||||
for(int z=0; z < dim_z; z++) {
|
||||
for(int y=0; y < dim_y; y++) {
|
||||
for(int x=0; x < dim_x; x++) {
|
||||
(*mo)[x + y*dim_x].real(mi_real[x*dim_y + y]);
|
||||
if(mxIsComplex(mi))
|
||||
(*mo)[x + y*dim_x + z*dim_x*dim_y].imag(mi_imag[x*dim_y + y + z*dim_x*dim_y]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return mo;
|
||||
} else {
|
||||
mat<complex<Type> > *mo = new mat<complex<Type> >(0);
|
||||
return mo;
|
||||
}
|
||||
}
|
||||
|
||||
// create sparse_mat-object from mxArray
|
||||
template<class Type>
|
||||
sparse_mat<Type> *getSparseMatrix(const mxArray *mi) {
|
||||
|
||||
const mwSize *dims = mxGetDimensions(mi);
|
||||
int dim_y = dims[0];
|
||||
int dim_x = dims[1];
|
||||
mwIndex *mi_row_ind = mxGetIr(mi);
|
||||
mwIndex *mi_col_ptr = mxGetJc(mi);
|
||||
int nnz = (int)mxGetNzmax(mi);
|
||||
|
||||
double *mi_data = (double *)mxGetData(mi);
|
||||
|
||||
sparse_mat<Type> *mo;
|
||||
|
||||
mo = new sparse_mat<Type>(dim_y, dim_x, nnz);
|
||||
|
||||
for(int i=0; i < (mo->dim_x + 1); i++) {
|
||||
mo->ptr()[i] = (int) mi_col_ptr[i];
|
||||
}
|
||||
|
||||
for(int i=0; i < mo->nnz; i++) {
|
||||
mo->val()[i] = (Type) mi_data[i];
|
||||
mo->ind()[i] = (int) mi_row_ind[i];
|
||||
}
|
||||
|
||||
return mo;
|
||||
}
|
||||
|
||||
|
||||
// create sparse_mat-object from complex mxArray (row major)
|
||||
template<class Type>
|
||||
sparse_mat<complex<Type> > *getSparseCompMatrix(const mxArray *mi) {
|
||||
|
||||
const mwSize *dims = mxGetDimensions(mi);
|
||||
int dim_y = dims[0];
|
||||
int dim_x = dims[1];
|
||||
|
||||
double *mi_real = (double *)mxGetData(mi);
|
||||
double *mi_imag = (double *)mxGetImagData(mi);
|
||||
mwIndex *mi_row_ind = mxGetIr(mi);
|
||||
mwIndex *mi_col_ptr = mxGetJc(mi);
|
||||
int nnz = (int)mxGetNzmax(mi);
|
||||
|
||||
sparse_mat<complex<Type> > *mo = new sparse_mat<complex<Type> >(dim_y,
|
||||
dim_x, nnz);
|
||||
|
||||
for(int i=0; i < mo->nnz; i++) {
|
||||
mo->ind()[i] = mi_row_ind[i];
|
||||
mo->val()[i].real((Type)mi_real[i]);
|
||||
if(mxIsComplex(mi))
|
||||
mo->val()[i].imag((Type)mi_imag[i]);
|
||||
}
|
||||
|
||||
for(int i=0; i < dim_x + 1; i++)
|
||||
mo->ptr()[i] = mi_col_ptr[i];
|
||||
|
||||
return mo;
|
||||
}
|
||||
|
||||
// get geometry structure from mxArray
|
||||
geometry *get_geometry(const mxArray *A) {
|
||||
float scale_factor;
|
||||
int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r;
|
||||
int ne, nr, rv_x, rv_y, rv_z;
|
||||
mxArray *field;
|
||||
|
||||
field = mxGetField(A, 0, "rv_x");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field rv_x in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.rv_x must be of type Int32!");
|
||||
}
|
||||
rv_x = *( (int *)mxGetData(field) );
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "rv_y");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field rv_y in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.rv_y must be of type Int32!");
|
||||
}
|
||||
rv_y = *( (int *)mxGetData(field) );
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "rv_z");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field rv_z in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.rv_z must be of type Int32!");
|
||||
}
|
||||
rv_z = *( (int *)mxGetData(field) );
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "scale_factor");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field scale_factor in struct A!");
|
||||
} else {
|
||||
if(!mxIsSingle(field)) {
|
||||
mexErrMsgTxt("A.scale_factor must be of type single!");
|
||||
}
|
||||
scale_factor = *( (float *)mxGetData(field) );
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "x_emitters");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field x_emitters in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.x_emitters must be of type int32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.x_emitters must be a column-vector!");
|
||||
ne = dims[1];
|
||||
x_e = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "y_emitters");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field y_emitters in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.y_emitters must be of type in32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.y_emitters must be a column-vector!");
|
||||
if(dims[1] != ne)
|
||||
mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!");
|
||||
y_e = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "z_emitters");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field z_emitters in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.z_emitters must be of type in32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.z_emitters must be a column-vector!");
|
||||
if(dims[1] != ne)
|
||||
mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!");
|
||||
z_e = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
|
||||
field = mxGetField(A, 0, "x_receivers");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field x_receivers in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.x_receivers must be of type in32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.x_receivers must be a column-vector!");
|
||||
nr = dims[1];
|
||||
x_r = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "y_receivers");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field y_receivers in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.y_receivers must be of type in32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.y_receivers must be a column-vector!");
|
||||
if(dims[1] != nr)
|
||||
mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!");
|
||||
y_r = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
field = mxGetField(A, 0, "z_receivers");
|
||||
if(field == NULL) {
|
||||
mexErrMsgTxt("no field z_receivers in struct A!");
|
||||
} else {
|
||||
if(!mxIsInt32(field)) {
|
||||
mexErrMsgTxt("A.z_receivers must be of type int32!");
|
||||
}
|
||||
const mwSize *dims = mxGetDimensions(field);
|
||||
if(dims[0] > 1)
|
||||
mexErrMsgTxt("A.z_receivers must be a column-vector!");
|
||||
if(dims[1] != nr)
|
||||
mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!");
|
||||
z_r = (int *)mxGetData(field);
|
||||
}
|
||||
|
||||
geometry *geom = new geometry;
|
||||
geom->num_emitters = ne;
|
||||
geom->num_receivers = nr;
|
||||
geom->x_emitters = x_e;
|
||||
geom->y_emitters = y_e;
|
||||
geom->z_emitters = z_e;
|
||||
geom->x_receivers = x_r;
|
||||
geom->y_receivers = y_r;
|
||||
geom->z_receivers = z_r;
|
||||
geom->scale_factor = scale_factor;
|
||||
geom->rv_x = rv_x;
|
||||
geom->rv_y = rv_y;
|
||||
geom->rv_z = rv_z;
|
||||
|
||||
return geom;
|
||||
}
|
||||
|
||||
// create tval3_options struct
|
||||
template<class Type>
|
||||
tval3_options<Type> *getOptions(const mxArray *optsi) {
|
||||
|
||||
tval3_options<Type> *optso = new tval3_options<Type>;
|
||||
|
||||
if(!mxIsStruct(optsi)) {
|
||||
mexPrintf("Warning: opts is not a structure.");
|
||||
return optso;
|
||||
}
|
||||
|
||||
mxArray *field;
|
||||
|
||||
field = mxGetField(optsi, 0, "mu");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.mu is not of type double (ignored).");
|
||||
else
|
||||
optso->mu = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "mu0");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.mu0 is not of type double (ignored).");
|
||||
else
|
||||
optso->mu0 = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "beta");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.beta is not of type double (ignored).");
|
||||
else
|
||||
optso->beta = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "beta0");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.beta0 is not of type double (ignored).");
|
||||
else
|
||||
optso->beta0 = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "rate_cnt");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.rate_cnt is not of type double (ignored).");
|
||||
else
|
||||
optso->rate_cnt = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "tol");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.tol is not of type double (ignored).");
|
||||
else
|
||||
optso->tol = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "tol_inn");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.tol_inn is not of type double (ignored).");
|
||||
else
|
||||
|
||||
optso->tol_inn = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "maxcnt");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.maxcnt is not of type double (ignored).");
|
||||
else
|
||||
optso->maxcnt = (int) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "maxit");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.maxit is not of type double (ignored).");
|
||||
else
|
||||
optso->maxit = (int) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "c");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.c is not of type double (ignored).");
|
||||
else
|
||||
optso->c = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "gamma");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.gamma is not of type double (ignored).");
|
||||
else
|
||||
optso->gamma = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "gam");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.gam is not of type double (ignored).");
|
||||
else
|
||||
optso->gam = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "rate_gam");
|
||||
if(field != NULL) {
|
||||
if(!mxIsDouble(field))
|
||||
mexPrintf("Warning: opts.rate_gam is not of type double (ignored).");
|
||||
else
|
||||
optso->rate_gam = (Type) *mxGetPr(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "isreal");
|
||||
if(field != NULL) {
|
||||
if(!mxIsLogical(field))
|
||||
mexPrintf("Warning: opts.isreal is not of type logical (ignored).");
|
||||
else
|
||||
optso->isreal = (bool) *mxGetLogicals(field);
|
||||
}
|
||||
|
||||
field = mxGetField(optsi, 0, "nonneg");
|
||||
if(field != NULL) {
|
||||
if(!mxIsLogical(field))
|
||||
mexPrintf("Warning: opts.nonneg is not of type logical (ignored).");
|
||||
else
|
||||
optso->nonneg = (bool) *mxGetLogicals(field);
|
||||
}
|
||||
|
||||
return optso;
|
||||
}
|
||||
|
||||
// create mxArray from struct tval3_info
|
||||
template<class Type>
|
||||
mxArray *setInfo(tval3_info<Type> &info) {
|
||||
const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"};
|
||||
|
||||
mxArray *output = mxCreateStructMatrix(1, 1, 5, field_names);
|
||||
|
||||
mxArray *value;
|
||||
|
||||
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
||||
*mxGetPr(value) = info.total_iters;
|
||||
mxSetField(output, 0, "total_iters", value);
|
||||
|
||||
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
||||
*mxGetPr(value) = info.outer_iters;
|
||||
mxSetField(output, 0, "outer_iters", value);
|
||||
|
||||
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
||||
*mxGetPr(value) = info.rel_chg;
|
||||
mxSetField(output, 0, "rel_chg", value);
|
||||
|
||||
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
||||
*mxGetPr(value) = info.rel_error;
|
||||
mxSetField(output, 0, "rel_error", value);
|
||||
|
||||
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
||||
*mxGetPr(value) = info.secs;
|
||||
mxSetField(output, 0, "secs", value);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// create mxArray from mat-object (row major)
|
||||
template<class Type>
|
||||
mxArray *setMatrix(const mat<Type> &mi) {
|
||||
|
||||
mwSize dims[] = {mi.dim_y, mi.dim_x, mi.dim_z};
|
||||
mxArray *mo = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxREAL);
|
||||
double *mo_data = mxGetPr(mo);
|
||||
|
||||
|
||||
for(int z=0; z < mi.dim_z; z++) {
|
||||
for(int y=0; y < mi.dim_y; y++) {
|
||||
for(int x=0; x < mi.dim_x; x++) {
|
||||
mo_data[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mo;
|
||||
}
|
||||
|
||||
// create complex mxArray from mat-object (row major)
|
||||
template<class Type>
|
||||
mxArray *setCompMatrix(const mat<complex<Type> > &mi) {
|
||||
|
||||
mwSize dims[] = {mi.dim_y, mi.dim_x, mi.dim_z};
|
||||
mxArray *mo = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxCOMPLEX);
|
||||
double *mo_real = mxGetPr(mo);
|
||||
double *mo_imag = mxGetPi(mo);
|
||||
|
||||
for(int z=0; z < mi.dim_z; z++) {
|
||||
for(int y=0; y < mi.dim_y; y++) {
|
||||
for(int x=0; x < mi.dim_x; x++) {
|
||||
mo_real[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y].real();
|
||||
mo_imag[x*mi.dim_y + y + z*mi.dim_x*mi.dim_y] = mi[x + y*mi.dim_x + z*mi.dim_x*mi.dim_y].imag();
|
||||
}
|
||||
}
|
||||
}
|
||||
return mo;
|
||||
}
|
||||
|
||||
template<class T_mat, class T_scal>
|
||||
void call_tval3(const mxArray *A, const mxArray *b, const mxArray *Ut,
|
||||
int ip, int iq, int ir, const mxArray *opts, mxArray **U, mxArray **info,
|
||||
geometry *geo, int submat_mb, int virtual_procs_per_core)
|
||||
{
|
||||
mat<T_mat> *mb = getMatrix<T_mat>(b);
|
||||
mat<T_mat> mU(ip, iq, ir);
|
||||
mat<T_mat> *mUt = getMatrix<T_mat>(Ut);
|
||||
|
||||
tval3_options<T_scal> *options = getOptions<T_scal>(opts);
|
||||
|
||||
tval3_info<T_scal> ti_info;
|
||||
if(geo != NULL) {
|
||||
ti_info = tval3(mU, *geo, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
} else {
|
||||
if(mxIsSparse(A)) {
|
||||
sparse_mat<T_mat> *mA = getSparseMatrix<T_mat>(A);
|
||||
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
delete mA;
|
||||
} else {
|
||||
mat<T_mat> *mA = getMatrix<T_mat>(A);
|
||||
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
delete mA;
|
||||
}
|
||||
}
|
||||
|
||||
*U = setMatrix(mU);
|
||||
if(info != NULL) *info = setInfo(ti_info);
|
||||
|
||||
delete mb; delete mUt; delete options;
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
void call_tval3_comp(const mxArray *A, const mxArray *b, const mxArray *Ut,
|
||||
int ip, int iq, int ir, const mxArray *opts, mxArray **U, mxArray **info,
|
||||
geometry *geo, int submat_mb, int virtual_procs_per_core)
|
||||
{
|
||||
mat<complex<Type> > *mb = getCompMatrix<Type>(b);
|
||||
mat<complex<Type> > mU(ip, iq, ir);
|
||||
mat<complex<Type> > *mUt = getCompMatrix<Type>(Ut);
|
||||
|
||||
tval3_options<Type> *options = getOptions<Type>(opts);
|
||||
|
||||
tval3_info<Type> ti_info;
|
||||
|
||||
if(geo != NULL) {
|
||||
ti_info = tval3(mU, *geo, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
} else {
|
||||
if(mxIsSparse(A)) {
|
||||
sparse_mat<complex<Type> > *mA = getSparseCompMatrix<Type>(A);
|
||||
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
delete mA;
|
||||
} else {
|
||||
mat<complex<Type> > *mA = getCompMatrix<Type>(A);
|
||||
ti_info = tval3(mU, *mA, *mb, *options, *mUt, submat_mb,
|
||||
virtual_procs_per_core);
|
||||
delete mA;
|
||||
}
|
||||
}
|
||||
|
||||
*U = setCompMatrix(mU);
|
||||
if(info != NULL) *info = setInfo(ti_info);
|
||||
|
||||
delete mb; delete mUt; delete options;
|
||||
}
|
||||
|
||||
// [U, out] = tval3cpp(A,b,p,q,opts,use_dbl)
|
||||
void TVALCPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
|
||||
|
||||
// check number of arguments...
|
||||
if ((nrhs < 6) || (nrhs > 10))
|
||||
mexErrMsgTxt("Wrong number of input arguments.");
|
||||
if(nlhs > 2)
|
||||
mexErrMsgTxt("Too many output arguments.");
|
||||
|
||||
// assign input arguments...
|
||||
const mxArray *A = prhs[0];
|
||||
const mxArray *b = prhs[1];
|
||||
const mxArray *p = prhs[2];
|
||||
const mxArray *q = prhs[3];
|
||||
const mxArray *r = prhs[4];
|
||||
const mxArray *opts = prhs[5];
|
||||
const mxArray *use_dbl = (nrhs < 7) ? NULL : prhs [6];
|
||||
const mxArray *Ut = mxGetField(opts, 0, "Ut");
|
||||
const mxArray *submat_mb = (nrhs < 8) ? NULL : prhs[7];
|
||||
const mxArray *virtual_procs_per_core = (nrhs < 9) ? NULL : prhs[8];
|
||||
|
||||
// assign output arguments...
|
||||
mxArray **U = &plhs[0];
|
||||
mxArray **info = (nlhs < 2) ? NULL : &plhs[1];
|
||||
|
||||
// check data types and assign variables...
|
||||
geometry *geo = NULL;
|
||||
|
||||
if(!mxIsStruct(A) && !mxIsSingle(A) && !mxIsDouble(A))
|
||||
mexErrMsgTxt("A must be single, double or struct.");
|
||||
if(!mxIsSparse(A) && !mxIsStruct(A) && mxGetClassID(A) != mxGetClassID(b))
|
||||
mexErrMsgTxt("A and b must be of the same type when using dense matrices.");
|
||||
if(!mxIsSingle(b) && !mxIsDouble(b))
|
||||
mexErrMsgTxt("b must be single or double.");
|
||||
if((Ut != NULL) && (mxGetClassID(Ut) != mxGetClassID(b)))
|
||||
mexErrMsgTxt("opts.Ut must be of same type as b.");
|
||||
if(!mxIsDouble(p))
|
||||
mexErrMsgTxt("p must be of type double.");
|
||||
if(!mxIsDouble(q))
|
||||
mexErrMsgTxt("q must be of type double.");
|
||||
if(!mxIsDouble(r))
|
||||
mexErrMsgTxt("r must be of type double.");
|
||||
if((use_dbl != NULL) && (!mxIsLogical(use_dbl)))
|
||||
mexErrMsgTxt("use_dbl is not a logical value.");
|
||||
if((submat_mb != NULL) && (!mxIsDouble(submat_mb)))
|
||||
mexErrMsgTxt("submat_mb must be of type double.");
|
||||
if((virtual_procs_per_core != NULL) && (!mxIsDouble(virtual_procs_per_core)))
|
||||
mexErrMsgTxt("virtual_procs_per_core must be of type double.");
|
||||
|
||||
bool b_use_dbl = (use_dbl == NULL) ? false : (bool) mxGetLogicals(use_dbl)[0];
|
||||
int ip = (int)(*mxGetPr(p));
|
||||
int iq = (int)(*mxGetPr(q));
|
||||
int ir = (int)(*mxGetPr(r));
|
||||
int isubmat_mb = (submat_mb == NULL) ? 5 : (int)(*mxGetPr(submat_mb));
|
||||
int ivirtual_procs_per_core = (virtual_procs_per_core == NULL) ? 2 :
|
||||
(int)(*mxGetPr(virtual_procs_per_core));
|
||||
|
||||
if(mxIsStruct(A)) {
|
||||
geo = get_geometry(A);
|
||||
}
|
||||
|
||||
// call tval3()...
|
||||
try{
|
||||
if(!mxIsComplex(A) && !mxIsComplex(b)) { // scalar matrices...
|
||||
if(mxIsSingle(b)) {
|
||||
if(b_use_dbl) {
|
||||
call_tval3<float, double>(A, b, Ut, ip, iq, ir, opts, U,
|
||||
info, geo, isubmat_mb, ivirtual_procs_per_core);
|
||||
} else {
|
||||
call_tval3<float, float>(A, b, Ut, ip, iq, ir, opts, U,
|
||||
info, geo, isubmat_mb, ivirtual_procs_per_core);
|
||||
}
|
||||
} else {
|
||||
call_tval3<double, double>(A, b, Ut, ip, iq, ir, opts, U,
|
||||
info, geo, isubmat_mb, ivirtual_procs_per_core);
|
||||
}
|
||||
} else { // complex matrices...
|
||||
if(mxIsSingle(b)) {
|
||||
call_tval3_comp<float>(A, b, Ut, ip, iq, ir, opts, U, info,
|
||||
geo, isubmat_mb, ivirtual_procs_per_core);
|
||||
} else {
|
||||
call_tval3_comp<double>(A, b, Ut, ip, iq, ir, opts, U, info,
|
||||
geo, isubmat_mb, ivirtual_procs_per_core);
|
||||
}
|
||||
}
|
||||
} catch(const std::exception &ex) {
|
||||
mexErrMsgTxt(ex.what());
|
||||
}
|
||||
if(geo != NULL) delete geo;
|
||||
}
|
||||
4
TVALCPU/src/tval3cpp3d.h
Normal file
4
TVALCPU/src/tval3cpp3d.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#include <mex.h>
|
||||
extern "C"{
|
||||
void TVALCPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) ;
|
||||
}
|
||||
45
TVALCPU/src/utility.h
Normal file
45
TVALCPU/src/utility.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* utility.h
|
||||
*
|
||||
* Created on: Mar 22, 2011
|
||||
* Author: ditlevsen
|
||||
*/
|
||||
|
||||
#ifndef UTILITY_H_
|
||||
#define UTILITY_H_
|
||||
|
||||
/**********************************************************************************/
|
||||
/* functions, etc. for handling different data types in mathematical expressions */
|
||||
/**********************************************************************************/
|
||||
|
||||
// overloaded versions of conj() and real() for scalar values...
|
||||
// (not needed when compiled with gcc and -std_c++0x)
|
||||
|
||||
template <class Type>
|
||||
inline Type conj(Type f) {
|
||||
return f;
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
inline Type real(Type f) {
|
||||
return f;
|
||||
}
|
||||
|
||||
template<class Type>
|
||||
inline Type imag(Type f) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// sign function
|
||||
|
||||
template<class T_comp, class T_scal>
|
||||
inline T_comp sign(T_comp x) {
|
||||
T_scal ax = std::abs(x);
|
||||
if(ax > 0)
|
||||
return x/ax;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* UTILITY_H_ */
|
||||
Reference in New Issue
Block a user