522 lines
16 KiB
C++
522 lines
16 KiB
C++
#include <cuda_runtime.h>
|
|
#include <cuda.h>
|
|
#include <cublas_v2.h>
|
|
#include <cusparse.h>
|
|
#include <handle_error.h>
|
|
|
|
#include <matrix.h>
|
|
#include <mex.h>
|
|
#include <tval3_gpu.h>
|
|
|
|
#define OUTPUT_TYPES
|
|
|
|
using namespace std;
|
|
|
|
// Create mat_host-object from mxArray.
|
|
// The data buffer of the mxArray is used directly.
|
|
// If pagelocked==true a part of the buffer gets page locked in this function
|
|
// using cudaHostRegister and must later be unregistered using
|
|
// cudaHostUnregister() !!
|
|
// the address of the page locked part is returned in plb
|
|
// if there is no part of the buffer which fulfills the alignment
|
|
// requirements, no memory gets page locked and *plb is set to NULL
|
|
mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) {
|
|
|
|
if(mi != NULL) {
|
|
const mwSize *dims = mxGetDimensions(mi);
|
|
mwSize dim_y = dims[0];
|
|
mwSize dim_x = dims[1];
|
|
mwSize dim_z = (mxGetNumberOfDimensions(mi) >= 3) ? dims[2] : 1;
|
|
|
|
float *mi_data = (float *)mxGetData(mi);
|
|
|
|
if(pagelocked) {
|
|
// start of page locked area...
|
|
*plb = (void *) ((((long)mi_data + (4*1024) - 1) / (4*1024)) * 4*1024);
|
|
|
|
//size of page locked area...
|
|
size_t size = (dim_y * dim_x * dim_z * sizeof(float) - ((long)*plb - (long)mi_data)) / (4*1024) * (4*1024);
|
|
|
|
if(size > 0) {
|
|
HANDLE_ERROR(cudaHostRegister(*plb, size, cudaHostRegisterDefault));
|
|
mexPrintf("Pagelocked %i bytes. Offset: %i\n", size,
|
|
((long)*plb - (long)mi_data));
|
|
} else {
|
|
*plb = NULL;
|
|
}
|
|
} else {
|
|
*plb = NULL;
|
|
}
|
|
|
|
mat_host *mo = new mat_host(dim_y, dim_x, dim_z, mi_data);
|
|
|
|
return mo;
|
|
} else {
|
|
mat_host *mo = new mat_host(0);
|
|
return mo;
|
|
}
|
|
|
|
}
|
|
|
|
// create sparse_mat_host-object from mxArray
|
|
sparse_mat_host *getSparseMatrix(const mxArray *mi, bool pagelocked) {
|
|
|
|
const mwSize *dims = mxGetDimensions(mi);
|
|
int dim_y = dims[0];
|
|
int dim_x = dims[1];
|
|
mwIndex *mi_dim_y = mxGetIr(mi);
|
|
mwIndex *mi_columnIndex = mxGetJc(mi);
|
|
int n_nonzero = (int)mxGetNzmax(mi);
|
|
|
|
double *mi_data = (double *)mxGetData(mi);
|
|
|
|
sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero,
|
|
sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);
|
|
|
|
for(int i=0; i < dim_x + 1; i++)
|
|
mo->ptr()[i] = mi_columnIndex[i];
|
|
|
|
for(int i=0; i < mo->nnz; i++) {
|
|
mo->ind()[i] = mi_dim_y[i];
|
|
mo->val()[i] = (float)mi_data[i];
|
|
}
|
|
|
|
return mo;
|
|
}
|
|
|
|
// get geometry_host structure from mxArray
|
|
geometry_host *get_geometry(const mxArray *A) {
|
|
float scale_factor;
|
|
int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r;
|
|
int ne, nr, rv_x, rv_y, rv_z;
|
|
mxArray *field;
|
|
|
|
field = mxGetField(A, 0, "rv_x");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field rv_x in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.rv_x must be of type Int32!");
|
|
}
|
|
rv_x = *( (int *)mxGetData(field) );
|
|
}
|
|
|
|
field = mxGetField(A, 0, "rv_y");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field rv_y in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.rv_y must be of type Int32!");
|
|
}
|
|
rv_y = *( (int *)mxGetData(field) );
|
|
}
|
|
|
|
field = mxGetField(A, 0, "rv_z");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field rv_z in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.rv_z must be of type Int32!");
|
|
}
|
|
rv_z = *( (int *)mxGetData(field) );
|
|
}
|
|
|
|
field = mxGetField(A, 0, "scale_factor");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field scale_factor in struct A!");
|
|
} else {
|
|
if(!mxIsSingle(field)) {
|
|
mexErrMsgTxt("A.scale_factor must be of type single!");
|
|
}
|
|
scale_factor = *( (float *)mxGetData(field) );
|
|
}
|
|
|
|
field = mxGetField(A, 0, "x_emitters");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field x_emitters in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.x_emitters must be of type int32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.x_emitters must be a column-vector!");
|
|
ne = dims[1];
|
|
x_e = (int *)mxGetData(field);
|
|
}
|
|
|
|
field = mxGetField(A, 0, "y_emitters");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field y_emitters in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.y_emitters must be of type in32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.y_emitters must be a column-vector!");
|
|
if(dims[1] != ne)
|
|
mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!");
|
|
y_e = (int *)mxGetData(field);
|
|
}
|
|
|
|
field = mxGetField(A, 0, "z_emitters");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field z_emitters in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.z_emitters must be of type in32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.z_emitters must be a column-vector!");
|
|
if(dims[1] != ne)
|
|
mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!");
|
|
z_e = (int *)mxGetData(field);
|
|
}
|
|
|
|
|
|
field = mxGetField(A, 0, "x_receivers");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field x_receivers in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.x_receivers must be of type in32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.x_receivers must be a column-vector!");
|
|
nr = dims[1];
|
|
x_r = (int *)mxGetData(field);
|
|
}
|
|
|
|
field = mxGetField(A, 0, "y_receivers");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field y_receivers in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.y_receivers must be of type in32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.y_receivers must be a column-vector!");
|
|
if(dims[1] != nr)
|
|
mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!");
|
|
y_r = (int *)mxGetData(field);
|
|
}
|
|
|
|
field = mxGetField(A, 0, "z_receivers");
|
|
if(field == NULL) {
|
|
mexErrMsgTxt("no field z_receivers in struct A!");
|
|
} else {
|
|
if(!mxIsInt32(field)) {
|
|
mexErrMsgTxt("A.z_receivers must be of type int32!");
|
|
}
|
|
const mwSize *dims = mxGetDimensions(field);
|
|
if(dims[0] > 1)
|
|
mexErrMsgTxt("A.z_receivers must be a column-vector!");
|
|
if(dims[1] != nr)
|
|
mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!");
|
|
z_r = (int *)mxGetData(field);
|
|
}
|
|
|
|
geometry_host *geom = new geometry_host;
|
|
geom->num_emitters = ne;
|
|
geom->num_receivers = nr;
|
|
geom->x_emitters = x_e;
|
|
geom->y_emitters = y_e;
|
|
geom->z_emitters = z_e;
|
|
geom->x_receivers = x_r;
|
|
geom->y_receivers = y_r;
|
|
geom->z_receivers = z_r;
|
|
geom->scale_factor = scale_factor;
|
|
geom->rv_x = rv_x;
|
|
geom->rv_y = rv_y;
|
|
geom->rv_z = rv_z;
|
|
|
|
return geom;
|
|
}
|
|
|
|
// create tval3_options struct
|
|
tval3_options *getOptions(const mxArray *optsi) {
|
|
|
|
tval3_options *optso = new tval3_options;
|
|
|
|
if(!mxIsStruct(optsi)) {
|
|
mexPrintf("Warning: opts is not a structure.");
|
|
return optso;
|
|
}
|
|
|
|
mxArray *field;
|
|
|
|
field = mxGetField(optsi, 0, "mu");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.mu is not of type double (ignored).");
|
|
else
|
|
optso->mu = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "mu0");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.mu0 is not of type double (ignored).");
|
|
else
|
|
optso->mu0 = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "beta");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.beta is not of type double (ignored).");
|
|
else
|
|
optso->beta = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "beta0");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.beta0 is not of type double (ignored).");
|
|
else
|
|
optso->beta0 = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "rate_cnt");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.rate_cnt is not of type double (ignored).");
|
|
else
|
|
optso->rate_cnt = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "tol");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.tol is not of type double (ignored).");
|
|
else
|
|
optso->tol = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "tol_inn");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.tol_inn is not of type double (ignored).");
|
|
else
|
|
|
|
optso->tol_inn = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "maxcnt");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.maxcnt is not of type double (ignored).");
|
|
else
|
|
optso->maxcnt = (int) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "maxit");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.maxit is not of type double (ignored).");
|
|
else
|
|
optso->maxit = (int) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "c");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.c is not of type double (ignored).");
|
|
else
|
|
optso->c = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "gamma");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.gamma is not of type double (ignored).");
|
|
else
|
|
optso->gamma = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "gam");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.gam is not of type double (ignored).");
|
|
else
|
|
optso->gam = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "rate_gam");
|
|
if(field != NULL) {
|
|
if(!mxIsDouble(field))
|
|
mexPrintf("Warning: opts.rate_gam is not of type double (ignored).");
|
|
else
|
|
optso->rate_gam = (float) *mxGetPr(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "isreal");
|
|
if(field != NULL) {
|
|
if(!mxIsLogical(field))
|
|
mexPrintf("Warning: opts.isreal is not of type logical (ignored).");
|
|
else
|
|
optso->isreal = (bool) *mxGetLogicals(field);
|
|
}
|
|
|
|
field = mxGetField(optsi, 0, "nonneg");
|
|
if(field != NULL) {
|
|
if(!mxIsLogical(field))
|
|
mexPrintf("Warning: opts.nonneg is not of type logical (ignored).");
|
|
else
|
|
optso->nonneg = (bool) *mxGetLogicals(field);
|
|
}
|
|
|
|
return optso;
|
|
}
|
|
|
|
// create mxArray from struct tval3_info
|
|
mxArray *setInfo(tval3_info &info) {
|
|
const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"};
|
|
|
|
mxArray *output = mxCreateStructMatrix(1, 1, 5, field_names);
|
|
|
|
mxArray *value;
|
|
|
|
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
|
*mxGetPr(value) = info.total_iters;
|
|
mxSetField(output, 0, "total_iters", value);
|
|
|
|
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
|
*mxGetPr(value) = info.outer_iters;
|
|
mxSetField(output, 0, "outer_iters", value);
|
|
|
|
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
|
*mxGetPr(value) = info.rel_chg;
|
|
mxSetField(output, 0, "rel_chg", value);
|
|
|
|
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
|
*mxGetPr(value) = info.rel_error;
|
|
mxSetField(output, 0, "rel_error", value);
|
|
|
|
value = mxCreateDoubleMatrix(1,1,mxREAL);
|
|
*mxGetPr(value) = info.secs;
|
|
mxSetField(output, 0, "secs", value);
|
|
|
|
return output;
|
|
}
|
|
|
|
// create mxArray from mat-object
|
|
mxArray *setMatrix(const mat_host &mi) {
|
|
|
|
mwSize dimensions[3];
|
|
dimensions[0] = mi.dim_y;
|
|
dimensions[1] = mi.dim_x;
|
|
dimensions[2] = mi.dim_z;
|
|
mxArray *mo = mxCreateNumericArray(3, dimensions, mxSINGLE_CLASS, mxREAL);
|
|
float *mo_data = (float *) mxGetData(mo);
|
|
|
|
memcpy(mo_data, mi.data(), mi.len * sizeof(float));
|
|
|
|
return mo;
|
|
}
|
|
|
|
// [U, out] = tval3cpp(A,b,p,q,opts,use_dbl)
|
|
void TVALGPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
|
|
|
|
// check number of arguments...
|
|
if ((nrhs < 6) || (nrhs > 9))
|
|
mexErrMsgTxt("Wrong number of input arguments.");
|
|
if(nlhs > 2)
|
|
mexErrMsgTxt("Too many output arguments.");
|
|
|
|
// assign input arguments...
|
|
const mxArray *A = prhs[0];
|
|
const mxArray *b = prhs[1];
|
|
const mxArray *p = prhs[2];
|
|
const mxArray *q = prhs[3];
|
|
const mxArray *r = prhs[4];
|
|
const mxArray *opts = prhs[5];
|
|
const mxArray *Ut = mxGetField(opts, 0, "Ut");
|
|
const mxArray *device = (nrhs < 7) ? NULL : prhs [6];
|
|
const mxArray *pagelocked = (nrhs < 8) ? NULL : prhs [7];
|
|
|
|
// assign output arguments...
|
|
mxArray **U = &plhs[0];
|
|
mxArray **info = (nlhs < 2) ? NULL : &plhs[1];
|
|
|
|
// check data types and assign variables...
|
|
geometry_host *geo = NULL;
|
|
|
|
if(!mxIsStruct(A) && !mxIsSingle(A) &&
|
|
!(mxIsDouble(A) && mxIsSparse(A)) || mxIsComplex(A))
|
|
mexErrMsgTxt("A must be a struct or a non complex dense matrix of type single or a non complex sparse matrix.");
|
|
if(!mxIsSingle(b) || mxIsComplex(b))
|
|
mexErrMsgTxt("b must be non complex single.");
|
|
if((Ut != NULL) && (!mxIsSingle(Ut) || mxIsComplex(Ut)))
|
|
mexErrMsgTxt("opts.Ut must be non complex single.");
|
|
if(!mxIsDouble(p))
|
|
mexErrMsgTxt("p must be of type double.");
|
|
if(!mxIsDouble(q))
|
|
mexErrMsgTxt("q must be of type double.");
|
|
if(!mxIsDouble(r))
|
|
mexErrMsgTxt("r must be of type double.");
|
|
if((device != NULL) && (!mxIsDouble(device)))
|
|
mexErrMsgTxt("device must be of type double.");
|
|
if((pagelocked != NULL) && (!mxIsLogical(pagelocked)))
|
|
mexErrMsgTxt("pagelocked is not a logical value.");
|
|
|
|
int ip = (int)(*mxGetPr(p));
|
|
int iq = (int)(*mxGetPr(q));
|
|
int ir = (int)(*mxGetPr(r));
|
|
int i_device = (device == 0) ? 0 : (int)(*mxGetPr(device));
|
|
bool b_pagelocked = (pagelocked == NULL) ? true : (bool) mxGetLogicals(pagelocked)[0];
|
|
|
|
if(mxIsStruct(A)) {
|
|
geo = get_geometry(A);
|
|
}
|
|
|
|
try {
|
|
HANDLE_ERROR(cudaSetDevice(i_device));
|
|
|
|
void *plb_b, *plb_Ut, *plb_A;
|
|
mat_host *mb = getMatrix(b, b_pagelocked, &plb_b);
|
|
mat_host mU(ip, iq, ir, mat_col_major, false, b_pagelocked);
|
|
mat_host *mUt = getMatrix(Ut, b_pagelocked, &plb_Ut);
|
|
|
|
tval3_options *options = getOptions(opts);
|
|
|
|
tval3_info ti_info;
|
|
|
|
|
|
if(geo != NULL) {
|
|
ti_info = tval3_gpu(mU, *geo, *mb, *options, *mUt, b_pagelocked);
|
|
} else if(mxIsSparse(A)) {
|
|
sparse_mat_host *mA = getSparseMatrix(A, b_pagelocked);
|
|
ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked);
|
|
delete mA;
|
|
} else {
|
|
mat_host *mA = getMatrix(A, b_pagelocked, &plb_A);
|
|
ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked);
|
|
if(plb_A != NULL)
|
|
HANDLE_ERROR(cudaHostUnregister(plb_A));
|
|
delete mA;
|
|
}
|
|
|
|
*U = setMatrix(mU);
|
|
if(info != NULL) *info = setInfo(ti_info);
|
|
|
|
if(plb_b != NULL)
|
|
HANDLE_ERROR(cudaHostUnregister(plb_b));
|
|
|
|
/* Commented as this variable has not been registered using cudaHostRegister (TH Feb.2021) */
|
|
//if(plb_Ut != NULL)
|
|
// HANDLE_ERROR(cudaHostUnregister(plb_Ut));
|
|
|
|
delete mb;
|
|
delete mUt;
|
|
delete options;
|
|
|
|
} catch(const std::exception &ex) {
|
|
mexErrMsgTxt(ex.what());
|
|
}
|
|
}
|