#include #include #include #include #include #include #include #include #define OUTPUT_TYPES using namespace std; // Create mat_host-object from mxArray. // The data buffer of the mxArray is used directly. // If pagelocked==true a part of the buffer gets page locked in this function // using cudaHostRegister and must later be unregistered using // cudaHostUnregister() !! // the address of the page locked part is returned in plb // if there is no part of the buffer which fulfills the alignment // requirements, no memory gets page locked and *plb is set to NULL mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) { if(mi != NULL) { const mwSize *dims = mxGetDimensions(mi); mwSize dim_y = dims[0]; mwSize dim_x = dims[1]; mwSize dim_z = (mxGetNumberOfDimensions(mi) >= 3) ? dims[2] : 1; float *mi_data = (float *)mxGetData(mi); if(pagelocked) { // start of page locked area... *plb = (void *) ((((long)mi_data + (4*1024) - 1) / (4*1024)) * 4*1024); //size of page locked area... size_t size = (dim_y * dim_x * dim_z * sizeof(float) - ((long)*plb - (long)mi_data)) / (4*1024) * (4*1024); if(size > 0) { HANDLE_ERROR(cudaHostRegister(*plb, size, cudaHostRegisterDefault)); mexPrintf("Pagelocked %i bytes. Offset: %i\n", size, ((long)*plb - (long)mi_data)); } else { *plb = NULL; } } else { *plb = NULL; } mat_host *mo = new mat_host(dim_y, dim_x, dim_z, mi_data); return mo; } else { mat_host *mo = new mat_host(0); return mo; } } // create sparse_mat_host-object from mxArray sparse_mat_host *getSparseMatrix(const mxArray *mi, bool pagelocked) { const mwSize *dims = mxGetDimensions(mi); int dim_y = dims[0]; int dim_x = dims[1]; mwIndex *mi_dim_y = mxGetIr(mi); mwIndex *mi_columnIndex = mxGetJc(mi); int n_nonzero = (int)mxGetNzmax(mi); double *mi_data = (double *)mxGetData(mi); sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero, sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined); for(int i=0; i < dim_x + 1; i++) mo->ptr()[i] = mi_columnIndex[i]; for(int i=0; i < mo->nnz; i++) { mo->ind()[i] = mi_dim_y[i]; mo->val()[i] = (float)mi_data[i]; } return mo; } // get geometry_host structure from mxArray geometry_host *get_geometry(const mxArray *A) { float scale_factor; int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r; int ne, nr, rv_x, rv_y, rv_z; mxArray *field; field = mxGetField(A, 0, "rv_x"); if(field == NULL) { mexErrMsgTxt("no field rv_x in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.rv_x must be of type Int32!"); } rv_x = *( (int *)mxGetData(field) ); } field = mxGetField(A, 0, "rv_y"); if(field == NULL) { mexErrMsgTxt("no field rv_y in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.rv_y must be of type Int32!"); } rv_y = *( (int *)mxGetData(field) ); } field = mxGetField(A, 0, "rv_z"); if(field == NULL) { mexErrMsgTxt("no field rv_z in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.rv_z must be of type Int32!"); } rv_z = *( (int *)mxGetData(field) ); } field = mxGetField(A, 0, "scale_factor"); if(field == NULL) { mexErrMsgTxt("no field scale_factor in struct A!"); } else { if(!mxIsSingle(field)) { mexErrMsgTxt("A.scale_factor must be of type single!"); } scale_factor = *( (float *)mxGetData(field) ); } field = mxGetField(A, 0, "x_emitters"); if(field == NULL) { mexErrMsgTxt("no field x_emitters in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.x_emitters must be of type int32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.x_emitters must be a column-vector!"); ne = dims[1]; x_e = (int *)mxGetData(field); } field = mxGetField(A, 0, "y_emitters"); if(field == NULL) { mexErrMsgTxt("no field y_emitters in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.y_emitters must be of type in32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.y_emitters must be a column-vector!"); if(dims[1] != ne) mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!"); y_e = (int *)mxGetData(field); } field = mxGetField(A, 0, "z_emitters"); if(field == NULL) { mexErrMsgTxt("no field z_emitters in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.z_emitters must be of type in32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.z_emitters must be a column-vector!"); if(dims[1] != ne) mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!"); z_e = (int *)mxGetData(field); } field = mxGetField(A, 0, "x_receivers"); if(field == NULL) { mexErrMsgTxt("no field x_receivers in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.x_receivers must be of type in32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.x_receivers must be a column-vector!"); nr = dims[1]; x_r = (int *)mxGetData(field); } field = mxGetField(A, 0, "y_receivers"); if(field == NULL) { mexErrMsgTxt("no field y_receivers in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.y_receivers must be of type in32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.y_receivers must be a column-vector!"); if(dims[1] != nr) mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!"); y_r = (int *)mxGetData(field); } field = mxGetField(A, 0, "z_receivers"); if(field == NULL) { mexErrMsgTxt("no field z_receivers in struct A!"); } else { if(!mxIsInt32(field)) { mexErrMsgTxt("A.z_receivers must be of type int32!"); } const mwSize *dims = mxGetDimensions(field); if(dims[0] > 1) mexErrMsgTxt("A.z_receivers must be a column-vector!"); if(dims[1] != nr) mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!"); z_r = (int *)mxGetData(field); } geometry_host *geom = new geometry_host; geom->num_emitters = ne; geom->num_receivers = nr; geom->x_emitters = x_e; geom->y_emitters = y_e; geom->z_emitters = z_e; geom->x_receivers = x_r; geom->y_receivers = y_r; geom->z_receivers = z_r; geom->scale_factor = scale_factor; geom->rv_x = rv_x; geom->rv_y = rv_y; geom->rv_z = rv_z; return geom; } // create tval3_options struct tval3_options *getOptions(const mxArray *optsi) { tval3_options *optso = new tval3_options; if(!mxIsStruct(optsi)) { mexPrintf("Warning: opts is not a structure."); return optso; } mxArray *field; field = mxGetField(optsi, 0, "mu"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.mu is not of type double (ignored)."); else optso->mu = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "mu0"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.mu0 is not of type double (ignored)."); else optso->mu0 = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "beta"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.beta is not of type double (ignored)."); else optso->beta = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "beta0"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.beta0 is not of type double (ignored)."); else optso->beta0 = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "rate_cnt"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.rate_cnt is not of type double (ignored)."); else optso->rate_cnt = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "tol"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.tol is not of type double (ignored)."); else optso->tol = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "tol_inn"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.tol_inn is not of type double (ignored)."); else optso->tol_inn = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "maxcnt"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.maxcnt is not of type double (ignored)."); else optso->maxcnt = (int) *mxGetPr(field); } field = mxGetField(optsi, 0, "maxit"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.maxit is not of type double (ignored)."); else optso->maxit = (int) *mxGetPr(field); } field = mxGetField(optsi, 0, "c"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.c is not of type double (ignored)."); else optso->c = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "gamma"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.gamma is not of type double (ignored)."); else optso->gamma = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "gam"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.gam is not of type double (ignored)."); else optso->gam = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "rate_gam"); if(field != NULL) { if(!mxIsDouble(field)) mexPrintf("Warning: opts.rate_gam is not of type double (ignored)."); else optso->rate_gam = (float) *mxGetPr(field); } field = mxGetField(optsi, 0, "isreal"); if(field != NULL) { if(!mxIsLogical(field)) mexPrintf("Warning: opts.isreal is not of type logical (ignored)."); else optso->isreal = (bool) *mxGetLogicals(field); } field = mxGetField(optsi, 0, "nonneg"); if(field != NULL) { if(!mxIsLogical(field)) mexPrintf("Warning: opts.nonneg is not of type logical (ignored)."); else optso->nonneg = (bool) *mxGetLogicals(field); } return optso; } // create mxArray from struct tval3_info mxArray *setInfo(tval3_info &info) { const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"}; mxArray *output = mxCreateStructMatrix(1, 1, 5, field_names); mxArray *value; value = mxCreateDoubleMatrix(1,1,mxREAL); *mxGetPr(value) = info.total_iters; mxSetField(output, 0, "total_iters", value); value = mxCreateDoubleMatrix(1,1,mxREAL); *mxGetPr(value) = info.outer_iters; mxSetField(output, 0, "outer_iters", value); value = mxCreateDoubleMatrix(1,1,mxREAL); *mxGetPr(value) = info.rel_chg; mxSetField(output, 0, "rel_chg", value); value = mxCreateDoubleMatrix(1,1,mxREAL); *mxGetPr(value) = info.rel_error; mxSetField(output, 0, "rel_error", value); value = mxCreateDoubleMatrix(1,1,mxREAL); *mxGetPr(value) = info.secs; mxSetField(output, 0, "secs", value); return output; } // create mxArray from mat-object mxArray *setMatrix(const mat_host &mi) { mwSize dimensions[3]; dimensions[0] = mi.dim_y; dimensions[1] = mi.dim_x; dimensions[2] = mi.dim_z; mxArray *mo = mxCreateNumericArray(3, dimensions, mxSINGLE_CLASS, mxREAL); float *mo_data = (float *) mxGetData(mo); memcpy(mo_data, mi.data(), mi.len * sizeof(float)); return mo; } // [U, out] = tval3cpp(A,b,p,q,opts,use_dbl) void TVALGPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { // check number of arguments... if ((nrhs < 6) || (nrhs > 9)) mexErrMsgTxt("Wrong number of input arguments."); if(nlhs > 2) mexErrMsgTxt("Too many output arguments."); // assign input arguments... const mxArray *A = prhs[0]; const mxArray *b = prhs[1]; const mxArray *p = prhs[2]; const mxArray *q = prhs[3]; const mxArray *r = prhs[4]; const mxArray *opts = prhs[5]; const mxArray *Ut = mxGetField(opts, 0, "Ut"); const mxArray *device = (nrhs < 7) ? NULL : prhs [6]; const mxArray *pagelocked = (nrhs < 8) ? NULL : prhs [7]; // assign output arguments... mxArray **U = &plhs[0]; mxArray **info = (nlhs < 2) ? NULL : &plhs[1]; // check data types and assign variables... geometry_host *geo = NULL; if(!mxIsStruct(A) && !mxIsSingle(A) && !(mxIsDouble(A) && mxIsSparse(A)) || mxIsComplex(A)) mexErrMsgTxt("A must be a struct or a non complex dense matrix of type single or a non complex sparse matrix."); if(!mxIsSingle(b) || mxIsComplex(b)) mexErrMsgTxt("b must be non complex single."); if((Ut != NULL) && (!mxIsSingle(Ut) || mxIsComplex(Ut))) mexErrMsgTxt("opts.Ut must be non complex single."); if(!mxIsDouble(p)) mexErrMsgTxt("p must be of type double."); if(!mxIsDouble(q)) mexErrMsgTxt("q must be of type double."); if(!mxIsDouble(r)) mexErrMsgTxt("r must be of type double."); if((device != NULL) && (!mxIsDouble(device))) mexErrMsgTxt("device must be of type double."); if((pagelocked != NULL) && (!mxIsLogical(pagelocked))) mexErrMsgTxt("pagelocked is not a logical value."); int ip = (int)(*mxGetPr(p)); int iq = (int)(*mxGetPr(q)); int ir = (int)(*mxGetPr(r)); int i_device = (device == 0) ? 0 : (int)(*mxGetPr(device)); bool b_pagelocked = (pagelocked == NULL) ? true : (bool) mxGetLogicals(pagelocked)[0]; if(mxIsStruct(A)) { geo = get_geometry(A); } try { HANDLE_ERROR(cudaSetDevice(i_device)); void *plb_b, *plb_Ut, *plb_A; mat_host *mb = getMatrix(b, b_pagelocked, &plb_b); mat_host mU(ip, iq, ir, mat_col_major, false, b_pagelocked); mat_host *mUt = getMatrix(Ut, b_pagelocked, &plb_Ut); tval3_options *options = getOptions(opts); tval3_info ti_info; if(geo != NULL) { ti_info = tval3_gpu(mU, *geo, *mb, *options, *mUt, b_pagelocked); } else if(mxIsSparse(A)) { sparse_mat_host *mA = getSparseMatrix(A, b_pagelocked); ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked); delete mA; } else { mat_host *mA = getMatrix(A, b_pagelocked, &plb_A); ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked); if(plb_A != NULL) HANDLE_ERROR(cudaHostUnregister(plb_A)); delete mA; } *U = setMatrix(mU); if(info != NULL) *info = setInfo(ti_info); if(plb_b != NULL) HANDLE_ERROR(cudaHostUnregister(plb_b)); /* Commented as this variable has not been registered using cudaHostRegister (TH Feb.2021) */ //if(plb_Ut != NULL) // HANDLE_ERROR(cudaHostUnregister(plb_Ut)); delete mb; delete mUt; delete options; } catch(const std::exception &ex) { mexErrMsgTxt(ex.what()); } }