#include "tval3gpu3d.h"
// NOTE(review): the header names of the original angle-bracket includes were
// not present in the reviewed text. The list below is what the code in this
// file visibly needs; reconcile it with the original include list.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <memory>
#include <cuda_runtime.h>

#define OUTPUT_TYPES

using namespace std;

namespace {

// Granularity (in bytes) used when aligning a host buffer for page-locking.
const size_t kPageSize = 4 * 1024;

// Calls cudaHostUnregister on a registered host range when it goes out of
// scope, so the registration is undone even if an exception unwinds the stack.
// A NULL pointer means "nothing registered" and makes the guard a no-op.
class HostRegistrationGuard {
public:
    explicit HostRegistrationGuard(void *ptr) : ptr_(ptr) {}

    ~HostRegistrationGuard() {
        // A destructor must not throw, so the status is deliberately dropped here.
        if (ptr_ != NULL) cudaHostUnregister(ptr_);
    }

    // Unregisters now and disarms the guard; returns the CUDA status so the
    // caller can run it through HANDLE_ERROR on the normal path.
    cudaError_t unregister() {
        if (ptr_ == NULL) return cudaSuccess;
        void *p = ptr_;
        ptr_ = NULL;
        return cudaHostUnregister(p);
    }

private:
    HostRegistrationGuard(const HostRegistrationGuard &) = delete;
    HostRegistrationGuard &operator=(const HostRegistrationGuard &) = delete;

    void *ptr_;
};

}  // namespace

// Wraps a caller-owned float buffer in a heap-allocated mat_host.
//
//  mi         - host data; when NULL an empty mat_host(0) is returned and
//               dims is not read.
//  dims       - {dim_y, dim_x, dim_z}; a dim_z of 0 is treated as 1.
//  pagelocked - when true, the 4 KiB-aligned interior of the buffer is
//               registered with cudaHostRegister.
//  plb        - out: start of the registered range, or NULL when nothing was
//               registered. The caller must cudaHostUnregister a non-NULL value.
//
// The returned object is allocated with new; the caller deletes it.
mat_host *getMatrix(float* mi, size_t* dims, bool pagelocked, void **plb) {
    if (!mi) {
        // Always define the out-parameter so callers can test it safely.
        if (plb != NULL) *plb = NULL;
        return new mat_host(0);
    }

    const size_t dim_y = dims[0];
    const size_t dim_x = dims[1];
    const size_t dim_z = dims[2] == 0 ? 1 : dims[2];

    *plb = NULL;
    if (pagelocked) {
        // Round the buffer start up to the next 4 KiB boundary. uintptr_t is
        // used because `long` cannot hold a pointer on every 64-bit target.
        const std::uintptr_t begin = reinterpret_cast<std::uintptr_t>(mi);
        const std::uintptr_t aligned = (begin + kPageSize - 1) / kPageSize * kPageSize;
        const size_t offset = static_cast<size_t>(aligned - begin);

        // Round the remaining length down to whole 4 KiB units. Guard the
        // subtraction: for buffers shorter than the offset the unsigned
        // difference would wrap around to a huge value.
        const size_t total = dim_y * dim_x * dim_z * sizeof(float);
        const size_t size = total > offset ? (total - offset) / kPageSize * kPageSize : 0;

        if (size > 0) {
            void *start = reinterpret_cast<void *>(aligned);
            HANDLE_ERROR(cudaHostRegister(start, size, cudaHostRegisterDefault));
            *plb = start;
            printf("Pagelocked %zu bytes. Offset: %zu\n", size, offset);
        }
    }

    return new mat_host(dim_y, dim_x, dim_z, mi);
}

// Copies the solver settings from a TVALOptions into a newly allocated
// tval3_options. The caller deletes the returned object.
tval3_options* getOptions(const TVALOptions& opt) {
    tval3_options *converted = new tval3_options;
    converted->beta = opt.beta;
    converted->beta0 = opt.beta0;
    converted->mu = opt.mu;
    converted->mu0 = opt.mu0;
    converted->tol = opt.tol;
    converted->maxit = opt.maxit;
    converted->nonneg = opt.nonneg;
    // isreal is only overwritten when the caller clears it; otherwise whatever
    // value tval3_options starts with is kept.
    if (!opt.isreal) converted->isreal = false;
    return converted;
}

// Builds a heap-allocated sparse_mat_host (created with sparse_mat_csc) from
// raw index/value arrays.
//
//  xIdxs   - nz entries copied into ind().
//  yIdxs   - mN + 1 entries copied into ptr().
//  mValues - nz entries copied into val().
//  mM, mN  - passed to the sparse_mat_host constructor as dim_y and dim_x.
//
// The caller deletes the returned object.
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues, size_t mM, size_t mN,
                                 int nz, bool pagelocked) {
    sparse_mat_host *sparse = new sparse_mat_host(mM, mN, nz, sparse_mat_csc, false, pagelocked,
                                                  cudaHostAllocWriteCombined);

    // size_t index: the bound mN + 1 is unsigned, so avoid a signed/unsigned compare.
    for (size_t col = 0; col < mN + 1; ++col)
        sparse->ptr()[col] = yIdxs[col];

    for (int k = 0; k < sparse->nnz; ++k) {
        sparse->ind()[k] = xIdxs[k];
        sparse->val()[k] = mValues[k];
    }

    return sparse;
}

// Runs tval3_gpu on the selected CUDA device and returns the reconstruction.
//
//  xIdxs, yIdxs, mValues, mM, mN, nz - sparse measurement matrix, see getSparseMatrix.
//  bData, bDims                      - measurement data and its {y, x, z} dims, see getMatrix.
//  dims                              - three extents used to size the result volume.
//  opt                               - solver options.
//  device                            - CUDA device ordinal passed to cudaSetDevice.
//  pagelocked                        - forwarded to the matrix helpers and to tval3_gpu.
//
// On success result.data is a new[]-allocated copy of the solution (ownership
// passes to the caller) and result.dims is filled in. A std::exception raised
// on the way is caught and its message stored in result.errormsg.
TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN, int nz,
                   float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
                   int device, bool pagelocked) {
    // NOTE(review): unlike getMatrix, a dims[2] of 0 is not promoted to 1 here - confirm
    // that callers always pass a non-zero third extent.
    const int ip = static_cast<int>(dims[0]);
    const int iq = static_cast<int>(dims[1]);
    const int ir = static_cast<int>(dims[2]);

    TVALResult result;
    // M is a sparse matrix, not a struct, so no geometry is needed.
    try {
        HANDLE_ERROR(cudaSetDevice(device));

        void *plb_b = NULL;
        void *plb_Ut = NULL;

        // Owning handles release everything if a later step throws.
        std::unique_ptr<mat_host> mb(getMatrix(bData, bDims, pagelocked, &plb_b));
        HostRegistrationGuard bRegistration(plb_b);

        mat_host mU(ip, iq, ir, mat_col_major, false, pagelocked);

        // Following the earlier code, Ut is passed as a null pointer (empty matrix).
        std::unique_ptr<mat_host> mUt(getMatrix(nullptr, nullptr, pagelocked, &plb_Ut));

        std::unique_ptr<tval3_options> options(getOptions(opt));

        // Following the earlier code, the input is always a sparse matrix.
        std::unique_ptr<sparse_mat_host> mA(
            getSparseMatrix(xIdxs, yIdxs, mValues, mM, mN, nz, pagelocked));

        tval3_info ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, pagelocked);
        (void)ti_info;  // currently unused, see the disabled setInfo call below

        // Free the sparse matrix and undo the registration before allocating the result.
        mA.reset();
        HANDLE_ERROR(bRegistration.unregister());

        result.data = new float[mU.len];
        std::copy(mU.data(), mU.data() + mU.len, result.data);
        // NOTE(review): dims[0] receives dim_x and dims[1] receives dim_y - confirm this
        // ordering is what callers expect.
        result.dims[0] = mU.dim_x;
        result.dims[1] = mU.dim_y;
        result.dims[2] = mU.dim_z;
        // if(info != NULL) *info = setInfo(ti_info);
    } catch (const std::exception &ex) {
        result.errormsg = ex.what();
    }

    return result;
}