129 lines
3.8 KiB
C++
129 lines
3.8 KiB
C++
#include "tval3gpu3d.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>

#include <handle_error.h>
#include <tval3_gpu.h>
|
||
|
||
#define OUTPUT_TYPES
|
||
|
||
using namespace std;
|
||
|
||
|
||
mat_host *getMatrix(float* mi,size_t* dims, bool pagelocked, void **plb) {
	// Wraps a raw host buffer in a mat_host. When `pagelocked` is set, the
	// largest page-aligned sub-range of the buffer is registered with the
	// CUDA driver (cudaHostRegister) to accelerate host<->device transfers;
	// *plb receives the base of the registered range, or NULL if none.
	//
	// mi         - source data, or NULL to create an empty matrix
	// dims       - [dim_y, dim_x, dim_z]; dims[2]==0 is treated as 1.
	//              Only read when mi != NULL.
	// pagelocked - whether to attempt page-locking the buffer
	// plb        - out: base of the page-locked region, or NULL
	//
	// The caller owns the returned mat_host and must cudaHostUnregister(*plb)
	// when *plb is non-NULL.

	// Ensure *plb is defined on every path; the original left it
	// uninitialized when mi == NULL or pagelocked == false.
	*plb = NULL;

	if(!mi) {
		return new mat_host(0);
	}

	size_t dim_y = dims[0];
	size_t dim_x = dims[1];
	size_t dim_z = dims[2]==0?1:dims[2];

	float *mi_data = mi;

	if(pagelocked) {
		// Assumed host page size — TODO confirm (cudaHostRegister requires
		// page-aligned base/size on some platforms).
		const size_t page = 4*1024;

		// Round the buffer start up to the next page boundary.
		// uintptr_t instead of long: long is 32-bit on LLP64 platforms
		// (64-bit Windows), which would truncate the pointer.
		uintptr_t base = (uintptr_t)mi_data;
		uintptr_t aligned = ((base + page - 1) / page) * page;
		size_t offset = (size_t)(aligned - base);
		size_t total = dim_y * dim_x * dim_z * sizeof(float);

		// Guard against underflow: with the original unsigned arithmetic a
		// buffer smaller than `offset` wrapped around to a huge size and was
		// then registered with cudaHostRegister.
		if(total > offset) {
			// Size of the page-locked area, truncated to whole pages.
			size_t size = (total - offset) / page * page;

			if(size > 0) {
				*plb = (void *)aligned;
				HANDLE_ERROR(cudaHostRegister(*plb, size, cudaHostRegisterDefault));
				printf("Pagelocked %zu bytes. Offset: %zu\n", size, offset);
			}
		}
	}

	return new mat_host(dim_y, dim_x, dim_z, mi_data);
}
|
||
|
||
// Translates the caller-facing TVALOptions structure into the solver's
// internal tval3_options representation. The caller owns the returned object.
tval3_options* getOptions(const TVALOptions& opt){
	tval3_options *result = new tval3_options;

	result->beta   = opt.beta;
	result->beta0  = opt.beta0;
	result->mu     = opt.mu;
	result->mu0    = opt.mu0;
	result->tol    = opt.tol;
	result->maxit  = opt.maxit;
	result->nonneg = opt.nonneg;

	// Deliberately one-way: only clear the flag. When opt.isreal is true the
	// constructor's default value of tval3_options::isreal is left untouched.
	if(!opt.isreal) {
		result->isreal = false;
	}

	return result;
}
|
||
|
||
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
	// Builds a CSC (compressed sparse column) host matrix from raw arrays.
	//
	// xIdxs      - row indices of the nz non-zero entries
	// yIdxs      - column-pointer array; assumed mN + 1 entries — TODO confirm
	// mValues    - non-zero values, nz entries
	// mM, mN     - matrix dimensions (rows, columns)
	// nz         - number of non-zero entries
	// pagelocked - allocate the host buffers page-locked (write-combined)
	//
	// The caller owns the returned sparse_mat_host.
	sparse_mat_host *mo = new sparse_mat_host(mM, mN, nz,
			sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);

	// std::copy replaces the original index loops, which compared a signed
	// `int i` against the size_t bound `dim_x + 1` (signed/unsigned mismatch,
	// and overflow for very large matrices).
	std::copy(yIdxs, yIdxs + mN + 1, mo->ptr());
	std::copy(xIdxs, xIdxs + mo->nnz, mo->ind());
	std::copy(mValues, mValues + mo->nnz, mo->val());

	return mo;
}
|
||
|
||
TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
		int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
		int device, bool pagelocked) {
	// Runs the TVAL3 reconstruction on the selected GPU for a sparse
	// measurement matrix M (CSC arrays xIdxs/yIdxs/mValues) and observation
	// vector b (bData/bDims). dims = [p, q, r] gives the reconstructed
	// volume's dimensions.
	//
	// Returns the reconstructed volume in result.data (caller owns the
	// allocation) and its dimensions in result.dims; on failure
	// result.errormsg carries the exception text instead.
	int ip = dims[0];
	int iq = dims[1];
	int ir = dims[2];
	TVALResult result;
	// M is a sparse matrix, not a struct, so no geometry is needed.
	try{
		// Original computed (device == 0) ? 0 : device, which is just `device`.
		HANDLE_ERROR(cudaSetDevice(device));

		void *plb_b = NULL;
		void *plb_Ut = NULL;  // never registered for the empty Ut matrix

		// unique_ptr so the helper allocations are released even when
		// tval3_gpu or HANDLE_ERROR throws — the original raw pointers
		// leaked on the exception path (the catch block freed nothing).
		std::unique_ptr<mat_host> mb(getMatrix(bData, bDims, pagelocked, &plb_b));
		mat_host mU(ip, iq, ir, mat_col_major, false, pagelocked);
		// As in the preceding code path, Ut is always an empty matrix here.
		std::unique_ptr<mat_host> mUt(getMatrix(nullptr, nullptr, pagelocked, &plb_Ut));
		std::unique_ptr<tval3_options> options(getOptions(opt));
		// The input matrix is always sparse on this code path.
		std::unique_ptr<sparse_mat_host> mA(
				getSparseMatrix(xIdxs, yIdxs, mValues, mM, mN, nz, pagelocked));

		tval3_info ti_info;
		ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, pagelocked);
		(void)ti_info;  // info reporting currently disabled, see setInfo below

		// Free the sparse matrix before copying the result, as the original did.
		mA.reset();

		if(plb_b != NULL)
			HANDLE_ERROR(cudaHostUnregister(plb_b));

		result.data = new float[mU.len];
		std::copy(mU.data(), mU.data() + mU.len, result.data);
		// NOTE(review): output dims are filled x-before-y, the reverse of the
		// [y, x, z] input convention used elsewhere in this file — kept as-is,
		// confirm against callers.
		result.dims[0] = mU.dim_x;
		result.dims[1] = mU.dim_y;
		result.dims[2] = mU.dim_z;

		// if(info != NULL) *info = setInfo(ti_info);
	}
	catch(const std::exception &ex) {
		result.errormsg = ex.what();
	}
	return result;
}
|