// tval3gpu3d.cpp — host-side glue between the plain-C TVAL3 entry points and
// the tval3_gpu solver (URDepends/TVALGPU/src/tval3gpu3d.cpp).
// NOTE: this header replaces non-code web-viewer chrome that was pasted into
// the file ("Files", byte counts, "Raw Permalink" line) and would not compile.
#include "tval3gpu3d.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>

#include <handle_error.h>
#include <tval3_gpu.h>

#define OUTPUT_TYPES
using namespace std;
// (paste artifact: commit timestamp 2023-05-30 17:32:52 +08:00)
// Wrap a raw float buffer in a newly allocated mat_host (caller owns it).
// When `pagelocked` is set, the largest 4 KiB-aligned, whole-page sub-range
// of the buffer is registered with the CUDA driver (cudaHostRegister) so
// transfers from it use the pinned-memory path; *plb receives the start of
// that range so the caller can cudaHostUnregister it later, or NULL when
// nothing was pinned.
//
// mi   : source data in mat_host layout, or NULL to create an empty matrix.
// dims : {dim_y, dim_x, dim_z}; dims[2] == 0 is treated as a single slice.
//        Only read when mi is non-NULL.
// plb  : out-parameter for the page-locked base address (always written).
mat_host *getMatrix(float* mi, size_t* dims, bool pagelocked, void **plb) {
    // Define the out-parameter on every path: the original left *plb
    // uninitialized when mi was NULL, so callers read indeterminate memory.
    *plb = NULL;
    if (!mi) {
        return new mat_host(0);
    }

    size_t dim_y = dims[0];
    size_t dim_x = dims[1];
    size_t dim_z = (dims[2] == 0) ? 1 : dims[2];

    float *mi_data = mi;
    if (pagelocked) {
        // cudaHostRegister wants page-granular regions; pin on 4 KiB pages.
        // uintptr_t (not long) so the pointer math is correct on LLP64 too.
        const uintptr_t page = 4 * 1024;
        uintptr_t base = (uintptr_t)mi_data;
        // First page boundary at or after the start of the buffer.
        uintptr_t aligned = (base + page - 1) / page * page;
        size_t total = dim_y * dim_x * dim_z * sizeof(float);
        size_t offset = (size_t)(aligned - base);
        // Guard small buffers: the original subtracted the offset from the
        // total with unsigned arithmetic, which underflowed to a huge `size`
        // whenever the buffer was smaller than the alignment padding.
        if (offset < total) {
            size_t size = (total - offset) / page * page; // whole pages only
            if (size > 0) {
                *plb = (void *)aligned;
                HANDLE_ERROR(cudaHostRegister(*plb, size, cudaHostRegisterDefault));
                printf("Pagelocked %zu bytes. Offset: %zu\n", size, offset);
            }
        }
    }
    return new mat_host(dim_y, dim_x, dim_z, mi_data);
}
// (paste artifact: commit timestamp 2023-05-30 17:32:52 +08:00)
// Translate the plain TVALOptions struct into a heap-allocated tval3_options
// understood by the solver. The caller owns (and must delete) the result.
tval3_options* getOptions(const TVALOptions& opt){
    tval3_options *out = new tval3_options;
    out->beta   = opt.beta;
    out->beta0  = opt.beta0;
    out->mu     = opt.mu;
    out->mu0    = opt.mu0;
    out->tol    = opt.tol;
    out->maxit  = opt.maxit;
    out->nonneg = opt.nonneg;
    // Only clear the flag for complex-valued input; when opt.isreal is true
    // the default value of tval3_options::isreal is deliberately left as-is.
    if (!opt.isreal) {
        out->isreal = false;
    }
    return out;
}
// (paste artifact: commit timestamp 2023-10-09 09:50:54 +08:00)
// Build a newly allocated CSC sparse_mat_host from raw triplet-style arrays
// (caller owns the result).
//
// xIdxs   : row indices of the nz non-zero entries (copied into mo->ind()).
// yIdxs   : column pointer array with mN + 1 entries (copied into mo->ptr()).
//           NOTE(review): despite the x/y names, the copies below show xIdxs
//           holds row indices and yIdxs holds CSC column offsets — confirm
//           against callers.
// mValues : the nz non-zero values (copied into mo->val()).
// mM, mN  : matrix dimensions (rows, columns).
// pagelocked : request pinned (write-combined) host storage for the matrix.
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
    sparse_mat_host *mo = new sparse_mat_host(mM, mN, nz,
        sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);
    // std::copy instead of hand-rolled loops: the original iterated with a
    // signed int against size_t bounds (implicit conversion; overflows for
    // matrices with more than INT_MAX columns).
    std::copy(yIdxs, yIdxs + (mN + 1), mo->ptr()); // mN + 1 column offsets
    std::copy(xIdxs, xIdxs + mo->nnz, mo->ind());  // row index per non-zero
    std::copy(mValues, mValues + mo->nnz, mo->val());
    return mo;
}
// (paste artifact: commit timestamp 2023-10-09 09:50:54 +08:00)
// Run the TVAL3 total-variation reconstruction on the GPU.
//
// xIdxs/yIdxs/mValues/mM/mN/nz : the sparse measurement matrix A in CSC form
//                                (see getSparseMatrix).
// bData/bDims : the measurement vector/matrix b and its {y, x, z} dimensions.
// dims        : {ip, iq, ir} — dimensions of the reconstructed volume U.
// opt         : solver options, translated via getOptions.
// device      : CUDA device ordinal to run on.
// pagelocked  : use pinned host memory for transfers where possible.
//
// Returns a TVALResult whose `data` is a freshly new[]-allocated copy of the
// reconstruction (caller frees) and whose `errormsg` is set if a
// std::exception escaped the solver.
TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
int device, bool pagelocked) {
    int ip = dims[0];
    int iq = dims[1];
    int ir = dims[2];
    TVALResult result;
    void *plb_b = NULL;
    // A is a sparse matrix (not a struct), so no geometry information needed.
    try {
        // The original's `(device == 0) ? 0 : device` was a no-op; pass
        // the ordinal straight through.
        HANDLE_ERROR(cudaSetDevice(device));
        void *plb_Ut = NULL;
        // unique_ptr ensures the host matrices/options are released even when
        // tval3_gpu throws; the original skipped its `delete`s (and the
        // cudaHostUnregister below) on the exception path and leaked them.
        std::unique_ptr<mat_host> mb(getMatrix(bData, bDims, pagelocked, &plb_b));
        mat_host mU(ip, iq, ir, mat_col_major, false, pagelocked);
        // Per the preceding code, Ut is intentionally an empty matrix.
        std::unique_ptr<mat_host> mUt(getMatrix(nullptr, nullptr, pagelocked, &plb_Ut));
        std::unique_ptr<tval3_options> options(getOptions(opt));
        // Per the preceding code, the input matrix is always sparse here.
        std::unique_ptr<sparse_mat_host> mA(
            getSparseMatrix(xIdxs, yIdxs, mValues, mM, mN, nz, pagelocked));

        tval3_info ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, pagelocked);
        (void)ti_info; // solver statistics currently unused, see setInfo below

        result.data = new float[mU.len];
        std::copy(mU.data(), mU.data() + mU.len, result.data);
        // NOTE(review): dims[0]/dims[1] export dim_x/dim_y in swapped order
        // relative to the (ip, iq, ir) construction of mU — presumably a
        // deliberate column-major convention; confirm against consumers.
        result.dims[0] = mU.dim_x;
        result.dims[1] = mU.dim_y;
        result.dims[2] = mU.dim_z;

        // if(info != NULL) *info = setInfo(ti_info);
    }
    catch(const std::exception &ex) {
        result.errormsg = ex.what();
    }
    // Unpin b's page-locked region on success AND error paths; deliberately
    // unchecked so cleanup cannot throw/abort out of the error path.
    if (plb_b != NULL)
        cudaHostUnregister(plb_b);
    return result;
}