Files
URDepends/TVALGPU/src/tval3gpu3d.cpp
2023-10-09 09:50:54 +08:00

129 lines
3.8 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "tval3gpu3d.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <handle_error.h>
#include <tval3_gpu.h>
#define OUTPUT_TYPES
using namespace std;
// Wraps a caller-owned float buffer in a heap-allocated mat_host and,
// optionally, page-locks the page-aligned interior of that buffer.
//
// Parameters:
//   mi         - buffer to wrap; when NULL an empty mat_host(0) is returned.
//   dims       - three extents read as {dim_y, dim_x, dim_z}; a dim_z of 0 is
//                treated as 1. Must not be NULL when mi is non-NULL.
//   pagelocked - when true, the largest run of whole 4 KiB pages that lies
//                inside the buffer is registered with cudaHostRegister.
//   plb        - out: start of the registered region, or NULL when nothing
//                was registered. The caller must pass a non-NULL value back
//                to cudaHostUnregister. If plb itself is NULL no pinning is
//                attempted, because the region could never be released.
//
// Returns a mat_host allocated with new; the caller deletes it.
// Throws std::invalid_argument when mi is non-NULL but dims is NULL, plus
// whatever HANDLE_ERROR raises when registration fails.
mat_host *getMatrix(float* mi,size_t* dims, bool pagelocked, void **plb) {
    // Granularity used for aligning and sizing the pinned window.
    const size_t kPageBytes = 4 * 1024;

    // Always give the out-parameter a defined value, including on the
    // empty-matrix path, so callers never read an uninitialized pointer.
    if (plb != NULL) {
        *plb = NULL;
    }

    if (mi == NULL) {
        return new mat_host(0);
    }
    if (dims == NULL) {
        throw std::invalid_argument("getMatrix: dims must not be null when data is provided");
    }

    const size_t dim_y = dims[0];
    const size_t dim_x = dims[1];
    const size_t dim_z = dims[2] == 0 ? 1 : dims[2];

    if (pagelocked && plb != NULL) {
        // uintptr_t keeps the full address on every platform; casting through
        // long truncates pointers where long is 32 bits wide.
        const std::uintptr_t base = reinterpret_cast<std::uintptr_t>(mi);
        const std::uintptr_t aligned = (base + kPageBytes - 1) / kPageBytes * kPageBytes;
        const size_t offset = static_cast<size_t>(aligned - base);
        const size_t total = dim_y * dim_x * dim_z * sizeof(float);

        // Guard the subtraction: with unsigned arithmetic a buffer shorter
        // than the alignment offset would otherwise wrap to a huge size.
        const size_t size = total > offset ? (total - offset) / kPageBytes * kPageBytes : 0;

        if (size > 0) {
            void *start = reinterpret_cast<void *>(aligned);
            HANDLE_ERROR(cudaHostRegister(start, size, cudaHostRegisterDefault));
            // Publish the address only once registration has succeeded.
            *plb = start;
            printf("Pagelocked %zu bytes. Offset: %zu\n", size, offset);
        }
    }

    return new mat_host(dim_y, dim_x, dim_z, mi);
}
// Translates the caller-facing TVALOptions into a heap-allocated
// tval3_options. The caller owns the returned object and must delete it.
//
// Only the fields written below are touched; every other member keeps the
// value tval3_options gives it on construction.
tval3_options* getOptions(const TVALOptions& opt){
    tval3_options *converted = new tval3_options;

    // Straight field-for-field copies.
    converted->mu = opt.mu;
    converted->mu0 = opt.mu0;
    converted->beta = opt.beta;
    converted->beta0 = opt.beta0;
    converted->tol = opt.tol;
    converted->maxit = opt.maxit;
    converted->nonneg = opt.nonneg;

    // isreal is only ever cleared here: when the caller asks for isreal the
    // member is left at whatever tval3_options initialised it to.
    if (opt.isreal) {
        return converted;
    }
    converted->isreal = false;
    return converted;
}
// Copies a sparse matrix given as three raw arrays into a newly allocated
// sparse_mat_host created with the sparse_mat_csc format flag.
//
// Parameters:
//   xIdxs      - nz entries copied into ind() (one index per stored value).
//   yIdxs      - mN + 1 entries copied into ptr() (one entry per column plus
//                a terminating entry).
//   mValues    - nz stored values copied into val().
//   mM, mN     - passed to sparse_mat_host as dim_y and dim_x respectively.
//   nz         - number of stored values; must not be negative.
//   pagelocked - forwarded to the sparse_mat_host constructor.
//
// Returns a sparse_mat_host allocated with new; the caller deletes it.
// Throws std::invalid_argument for a negative nz or a missing input array,
// instead of dereferencing a null pointer.
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
    if (nz < 0) {
        throw std::invalid_argument("getSparseMatrix: nz must not be negative");
    }
    if (yIdxs == NULL) {
        throw std::invalid_argument("getSparseMatrix: yIdxs must not be null");
    }
    if (nz > 0 && (xIdxs == NULL || mValues == NULL)) {
        throw std::invalid_argument("getSparseMatrix: xIdxs and mValues must not be null when nz > 0");
    }

    sparse_mat_host *mo = new sparse_mat_host(mM, mN, nz,
        sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);

    // size_t index: the bound is a size_t, so an int counter would mix
    // signed and unsigned and could overflow for very large column counts.
    const size_t ptr_count = mN + 1;
    for (size_t col = 0; col < ptr_count; ++col) {
        mo->ptr()[col] = yIdxs[col];
    }

    for (int k = 0; k < mo->nnz; ++k) {
        mo->ind()[k] = xIdxs[k];
        mo->val()[k] = mValues[k];
    }
    return mo;
}
// Runs tval3_gpu on a sparse system and returns the reconstruction as a
// freshly allocated float array.
//
// Parameters:
//   xIdxs, yIdxs, mValues, mM, mN, nz - sparse measurement matrix, forwarded
//                unchanged to getSparseMatrix.
//   bData, bDims - measurement data and its three extents, wrapped (not
//                copied) via getMatrix.
//   dims       - three extents of the result volume, read as
//                {dims[0], dims[1], dims[2]} and passed to mat_host in that
//                order.
//   opt        - solver options, converted with getOptions.
//   device     - CUDA device ordinal passed to cudaSetDevice.
//   pagelocked - forwarded to the matrix helpers and to tval3_gpu.
//
// Returns a TVALResult. On success data points to a new float[] of mU.len
// elements (the caller frees it) and dims is filled in. On any
// std::exception only errormsg is set.
TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
                   int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
                   int device, bool pagelocked) {
    TVALResult result;

    // Start of the page-locked window inside bData; NULL while nothing is
    // pinned. Declared outside the try so the error path can release it.
    void *plb_b = NULL;

    // M is a sparse matrix, not a struct, so no geometry is needed.
    try {
        if (dims == NULL) {
            throw std::invalid_argument("TVALGPU: dims must not be null");
        }
        const int ip = static_cast<int>(dims[0]);
        const int iq = static_cast<int>(dims[1]);
        const int ir = static_cast<int>(dims[2]);

        HANDLE_ERROR(cudaSetDevice(device));

        // unique_ptr releases every helper object on both the normal and
        // the exceptional path.
        std::unique_ptr<mat_host> mb(getMatrix(bData, bDims, pagelocked, &plb_b));
        mat_host mU(ip, iq, ir, mat_col_major, false, pagelocked);

        // Following the earlier code, Ut is always an empty matrix here.
        void *plb_Ut = NULL;
        std::unique_ptr<mat_host> mUt(getMatrix(nullptr, nullptr, pagelocked, &plb_Ut));

        std::unique_ptr<tval3_options> options(getOptions(opt));

        // Following the earlier code, the input is always a sparse matrix.
        std::unique_ptr<sparse_mat_host> mA(
            getSparseMatrix(xIdxs, yIdxs, mValues, mM, mN, nz, pagelocked));

        tval3_info ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, pagelocked);
        (void)ti_info; // solver statistics are currently not reported to the caller

        // The sparse matrix is no longer needed; free it before allocating
        // the result buffer.
        mA.reset();

        if (plb_b != NULL) {
            // Clear the marker first so a failure here is not retried below.
            void *pinned = plb_b;
            plb_b = NULL;
            HANDLE_ERROR(cudaHostUnregister(pinned));
        }

        result.data = new float[mU.len];
        std::copy(mU.data(), mU.data() + mU.len, result.data);
        // NOTE(review): dims[0]/dims[1] are reported as x/y, whereas the
        // input dims are consumed as y/x above - confirm callers expect this.
        result.dims[0] = mU.dim_x;
        result.dims[1] = mU.dim_y;
        result.dims[2] = mU.dim_z;
    }
    catch (const std::exception &ex) {
        result.errormsg = ex.what();
    }

    // Error path only: best-effort release of the caller's buffer so it is
    // not left page-locked. The return code is deliberately ignored because
    // a failure is already being reported through errormsg.
    if (plb_b != NULL) {
        (void)cudaHostUnregister(plb_b);
    }
    return result;
}