diff --git a/TVALGPU/CMakeLists.txt b/TVALGPU/CMakeLists.txt index f384e23..b3b49d2 100644 --- a/TVALGPU/CMakeLists.txt +++ b/TVALGPU/CMakeLists.txt @@ -2,18 +2,19 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR) project(TVALGPU) set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) enable_language(CUDA) -set(Matlab_ROOT_DIR /usr/local/Polyspace/R2019b) -find_package(Matlab) - -add_library(TVALGPU SHARED ./src/tval3gpu3d.cpp ./src/tval3.cu ) -target_include_directories(TVALGPU PRIVATE ./include /usr/local/cuda/include /usr/local/Polyspace/R2019b/extern/include) +file(GLOB_RECURSE tval3_cpp_files src/*.cpp) +file(GLOB_RECURSE tval3_cu_files src/*.cu) +find_package(CUDAToolkit REQUIRED) +add_library(TVALGPU SHARED ${tval3_cpp_files} ${tval3_cu_files}) +target_include_directories(TVALGPU PRIVATE ./src /usr/local/cuda/include) set_target_properties(TVALGPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -target_compile_options(TVALGPU PRIVATE $<$: +target_compile_options(TVALGPU PRIVATE $<$: --compiler-options -fPIC --use_fast_math --ptxas-options=-v -arch compute_30 -code compute_30,sm_30 >) -target_link_libraries(TVALGPU PRIVATE ${CUDA_RUNTIME_LIBRARY} ${Matlab_MEX_LIBRARY} ${Matlab_MX_LIBRARY}) -set_target_properties(TVALGPU PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_LIST_DIR}/include/tval3gpu3d.h) \ No newline at end of file +target_link_libraries(TVALGPU PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cusparse CUDA::cublas) +set(TVALGPU_HEADERS ${CMAKE_CURRENT_LIST_DIR}/src/tval3gpu3d.h ${CMAKE_CURRENT_LIST_DIR}/src/tvalstruct.h) +set_target_properties(TVALGPU PROPERTIES PUBLIC_HEADER "${TVALGPU_HEADERS}") \ No newline at end of file diff --git a/TVALGPU/include/tval3gpu3d.h b/TVALGPU/include/tval3gpu3d.h deleted file mode 100644 index f5c0fba..0000000 --- a/TVALGPU/include/tval3gpu3d.h +++ /dev/null @@ -1,4 +0,0 @@ -#include -extern "C"{ - void TVALGPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) ; -} \ No newline at end of file diff --git a/TVALGPU/include/container_device.h b/TVALGPU/src/container_device.cpp similarity index 82% rename from TVALGPU/include/container_device.h rename to TVALGPU/src/container_device.cpp index f7e7e0f..c0a410b 100644 --- a/TVALGPU/include/container_device.h +++ b/TVALGPU/src/container_device.cpp @@ -1,108 +1,4 @@ -/* - * container_gpu.h - * - * Created on: Jun 9, 2011 - * Author: ditlevsen - */ - -#ifndef CONTAINER_DEVICE_H_ -#define CONTAINER_DEVICE_H_ - -#include "container_host.h" - -class mat_device { -private: - float *pbuf; - int ld; -public: - const int dim_x; - const int dim_y; - const int dim_z; - const int len; - const T_mat_format format; - - mat_device(int l_y, int l_x, int l_z, bool init=true, bool pitch=false, T_mat_format storage=mat_col_major); - mat_device(int num_elements, bool init=true); - mat_device(const mat_device &m); - mat_device(const mat_host &m, bool pitch=false); - ~mat_device() { if(pbuf) HANDLE_ERROR(cudaFree(pbuf)); } - - int leading_dim() const { return ld; } - - float *data_dev_ptr() { return pbuf; } - const float *data_dev_ptr() const { return pbuf; } - - mat_device &operator=(const mat_device &m) throw(illegal_mat_gpu_assignment); - mat_device &operator=(const mat_host &m) throw(illegal_mat_gpu_assignment); -}; - -class sparse_mat_device { -private: - int *p_ind; - int *p_ptr; - float *p_val; -public: - const int dim_y; - const int dim_x; - const int nnz; - T_sparse_mat_format format; - - sparse_mat_device(int num_dim_y, int num_dim_x, int n_nonzero, T_sparse_mat_format storage_format, bool init=true); - sparse_mat_device(const sparse_mat_device &m); - sparse_mat_device(const sparse_mat_host &m); - ~sparse_mat_device(); - - float *val() { return p_val; } - const float *val() const { return p_val; } - - int *ptr() { return p_ptr; } - const int *ptr() const { return p_ptr; } - - int *ind() { return p_ind; } - const int *ind() const { return p_ind; } - - sparse_mat_device &operator=(const sparse_mat_device &m) throw(illegal_mat_gpu_assignment); - sparse_mat_device &operator=(const sparse_mat_host &m) throw(illegal_mat_gpu_assignment); -}; - -//--------------------------------------- -// class geomety_device -//--------------------------------------- - -class geometry_device { -private: - int *x_emitters; - int *y_emitters; - int *z_emitters; - int *x_receivers; - int *y_receivers; - int *z_receivers; - -public: - int *x_em_dev_ptr() { return x_emitters; } - const int *x_em_dev_ptr() const { return x_emitters; } - int *y_em_dev_ptr() { return y_emitters; } - const int *y_em_dev_ptr() const { return y_emitters; } - int *z_em_dev_ptr() { return z_emitters; } - const int *z_em_dev_ptr() const { return z_emitters; } - int *x_re_dev_ptr() { return x_receivers; } - const int *x_re_dev_ptr() const { return x_receivers; } - int *y_re_dev_ptr() { return y_receivers; } - const int *y_re_dev_ptr() const { return y_receivers; } - int *z_re_dev_ptr() { return z_receivers; } - const int *z_re_dev_ptr() const { return z_receivers; } - - const int num_emitters; - const int num_receivers; - const int rv_x; - const int rv_y; - const int rv_z; - const float scale_factor; - - geometry_device(const geometry_host &geom_host); - geometry_device(const geometry_device &geom_dev); - ~geometry_device(); -}; +#include "container_device.h" // ------------------------------------------------------------------------ // member functions, etc. class mat_device @@ -381,7 +277,4 @@ geometry_device::~geometry_device() { if(y_emitters) cudaFree(y_emitters); if(x_receivers) cudaFree(x_receivers); if(y_receivers) cudaFree(y_receivers); -} - - -#endif /* CONTAINER_DEVICE_H_ */ +} \ No newline at end of file diff --git a/TVALGPU/src/container_device.h b/TVALGPU/src/container_device.h new file mode 100644 index 0000000..64671ff --- /dev/null +++ b/TVALGPU/src/container_device.h @@ -0,0 +1,112 @@ +/* + * container_gpu.h + * + * Created on: Jun 9, 2011 + * Author: ditlevsen + */ + +#ifndef CONTAINER_DEVICE_H_ +#define CONTAINER_DEVICE_H_ + +#include "container_host.h" + +class mat_device { +private: + float *pbuf; + int ld; +public: + const int dim_x; + const int dim_y; + const int dim_z; + const int len; + const T_mat_format format; + + mat_device(int l_y, int l_x, int l_z, bool init=true, bool pitch=false, T_mat_format storage=mat_col_major); + mat_device(int num_elements, bool init=true); + mat_device(const mat_device &m); + mat_device(const mat_host &m, bool pitch=false); + ~mat_device() { if(pbuf) HANDLE_ERROR(cudaFree(pbuf)); } + + int leading_dim() const { return ld; } + + float *data_dev_ptr() { return pbuf; } + const float *data_dev_ptr() const { return pbuf; } + + mat_device &operator=(const mat_device &m) throw(illegal_mat_gpu_assignment); + mat_device &operator=(const mat_host &m) throw(illegal_mat_gpu_assignment); +}; + +class sparse_mat_device { +private: + int *p_ind; + int *p_ptr; + float *p_val; +public: + const int dim_y; + const int dim_x; + const int nnz; + T_sparse_mat_format format; + + sparse_mat_device(int num_dim_y, int num_dim_x, int n_nonzero, T_sparse_mat_format storage_format, bool init=true); + sparse_mat_device(const sparse_mat_device &m); + sparse_mat_device(const sparse_mat_host &m); + ~sparse_mat_device(); + + float *val() { return p_val; } + const float *val() const { return p_val; } + + int *ptr() { return p_ptr; } + const int *ptr() const { return p_ptr; } + + int *ind() { return p_ind; } + const int *ind() const { return p_ind; } + + sparse_mat_device &operator=(const sparse_mat_device &m) throw(illegal_mat_gpu_assignment); + sparse_mat_device &operator=(const sparse_mat_host &m) throw(illegal_mat_gpu_assignment); +}; + +//--------------------------------------- +// class geomety_device +//--------------------------------------- + +class geometry_device { +private: + int *x_emitters; + int *y_emitters; + int *z_emitters; + int *x_receivers; + int *y_receivers; + int *z_receivers; + +public: + int *x_em_dev_ptr() { return x_emitters; } + const int *x_em_dev_ptr() const { return x_emitters; } + int *y_em_dev_ptr() { return y_emitters; } + const int *y_em_dev_ptr() const { return y_emitters; } + int *z_em_dev_ptr() { return z_emitters; } + const int *z_em_dev_ptr() const { return z_emitters; } + int *x_re_dev_ptr() { return x_receivers; } + const int *x_re_dev_ptr() const { return x_receivers; } + int *y_re_dev_ptr() { return y_receivers; } + const int *y_re_dev_ptr() const { return y_receivers; } + int *z_re_dev_ptr() { return z_receivers; } + const int *z_re_dev_ptr() const { return z_receivers; } + + const int num_emitters; + const int num_receivers; + const int rv_x; + const int rv_y; + const int rv_z; + const float scale_factor; + + geometry_device(const geometry_host &geom_host); + geometry_device(const geometry_device &geom_dev); + ~geometry_device(); +}; + +void mat_gpu_to_host(mat_host &m_host, const mat_device &m) throw(illegal_mat_gpu_assignment); + + + + +#endif /* CONTAINER_DEVICE_H_ */ diff --git a/TVALGPU/include/container_host.h b/TVALGPU/src/container_host.h similarity index 99% rename from TVALGPU/include/container_host.h rename to TVALGPU/src/container_host.h index 18a672e..300fff7 100644 --- a/TVALGPU/include/container_host.h +++ b/TVALGPU/src/container_host.h @@ -10,7 +10,6 @@ #ifndef CONTAINER_HOST_H_ #define CONTAINER_HOST_H_ #pragma once -#include #include "handle_error.h" #include #include diff --git a/TVALGPU/include/handle_error.h b/TVALGPU/src/handle_error.cpp similarity index 50% rename from TVALGPU/include/handle_error.h rename to TVALGPU/src/handle_error.cpp index 44bf214..6895331 100644 --- a/TVALGPU/include/handle_error.h +++ b/TVALGPU/src/handle_error.cpp @@ -1,16 +1,11 @@ -#ifndef HANDLE_ERROR_H_ -#define HANDLE_ERROR_H_ +#include "handle_error.h" -#include -#include -#include +cuda_exception::cuda_exception(const std::string& message) : std::runtime_error(message) { -class cuda_exception: public std::runtime_error { -public: - cuda_exception(const std::string& message) : std::runtime_error(message) {}; -}; +} -static void handle_error(cudaError_t error, const char *file, int line ) { +void handle_error(cudaError_t error, const char *file, int line ) +{ if (error != cudaSuccess) { std::stringstream ss; ss << file << ", line " << line << ": " << cudaGetErrorString(error) << "\n"; @@ -18,7 +13,7 @@ static void handle_error(cudaError_t error, const char *file, int line ) { } } -static void handle_error(cublasStatus_t error, const char *file, int line ) { +void handle_error(cublasStatus_t error, const char *file, int line ) { if (error != CUBLAS_STATUS_SUCCESS) { std::stringstream ss; ss << file << ", line " << line << ": cublas error " << error << "\n"; @@ -26,7 +21,7 @@ static void handle_error(cublasStatus_t error, const char *file, int line ) { } } -static void handle_error(cusparseStatus_t error, const char *file, int line ) { +void handle_error(cusparseStatus_t error, const char *file, int line ) { if (error != CUSPARSE_STATUS_SUCCESS) { std::stringstream ss; ss << file << ", line " << line << ": cusparse error " << error << "\n"; @@ -34,7 +29,3 @@ static void handle_error(cusparseStatus_t error, const char *file, int line ) { } } -#define HANDLE_ERROR(error) (handle_error(error, __FILE__, __LINE__ )) - - -#endif /* HANDLE_ERROR_H_ */ diff --git a/TVALGPU/src/handle_error.h b/TVALGPU/src/handle_error.h new file mode 100644 index 0000000..2ca9609 --- /dev/null +++ b/TVALGPU/src/handle_error.h @@ -0,0 +1,24 @@ +#ifndef HANDLE_ERROR_H_ +#define HANDLE_ERROR_H_ +#include +#include +#include +#include "cuda_runtime.h" +#include "cublas.h" +#include "cusparse.h" + +class cuda_exception: public std::runtime_error { +public: + cuda_exception(const std::string& message); +}; + +void handle_error(cudaError_t error, const char *file, int line ); + +void handle_error(cublasStatus_t error, const char *file, int line ); + +void handle_error(cusparseStatus_t error, const char *file, int line ); + +#define HANDLE_ERROR(error) ::handle_error(error, __FILE__, __LINE__ ) + + +#endif /* HANDLE_ERROR_H_ */ diff --git a/TVALGPU/src/mat_vec_mul.cpp b/TVALGPU/src/mat_vec_mul.cpp new file mode 100644 index 0000000..e69de29 diff --git a/TVALGPU/include/mat_vec_mul.h b/TVALGPU/src/mat_vec_mul.h similarity index 99% rename from TVALGPU/include/mat_vec_mul.h rename to TVALGPU/src/mat_vec_mul.h index 1f5e401..0903554 100644 --- a/TVALGPU/include/mat_vec_mul.h +++ b/TVALGPU/src/mat_vec_mul.h @@ -7,6 +7,8 @@ #ifndef MAT_VEC_MUL_H_ #define MAT_VEC_MUL_H_ +#include "container_device.h" +#include struct sparse_mm { sparse_mat_device A_csc; sparse_mat_device A_csr; diff --git a/TVALGPU/src/tval3.cu b/TVALGPU/src/tval3.cu index 4cbab8f..7dc012c 100644 --- a/TVALGPU/src/tval3.cu +++ b/TVALGPU/src/tval3.cu @@ -1,6 +1,4 @@ -#include -#include -#include + #include "handle_error.h" #include "container_device.h" #include "tval3_gpu.h" diff --git a/TVALGPU/include/tval3_gpu.h b/TVALGPU/src/tval3_gpu.h similarity index 100% rename from TVALGPU/include/tval3_gpu.h rename to TVALGPU/src/tval3_gpu.h diff --git a/TVALGPU/include/tval3_types.h b/TVALGPU/src/tval3_types.h similarity index 100% rename from TVALGPU/include/tval3_types.h rename to TVALGPU/src/tval3_types.h diff --git a/TVALGPU/src/tval3gpu3d.cpp b/TVALGPU/src/tval3gpu3d.cpp index b39a481..71e3461 100644 --- a/TVALGPU/src/tval3gpu3d.cpp +++ b/TVALGPU/src/tval3gpu3d.cpp @@ -1,34 +1,22 @@ -#include -#include -#include -#include +#include "tval3gpu3d.h" +#include +#include #include - -#include -#include #include #define OUTPUT_TYPES using namespace std; -// Create mat_host-object from mxArray. -// The data buffer of the mxArray is used directly. -// If pagelocked==true a part of the buffer gets page locked in this function -// using cudaHostRegister and must later be unregistered using -// cudaHostUnregister() !! -// the address of the page locked part is returned in plb -// if there is no part of the buffer which fulfills the alignment -// requirements, no memory gets page locked and *plb is set to NULL -mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) { - if(mi != NULL) { - const mwSize *dims = mxGetDimensions(mi); - mwSize dim_y = dims[0]; - mwSize dim_x = dims[1]; - mwSize dim_z = (mxGetNumberOfDimensions(mi) >= 3) ? dims[2] : 1; +mat_host *getMatrix(float* mi,size_t* dims, bool pagelocked, void **plb) { + + if(mi) { + size_t dim_y = dims[0]; + size_t dim_x = dims[1]; + size_t dim_z = dims[2]==0?1:dims[2]; - float *mi_data = (float *)mxGetData(mi); + float *mi_data = mi; if(pagelocked) { // start of page locked area... @@ -39,7 +27,7 @@ mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) { if(size > 0) { HANDLE_ERROR(cudaHostRegister(*plb, size, cudaHostRegisterDefault)); - mexPrintf("Pagelocked %i bytes. Offset: %i\n", size, + printf("Pagelocked %li bytes. Offset: %li\n", size, ((long)*plb - (long)mi_data)); } else { *plb = NULL; @@ -55,20 +43,30 @@ mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) { mat_host *mo = new mat_host(0); return mo; } - } -// create sparse_mat_host-object from mxArray -sparse_mat_host *getSparseMatrix(const mxArray *mi, bool pagelocked) { +tval3_options* getOptions(const TVALOptions& opt){ + tval3_options *optso = new tval3_options; + optso->beta = opt.beta; + optso->beta0 = opt.beta0; + optso->mu = opt.mu; + optso->mu0 = opt.mu0; + optso->tol = opt.tol; + optso->maxit = opt.maxit; + optso->nonneg = opt.nonneg; + if(!opt.isreal)optso->isreal = false; + return optso; +} - const mwSize *dims = mxGetDimensions(mi); - int dim_y = dims[0]; - int dim_x = dims[1]; - mwIndex *mi_dim_y = mxGetIr(mi); - mwIndex *mi_columnIndex = mxGetJc(mi); - int n_nonzero = (int)mxGetNzmax(mi); +sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t mM, size_t mN, int nz, bool pagelocked) { + + size_t dim_y = mM; + size_t dim_x = mN; + int *mi_dim_y = xIdxs; + int *mi_columnIndex = yIdxs; + int n_nonzero = nz; - double *mi_data = (double *)mxGetData(mi); + double *mi_data = mValues; sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero, sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined); @@ -84,438 +82,46 @@ sparse_mat_host *getSparseMatrix(const mxArray *mi, bool pagelocked) { return mo; } -// get geometry_host structure from mxArray -geometry_host *get_geometry(const mxArray *A) { - float scale_factor; - int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r; - int ne, nr, rv_x, rv_y, rv_z; - mxArray *field; - - field = mxGetField(A, 0, "rv_x"); - if(field == NULL) { - mexErrMsgTxt("no field rv_x in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.rv_x must be of type Int32!"); - } - rv_x = *( (int *)mxGetData(field) ); - } - - field = mxGetField(A, 0, "rv_y"); - if(field == NULL) { - mexErrMsgTxt("no field rv_y in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.rv_y must be of type Int32!"); - } - rv_y = *( (int *)mxGetData(field) ); - } - - field = mxGetField(A, 0, "rv_z"); - if(field == NULL) { - mexErrMsgTxt("no field rv_z in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.rv_z must be of type Int32!"); - } - rv_z = *( (int *)mxGetData(field) ); - } - - field = mxGetField(A, 0, "scale_factor"); - if(field == NULL) { - mexErrMsgTxt("no field scale_factor in struct A!"); - } else { - if(!mxIsSingle(field)) { - mexErrMsgTxt("A.scale_factor must be of type single!"); - } - scale_factor = *( (float *)mxGetData(field) ); - } - - field = mxGetField(A, 0, "x_emitters"); - if(field == NULL) { - mexErrMsgTxt("no field x_emitters in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.x_emitters must be of type int32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.x_emitters must be a column-vector!"); - ne = dims[1]; - x_e = (int *)mxGetData(field); - } - - field = mxGetField(A, 0, "y_emitters"); - if(field == NULL) { - mexErrMsgTxt("no field y_emitters in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.y_emitters must be of type in32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.y_emitters must be a column-vector!"); - if(dims[1] != ne) - mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!"); - y_e = (int *)mxGetData(field); - } - - field = mxGetField(A, 0, "z_emitters"); - if(field == NULL) { - mexErrMsgTxt("no field z_emitters in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.z_emitters must be of type in32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.z_emitters must be a column-vector!"); - if(dims[1] != ne) - mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!"); - z_e = (int *)mxGetData(field); - } - - - field = mxGetField(A, 0, "x_receivers"); - if(field == NULL) { - mexErrMsgTxt("no field x_receivers in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.x_receivers must be of type in32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.x_receivers must be a column-vector!"); - nr = dims[1]; - x_r = (int *)mxGetData(field); - } - - field = mxGetField(A, 0, "y_receivers"); - if(field == NULL) { - mexErrMsgTxt("no field y_receivers in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.y_receivers must be of type in32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.y_receivers must be a column-vector!"); - if(dims[1] != nr) - mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!"); - y_r = (int *)mxGetData(field); - } - - field = mxGetField(A, 0, "z_receivers"); - if(field == NULL) { - mexErrMsgTxt("no field z_receivers in struct A!"); - } else { - if(!mxIsInt32(field)) { - mexErrMsgTxt("A.z_receivers must be of type int32!"); - } - const mwSize *dims = mxGetDimensions(field); - if(dims[0] > 1) - mexErrMsgTxt("A.z_receivers must be a column-vector!"); - if(dims[1] != nr) - mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!"); - z_r = (int *)mxGetData(field); - } - - geometry_host *geom = new geometry_host; - geom->num_emitters = ne; - geom->num_receivers = nr; - geom->x_emitters = x_e; - geom->y_emitters = y_e; - geom->z_emitters = z_e; - geom->x_receivers = x_r; - geom->y_receivers = y_r; - geom->z_receivers = z_r; - geom->scale_factor = scale_factor; - geom->rv_x = rv_x; - geom->rv_y = rv_y; - geom->rv_z = rv_z; - - return geom; -} - -// create tval3_options struct -tval3_options *getOptions(const mxArray *optsi) { - - tval3_options *optso = new tval3_options; - - if(!mxIsStruct(optsi)) { - mexPrintf("Warning: opts is not a structure."); - return optso; - } - - mxArray *field; - - field = mxGetField(optsi, 0, "mu"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.mu is not of type double (ignored)."); - else - optso->mu = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "mu0"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.mu0 is not of type double (ignored)."); - else - optso->mu0 = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "beta"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.beta is not of type double (ignored)."); - else - optso->beta = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "beta0"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.beta0 is not of type double (ignored)."); - else - optso->beta0 = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "rate_cnt"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.rate_cnt is not of type double (ignored)."); - else - optso->rate_cnt = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "tol"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.tol is not of type double (ignored)."); - else - optso->tol = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "tol_inn"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.tol_inn is not of type double (ignored)."); - else - - optso->tol_inn = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "maxcnt"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.maxcnt is not of type double (ignored)."); - else - optso->maxcnt = (int) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "maxit"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.maxit is not of type double (ignored)."); - else - optso->maxit = (int) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "c"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.c is not of type double (ignored)."); - else - optso->c = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "gamma"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.gamma is not of type double (ignored)."); - else - optso->gamma = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "gam"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.gam is not of type double (ignored)."); - else - optso->gam = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "rate_gam"); - if(field != NULL) { - if(!mxIsDouble(field)) - mexPrintf("Warning: opts.rate_gam is not of type double (ignored)."); - else - optso->rate_gam = (float) *mxGetPr(field); - } - - field = mxGetField(optsi, 0, "isreal"); - if(field != NULL) { - if(!mxIsLogical(field)) - mexPrintf("Warning: opts.isreal is not of type logical (ignored)."); - else - optso->isreal = (bool) *mxGetLogicals(field); - } - - field = mxGetField(optsi, 0, "nonneg"); - if(field != NULL) { - if(!mxIsLogical(field)) - mexPrintf("Warning: opts.nonneg is not of type logical (ignored)."); - else - optso->nonneg = (bool) *mxGetLogicals(field); - } - - return optso; -} - -// create mxArray from struct tval3_info -mxArray *setInfo(tval3_info &info) { - const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"}; - - mxArray *output = mxCreateStructMatrix(1, 1, 5, field_names); - - mxArray *value; - - value = mxCreateDoubleMatrix(1,1,mxREAL); - *mxGetPr(value) = info.total_iters; - mxSetField(output, 0, "total_iters", value); - - value = mxCreateDoubleMatrix(1,1,mxREAL); - *mxGetPr(value) = info.outer_iters; - mxSetField(output, 0, "outer_iters", value); - - value = mxCreateDoubleMatrix(1,1,mxREAL); - *mxGetPr(value) = info.rel_chg; - mxSetField(output, 0, "rel_chg", value); - - value = mxCreateDoubleMatrix(1,1,mxREAL); - *mxGetPr(value) = info.rel_error; - mxSetField(output, 0, "rel_error", value); - - value = mxCreateDoubleMatrix(1,1,mxREAL); - *mxGetPr(value) = info.secs; - mxSetField(output, 0, "secs", value); - - return output; -} - -// create mxArray from mat-object -mxArray *setMatrix(const mat_host &mi) { - - mwSize dimensions[3]; - dimensions[0] = mi.dim_y; - dimensions[1] = mi.dim_x; - dimensions[2] = mi.dim_z; - mxArray *mo = mxCreateNumericArray(3, dimensions, mxSINGLE_CLASS, mxREAL); - float *mo_data = (float *) mxGetData(mo); - - memcpy(mo_data, mi.data(), mi.len * sizeof(float)); - - return mo; -} - -// [U, out] = tval3cpp(A,b,p,q,opts,use_dbl) -void TVALGPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { - - // check number of arguments... - if ((nrhs < 6) || (nrhs > 9)) - mexErrMsgTxt("Wrong number of input arguments."); - if(nlhs > 2) - mexErrMsgTxt("Too many output arguments."); - - // assign input arguments... - const mxArray *A = prhs[0]; - const mxArray *b = prhs[1]; - const mxArray *p = prhs[2]; - const mxArray *q = prhs[3]; - const mxArray *r = prhs[4]; - const mxArray *opts = prhs[5]; - const mxArray *Ut = mxGetField(opts, 0, "Ut"); - const mxArray *device = (nrhs < 7) ? NULL : prhs [6]; - const mxArray *pagelocked = (nrhs < 8) ? NULL : prhs [7]; - - // assign output arguments... - mxArray **U = &plhs[0]; - mxArray **info = (nlhs < 2) ? NULL : &plhs[1]; - - // check data types and assign variables... - geometry_host *geo = NULL; - - if(!mxIsStruct(A) && !mxIsSingle(A) && - !(mxIsDouble(A) && mxIsSparse(A)) || mxIsComplex(A)) - mexErrMsgTxt("A must be a struct or a non complex dense matrix of type single or a non complex sparse matrix."); - if(!mxIsSingle(b) || mxIsComplex(b)) - mexErrMsgTxt("b must be non complex single."); - if((Ut != NULL) && (!mxIsSingle(Ut) || mxIsComplex(Ut))) - mexErrMsgTxt("opts.Ut must be non complex single."); - if(!mxIsDouble(p)) - mexErrMsgTxt("p must be of type double."); - if(!mxIsDouble(q)) - mexErrMsgTxt("q must be of type double."); - if(!mxIsDouble(r)) - mexErrMsgTxt("r must be of type double."); - if((device != NULL) && (!mxIsDouble(device))) - mexErrMsgTxt("device must be of type double."); - if((pagelocked != NULL) && (!mxIsLogical(pagelocked))) - mexErrMsgTxt("pagelocked is not a logical value."); - - int ip = (int)(*mxGetPr(p)); - int iq = (int)(*mxGetPr(q)); - int ir = (int)(*mxGetPr(r)); - int i_device = (device == 0) ? 0 : (int)(*mxGetPr(device)); - bool b_pagelocked = (pagelocked == NULL) ? true : (bool) mxGetLogicals(pagelocked)[0]; - - if(mxIsStruct(A)) { - geo = get_geometry(A); - } - - try { +TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN, + int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt, + int device, bool pagelocked) { + int ip = dims[0]; + int iq = dims[1]; + int ir = dims[2]; + int i_device = (device == 0) ? 0 :device; + TVALResult result; + //M is a sparse, not a struct, so geo not need + try{ HANDLE_ERROR(cudaSetDevice(i_device)); void *plb_b, *plb_Ut, *plb_A; - mat_host *mb = getMatrix(b, b_pagelocked, &plb_b); - mat_host mU(ip, iq, ir, mat_col_major, false, b_pagelocked); - mat_host *mUt = getMatrix(Ut, b_pagelocked, &plb_Ut); - - tval3_options *options = getOptions(opts); - + mat_host *mb = getMatrix(bData,bDims,pagelocked,&plb_b); + mat_host mU(ip, iq, ir, mat_col_major, false, pagelocked); + //按照前面的代码,UT为空指针 + mat_host *mUt = getMatrix(nullptr,nullptr, pagelocked, &plb_Ut); + tval3_options *options = getOptions(opt); tval3_info ti_info; - - - if(geo != NULL) { - ti_info = tval3_gpu(mU, *geo, *mb, *options, *mUt, b_pagelocked); - } else if(mxIsSparse(A)) { - sparse_mat_host *mA = getSparseMatrix(A, b_pagelocked); - ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked); - delete mA; - } else { - mat_host *mA = getMatrix(A, b_pagelocked, &plb_A); - ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, b_pagelocked); - if(plb_A != NULL) - HANDLE_ERROR(cudaHostUnregister(plb_A)); - delete mA; - } - - *U = setMatrix(mU); - if(info != NULL) *info = setInfo(ti_info); - + //按照前面代码,输入的必为稀疏矩阵 + sparse_mat_host *mA = getSparseMatrix(xIdxs,yIdxs, mValues, mM, mN, nz, pagelocked); + ti_info = tval3_gpu(mU, *mA, *mb, *options, *mUt, pagelocked); + delete mA; if(plb_b != NULL) HANDLE_ERROR(cudaHostUnregister(plb_b)); - /* Commented as this variable has not been registered using cudaHostRegister (TH Feb.2021) */ - //if(plb_Ut != NULL) - // HANDLE_ERROR(cudaHostUnregister(plb_Ut)); + result.data = new double[mU.len]; + std::copy(mU.data(),mU.data()+mU.len,result.data); + result.dims[0] = mU.dim_x; + result.dims[1] = mU.dim_y; + result.dims[2] = mU.dim_z; + + // if(info != NULL) *info = setInfo(ti_info); delete mb; delete mUt; - delete options; - - } catch(const std::exception &ex) { - mexErrMsgTxt(ex.what()); + delete options; } + catch(const std::exception &ex) { + result.errormsg = ex.what(); + } + return result; } diff --git a/TVALGPU/src/tval3gpu3d.h b/TVALGPU/src/tval3gpu3d.h new file mode 100644 index 0000000..093cb76 --- /dev/null +++ b/TVALGPU/src/tval3gpu3d.h @@ -0,0 +1,9 @@ +#ifndef __TVAL3GPU3D_H__ +#define __TVAL3GPU3D_H__ + +#include "tvalstruct.h" + +extern TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN, + int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt, + int device, bool pagelocked) ; +#endif // __TVAL3GPU3D_H__ \ No newline at end of file diff --git a/TVALGPU/src/tvalstruct.h b/TVALGPU/src/tvalstruct.h new file mode 100644 index 0000000..60d9741 --- /dev/null +++ b/TVALGPU/src/tvalstruct.h @@ -0,0 +1,26 @@ +#ifndef __TVALSTRUCT_H__ +#define __TVALSTRUCT_H__ + +#include +#include + +struct TVALOptions{ + float tol; + size_t maxit; + float TVnorm; + bool nonneg; + bool disp; + bool bent; + float mu0; + float mu; + float beta0; + float beta; + bool isreal = true; + +}; +struct TVALResult{ + double* data = nullptr; + int dims[3]{0}; + std::string errormsg; +}; +#endif // __TVALSTRUCT_H__ \ No newline at end of file diff --git a/cmake/URDependsConfig.cmake b/cmake/URDependsConfig.cmake index d6b5f99..481e21c 100644 --- a/cmake/URDependsConfig.cmake +++ b/cmake/URDependsConfig.cmake @@ -13,6 +13,6 @@ set_target_properties(URDepends::SignalProcess PROPERTIES IMPORTED_LOCATION "${_ add_library(URDepends::TransDetection SHARED IMPORTED) set_target_properties(URDepends::TransDetection PROPERTIES IMPORTED_LOCATION "${_DIR}/lib/libTranDetection.so") add_library(URDepends::TVALGPU SHARED IMPORTED) -set_target_properties(URDepends::TVALGPU PROPERTIES IMPORTED_LOCATION "${_DIR}/lib/libeiTVALGPU.so") +set_target_properties(URDepends::TVALGPU PROPERTIES IMPORTED_LOCATION "${_DIR}/lib/libTVALGPU.so") set(URDepends_FOUND ON) message(URDepends Found) \ No newline at end of file