Commit source

2023-05-18 16:04:27 +08:00
parent 88cf81e4ea
commit c6cd188732
83 changed files with 39921 additions and 0 deletions
--- a/TVALGPU/src/container_host.cpp
+++ b/TVALGPU/src/container_host.cpp
@@ -0,0 +1,207 @@
+#include "container_host.h"
+
+
+// ------------------------------------------------------------------------
+// member functions, etc. class mat_host
+// ------------------------------------------------------------------------
+
+// Allocating constructor for an l_y x l_x x l_z matrix.
+// When the element count is positive the buffer is obtained from
+// cudaHostAlloc (using `flags`) if mem_pagelocked is set, otherwise from
+// new[]; it is zero-filled only when init is true. The object is marked as
+// owning its buffer (allocated_in_constructor).
+mat_host::mat_host(int l_y, int l_x, int l_z, T_mat_format storage, bool init, bool mem_pagelocked, unsigned int flags):
+	pbuf(NULL),
+	dim_y(l_y), dim_x(l_x), dim_z(l_z),
+	len(l_y*l_x*l_z),
+	alloc_info(allocated_in_constructor),
+	format(storage),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+	// Nothing to allocate for an empty matrix; pbuf stays NULL.
+	if(len <= 0)
+		return;
+
+	if(!pagelocked) {
+		pbuf = new float[len];
+	} else {
+		HANDLE_ERROR(cudaHostAlloc(&pbuf, len*sizeof(float), alloc_flags));
+	}
+
+	if(init) {
+		memset(pbuf, 0, len*sizeof(float));
+	}
+}
+
+
+// Allocating constructor for a vector of num_elements entries
+// (stored as num_elements x 1 x 1, column-major).
+// Allocation and optional zero-fill follow the same rules as the
+// three-dimensional allocating constructor: page-locked memory via
+// cudaHostAlloc when mem_pagelocked is set, new[] otherwise.
+mat_host::mat_host(int num_elements, bool init, bool mem_pagelocked, unsigned int flags):
+	pbuf(NULL),
+	dim_y(num_elements), dim_x(1), dim_z(1),
+	len(num_elements),
+	alloc_info(allocated_in_constructor),
+	format(mat_col_major),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+	// An empty vector keeps pbuf == NULL.
+	if(len <= 0)
+		return;
+
+	if(!pagelocked) {
+		pbuf = new float[len];
+	} else {
+		HANDLE_ERROR(cudaHostAlloc(&pbuf, len*sizeof(float), alloc_flags));
+	}
+
+	if(init) {
+		memset(pbuf, 0, len*sizeof(float));
+	}
+}
+
+
+// Non-owning constructor: wraps the caller-supplied `buffer` as an
+// l_y x l_x x l_z matrix. The object is tagged preallocated_buffer, so the
+// destructor leaves the buffer untouched.
+mat_host::mat_host(int l_y, int l_x, int l_z, float *buffer, T_mat_format storage, bool mem_pagelocked, unsigned int flags):
+	pbuf(buffer),
+	dim_y(l_y), dim_x(l_x), dim_z(l_z),
+	len(l_y*l_x*l_z),
+	alloc_info(preallocated_buffer),
+	format(storage),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+}
+
+
+// Non-owning constructor: wraps the caller-supplied `buffer` as a vector of
+// num_elements entries (num_elements x 1 x 1, tagged row-major). The buffer
+// is never freed by this object.
+mat_host::mat_host(int num_elements, float *buffer, bool mem_pagelocked, unsigned int flags):
+	pbuf(buffer),
+	dim_y(num_elements), dim_x(1), dim_z(1),
+	len(num_elements),
+	alloc_info(preallocated_buffer),
+	format(mat_row_major),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+}
+
+
+// Copy constructor: always produces an owning deep copy, even when `m`
+// itself wraps a preallocated buffer. Shape, storage format, page-lock
+// setting and allocation flags are taken over from `m`.
+mat_host::mat_host(const mat_host &m):
+	pbuf(NULL),
+	dim_y(m.dim_y), dim_x(m.dim_x), dim_z(m.dim_z),
+	len(m.len),
+	alloc_info(allocated_in_constructor),
+	format(m.format),
+	pagelocked(m.pagelocked),
+	alloc_flags(m.alloc_flags)
+{
+	// Empty source: nothing to allocate or copy.
+	if(len <= 0)
+		return;
+
+	if(!pagelocked) {
+		pbuf = new float[len];
+	} else {
+		HANDLE_ERROR(cudaHostAlloc(&pbuf, len*sizeof(float), alloc_flags));
+	}
+
+	memcpy(pbuf, m.pbuf, len*sizeof(float));
+}
+
+
+// Releases the buffer only if this object allocated it itself; wrapped
+// (preallocated) buffers remain the caller's responsibility.
+mat_host::~mat_host() {
+	if(pbuf == NULL || alloc_info != allocated_in_constructor)
+		return;
+
+	// The release call must match the allocator used in the constructor.
+	if(!pagelocked) {
+		delete[] pbuf;
+		return;
+	}
+
+	HANDLE_ERROR(cudaFreeHost(pbuf));
+}
+
+
+// Element-wise assignment between matrices of identical element count.
+// Only the data is copied; shape, format and ownership of *this are kept.
+// Throws illegal_mat_gpu_assignment when the element counts differ.
+mat_host &mat_host::operator=(const mat_host &m) throw(illegal_mat_gpu_assignment) {
+	if(m.len != len)
+		throw illegal_mat_gpu_assignment(("Illegale Zuweisung von mat_host-Objekten!"));
+
+	// memcpy must not be given overlapping ranges (self-assignment) and
+	// must not be handed the NULL buffers of empty matrices.
+	if(this != &m && len > 0)
+		memcpy(pbuf, m.pbuf, len*sizeof(float));
+
+	return *this;
+}
+
+
+// Overwrites every element with a pseudo-random value taken from rand():
+// the draw is scaled to [0, 1], shifted by -0.5 and divided by scl.
+// The generator is not seeded here.
+void mat_host::randomFill(float scl) {
+	int idx = 0;
+	while(idx < len) {
+		pbuf[idx] = ((float)rand() / RAND_MAX - 0.5)/scl;
+		++idx;
+	}
+}
+
+
+// Writes the matrix to `stream`, one z-slice after another. Each slice is
+// printed row by row with entries separated by blanks; the element index
+// inside a slice follows the matrix' storage format.
+// Slices are assumed to be stored back to back (dim_y*dim_x elements each),
+// which is the layout of the MATLAB arrays these buffers are copied to/from.
+std::ostream &operator<<(std::ostream &stream, const mat_host &m) {
+	const int slice_len = m.dim_y*m.dim_x;
+
+	for(int z=0; z < m.dim_z; z++) {
+		// Offset of the current slice; without it every slice would
+		// print the contents of slice 0.
+		const int base = z*slice_len;
+
+		stream << "\nz = " << z << ":\n\n";
+		for(int i=0; i<m.dim_y; i++) {
+			for(int j=0; j<m.dim_x; j++) {
+				if(m.format == mat_row_major)
+					stream << m[base + i*m.dim_x + j] << " ";
+				else
+					stream << m[base + i + j*m.dim_y] << " ";
+			}
+			stream << "\n";
+		}
+	}
+	return stream;
+}
+
+// ------------------------------------------------------------------------
+// member functions, etc. class sparse_mat_host
+// ------------------------------------------------------------------------
+
+
+// Allocating constructor for a compressed sparse matrix with n_nonzero
+// stored entries. Three arrays are created when n_nonzero is positive:
+// values (nnz floats), indices (nnz ints) and the pointer array, which has
+// dim_x + 1 entries for sparse_mat_csc and dim_y + 1 entries otherwise.
+// Memory is page-locked (cudaHostAlloc with `flags`) when mem_pagelocked is
+// set, otherwise it comes from new[]; it is zeroed only when init is true.
+sparse_mat_host::sparse_mat_host(int num_dim_y, int num_dim_x, int n_nonzero, T_sparse_mat_format storage_format, bool init,
+		bool mem_pagelocked, unsigned int flags):
+	p_val(NULL), p_ptr(NULL), p_ind(NULL),
+	dim_y(num_dim_y), dim_x(num_dim_x),
+	nnz(n_nonzero),
+	format(storage_format),
+	alloc_info(allocated_in_constructor),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+	// Without stored entries all three arrays stay NULL.
+	if(nnz <= 0)
+		return;
+
+	const int len_ptr = 1 + ((format == sparse_mat_csc) ? dim_x : dim_y);
+
+	if(!pagelocked) {
+		p_val = new float[nnz];
+		p_ptr = new int[len_ptr];
+		p_ind = new int[nnz];
+	} else {
+		HANDLE_ERROR(cudaHostAlloc(&p_val, nnz*sizeof(float), alloc_flags));
+		HANDLE_ERROR(cudaHostAlloc(&p_ptr, len_ptr*sizeof(int), alloc_flags));
+		HANDLE_ERROR(cudaHostAlloc(&p_ind, nnz*sizeof(int), alloc_flags));
+	}
+
+	if(!init)
+		return;
+
+	memset(p_val, 0, nnz*sizeof(float));
+	memset(p_ptr, 0, (len_ptr)*sizeof(int));
+	memset(p_ind, 0, nnz*sizeof(int));
+}
+
+
+// Non-owning constructor: wraps caller-supplied value, pointer and index
+// arrays. The object is tagged preallocated_buffer, so the destructor does
+// not free any of them.
+sparse_mat_host::sparse_mat_host(int num_dim_y, int num_dim_x, int n_nonzero, float *buff_val, int *buff_ptr, int *buff_ind,
+		T_sparse_mat_format storage_format, bool mem_pagelocked, unsigned int flags):
+	p_val(buff_val), p_ptr(buff_ptr), p_ind(buff_ind),
+	dim_y(num_dim_y), dim_x(num_dim_x),
+	nnz(n_nonzero),
+	format(storage_format),
+	alloc_info(preallocated_buffer),
+	pagelocked(mem_pagelocked),
+	alloc_flags(flags)
+{
+}
+
+
+
+// Copy constructor: creates an owning deep copy of `m` (values, pointer
+// array and index array), using the same page-lock setting and allocation
+// flags as the source.
+sparse_mat_host::sparse_mat_host(const sparse_mat_host &m):
+	p_val(NULL), p_ptr(NULL), p_ind(NULL),
+	dim_y(m.dim_y), dim_x(m.dim_x),
+	nnz(m.nnz),
+	format(m.format),
+	alloc_info(allocated_in_constructor),
+	pagelocked(m.pagelocked),
+	alloc_flags(m.alloc_flags)
+{
+	// Nothing to allocate or copy when the source holds no entries.
+	if(nnz <= 0)
+		return;
+
+	const int len_ptr = 1 + ((format == sparse_mat_csc) ? dim_x : dim_y);
+
+	if(!pagelocked) {
+		p_val = new float[nnz];
+		p_ptr = new int[len_ptr];
+		p_ind = new int[nnz];
+	} else {
+		HANDLE_ERROR(cudaHostAlloc(&p_val, nnz*sizeof(float), alloc_flags));
+		HANDLE_ERROR(cudaHostAlloc(&p_ptr, len_ptr*sizeof(int), alloc_flags));
+		HANDLE_ERROR(cudaHostAlloc(&p_ind, nnz*sizeof(int), alloc_flags));
+	}
+
+	memcpy(p_val, m.p_val, nnz*sizeof(float));
+	memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
+	memcpy(p_ind, m.p_ind, nnz*sizeof(int));
+}
+
+
+// Frees the three arrays, but only when this object allocated them itself.
+sparse_mat_host::~sparse_mat_host() {
+	if(alloc_info != allocated_in_constructor)
+		return;
+
+	if(!pagelocked) {
+		// delete[] on a NULL pointer is a no-op, so no checks are needed.
+		delete[] p_val;
+		delete[] p_ptr;
+		delete[] p_ind;
+		return;
+	}
+
+	if(p_val != NULL) {
+		HANDLE_ERROR(cudaFreeHost(p_val));
+	}
+	if(p_ptr != NULL) {
+		HANDLE_ERROR(cudaFreeHost(p_ptr));
+	}
+	if(p_ind != NULL) {
+		HANDLE_ERROR(cudaFreeHost(p_ind));
+	}
+}
+
+
+// Copies the contents of `m` into this matrix. Both matrices must agree in
+// dimensions, number of stored entries and storage format; otherwise
+// illegal_mat_gpu_assignment is thrown. Ownership and page-lock settings of
+// *this are not changed.
+sparse_mat_host &sparse_mat_host::operator=(const sparse_mat_host &m) throw(illegal_mat_gpu_assignment){
+
+	const bool compatible = (m.nnz == nnz) && (m.dim_y == dim_y) && (m.dim_x == dim_x) && (m.format == format);
+	if(!compatible) {
+		throw illegal_mat_gpu_assignment(("Illegale Zuweisung von sparse_mat_host-Objekten!"));
+	}
+
+	// memcpy must not see overlapping ranges (self-assignment), and the
+	// arrays are NULL when the matrix was constructed without entries.
+	if(this != &m && nnz > 0) {
+		int len_ptr = (format == sparse_mat_csc) ? dim_x + 1 : dim_y + 1;
+
+		memcpy(p_val, m.p_val, nnz*sizeof(float));
+		memcpy(p_ptr, m.p_ptr, (len_ptr)*sizeof(int));
+		memcpy(p_ind, m.p_ind, nnz*sizeof(int));
+	}
+
+	return *this;
+}
+
+// output operator ...
+
+// Prints a sparse matrix by expanding it into a temporary dense matrix and
+// streaming that.
+std::ostream &operator<<(std::ostream &stream, const sparse_mat_host &m) {
+	// The scatter loops below write column-major indices and only touch
+	// stored entries, so the temporary must be column-major and start out
+	// zeroed. Both are requested explicitly instead of relying on
+	// constructor defaults; pinned memory is not needed for printing.
+	mat_host tmp(m.dim_y, m.dim_x, 1, mat_col_major, true, false);
+
+	if(m.format == sparse_mat_csc) {
+		// ptr() delimits columns, ind() holds row numbers.
+		for(int c=0; c < m.dim_x; c++)
+			for(int i = m.ptr()[c]; i < m.ptr()[c+1]; i++)
+				tmp[c*m.dim_y + m.ind()[i]] = m.val()[i];
+	} else {
+		// ptr() delimits rows, ind() holds column numbers.
+		for(int r=0; r < m.dim_y; r++)
+			for(int i = m.ptr()[r]; i < m.ptr()[r+1]; i++)
+				tmp[m.ind()[i]*m.dim_y + r] = m.val()[i];
+	}
+	stream << tmp;
+	return stream;
+}
--- a/TVALGPU/src/tval3.cu
+++ b/TVALGPU/src/tval3.cu
--- a/TVALGPU/src/tval3gpu3d.cpp
+++ b/TVALGPU/src/tval3gpu3d.cpp
@@ -0,0 +1,521 @@
+#include <cuda_runtime.h>
+#include <cuda.h>
+#include <cublas_v2.h>
+#include <cusparse.h>
+#include <handle_error.h>
+
+#include <matrix.h>
+#include <mex.h>   
+#include <tval3_gpu.h>
+
+#define OUTPUT_TYPES
+
+using namespace std;
+
+// Create a mat_host object from an mxArray.
+// The data buffer of the mxArray is used directly (no copy is made).
+// If pagelocked == true, the largest part of the buffer that starts and ends
+// on a 4 KiB boundary is page-locked with cudaHostRegister(); the caller must
+// release it later with cudaHostUnregister(). The start address of that part
+// is returned in *plb.
+// *plb is set to NULL whenever nothing was registered: pagelocked == false,
+// mi == NULL, or the buffer contains no complete aligned 4 KiB block.
+// For mi == NULL an empty mat_host is returned.
+// The returned object is allocated with new and owned by the caller.
+mat_host *getMatrix(const mxArray *mi, bool pagelocked, void **plb) {
+
+    // Report "nothing registered" on every path that does not register.
+    *plb = NULL;
+
+    if(mi == NULL)
+        return new mat_host(0);
+
+    const mwSize *dims = mxGetDimensions(mi);
+    mwSize dim_y = dims[0];
+    mwSize dim_x = dims[1];
+    mwSize dim_z = (mxGetNumberOfDimensions(mi) >= 3) ?  dims[2] : 1;
+
+    float *mi_data = (float *)mxGetData(mi);
+
+    if(pagelocked) {
+        const size_t page = 4*1024;
+        const size_t total_bytes = dim_y * dim_x * dim_z * sizeof(float);
+
+        // size_t instead of long: long is only 32 bits wide on 64-bit
+        // Windows and would truncate the address.
+        const size_t addr = (size_t)mi_data;
+        const size_t aligned = ((addr + page - 1) / page) * page;
+        const size_t offset = aligned - addr;
+
+        // A buffer shorter than the alignment offset has no aligned part;
+        // subtracting anyway would wrap around to a huge unsigned size.
+        if(total_bytes > offset) {
+            const size_t size = ((total_bytes - offset) / page) * page;
+
+            if(size > 0) {
+                void *start = (void *)aligned;
+                HANDLE_ERROR(cudaHostRegister(start, size, cudaHostRegisterDefault));
+                // Published only after the registration succeeded.
+                *plb = start;
+                mexPrintf("Pagelocked %llu bytes. Offset: %llu\n",
+                        (unsigned long long)size, (unsigned long long)offset);
+            }
+        }
+    }
+
+    return new mat_host(dim_y, dim_x, dim_z, mi_data);
+}
+
+// Create a sparse_mat_host object (CSC format) from a sparse mxArray.
+// Column pointers and row indices are copied as ints and the values are
+// converted from double to float. The number of stored entries is taken
+// from mxGetNzmax(). The result is allocated with new (page-locked with
+// cudaHostAllocWriteCombined when requested) and owned by the caller.
+sparse_mat_host *getSparseMatrix(const mxArray *mi, bool pagelocked) {
+
+    const mwSize *shape = mxGetDimensions(mi);
+    const int rows = shape[0];
+    const int cols = shape[1];
+    const int capacity = (int)mxGetNzmax(mi);
+
+    const mwIndex *row_index = mxGetIr(mi);
+    const mwIndex *col_start = mxGetJc(mi);
+    const double *values = (double *)mxGetData(mi);
+
+    sparse_mat_host *result = new sparse_mat_host(rows, cols, capacity,
+            sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);
+
+    // cols + 1 column pointers
+    for(int c = 0; c <= cols; c++) {
+        result->ptr()[c] = col_start[c];
+    }
+
+    // row indices and values of all stored entries
+    for(int k = 0; k < result->nnz; k++) {
+        result->ind()[k] = row_index[k];
+        result->val()[k] = (float)values[k];
+    }
+
+    return result;
+}
+
+// get geometry_host structure from mxArray
+geometry_host *get_geometry(const mxArray *A) {
+    float scale_factor;
+    int *x_e, *y_e, *z_e, *x_r, *y_r, *z_r;
+    int ne, nr, rv_x, rv_y, rv_z;
+    mxArray *field;
+    
+    field = mxGetField(A, 0, "rv_x");
+    if(field == NULL) {
+        mexErrMsgTxt("no field rv_x in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.rv_x must be of type Int32!");
+        }
+       rv_x = *( (int *)mxGetData(field) );
+    }
+    
+    field = mxGetField(A, 0, "rv_y");
+    if(field == NULL) {
+        mexErrMsgTxt("no field rv_y in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.rv_y must be of type Int32!");
+        }
+       rv_y = *( (int *)mxGetData(field) );
+    }
+
+        field = mxGetField(A, 0, "rv_z");
+    if(field == NULL) {
+        mexErrMsgTxt("no field rv_z in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.rv_z must be of type Int32!");
+        }
+       rv_z = *( (int *)mxGetData(field) );
+    }
+        
+    field = mxGetField(A, 0, "scale_factor");
+    if(field == NULL) {
+        mexErrMsgTxt("no field scale_factor in struct A!");
+    } else {
+        if(!mxIsSingle(field)) {
+            mexErrMsgTxt("A.scale_factor must be of type single!");
+        }
+        scale_factor = *( (float *)mxGetData(field) );
+    }
+    
+    field = mxGetField(A, 0, "x_emitters");
+    if(field == NULL) {
+        mexErrMsgTxt("no field x_emitters in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.x_emitters must be of type int32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.x_emitters must be a column-vector!");
+        ne = dims[1];
+        x_e = (int *)mxGetData(field);
+    }
+
+    field = mxGetField(A, 0, "y_emitters");
+    if(field == NULL) {
+        mexErrMsgTxt("no field y_emitters in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.y_emitters must be of type in32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.y_emitters must be a column-vector!");
+        if(dims[1] != ne)
+            mexErrMsgTxt("A.y_emitters must be of the same size as A.x_emitters!");
+        y_e = (int *)mxGetData(field);
+    }
+    
+    field = mxGetField(A, 0, "z_emitters");
+    if(field == NULL) {
+        mexErrMsgTxt("no field z_emitters in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.z_emitters must be of type in32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.z_emitters must be a column-vector!");
+        if(dims[1] != ne)
+            mexErrMsgTxt("A.z_emitters must be of the same size as A.x_emitters!");
+        z_e = (int *)mxGetData(field);
+    }
+    
+
+    field = mxGetField(A, 0, "x_receivers");
+    if(field == NULL) {
+        mexErrMsgTxt("no field x_receivers in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.x_receivers must be of type in32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.x_receivers must be a column-vector!");
+        nr = dims[1];
+        x_r = (int *)mxGetData(field);
+    }
+
+    field = mxGetField(A, 0, "y_receivers");
+    if(field == NULL) {
+        mexErrMsgTxt("no field y_receivers in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.y_receivers must be of type in32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.y_receivers must be a column-vector!");
+        if(dims[1] != nr)
+            mexErrMsgTxt("A.y_receivers must be of the same size as A.x_receivers!");
+        y_r = (int *)mxGetData(field);
+    }
+    
+    field = mxGetField(A, 0, "z_receivers");
+    if(field == NULL) {
+        mexErrMsgTxt("no field z_receivers in struct A!");
+    } else {
+        if(!mxIsInt32(field)) {
+            mexErrMsgTxt("A.z_receivers must be of type int32!");
+        }
+        const mwSize *dims = mxGetDimensions(field);
+        if(dims[0] > 1)
+            mexErrMsgTxt("A.z_receivers must be a column-vector!");
+        if(dims[1] != nr)
+            mexErrMsgTxt("A.z_receivers must be of the same size as A.z_receivers!");
+        z_r = (int *)mxGetData(field);
+    }
+    
+    geometry_host *geom = new geometry_host;
+    geom->num_emitters = ne;
+    geom->num_receivers = nr;
+    geom->x_emitters = x_e;
+    geom->y_emitters = y_e;
+    geom->z_emitters = z_e;
+    geom->x_receivers = x_r;
+    geom->y_receivers = y_r;
+    geom->z_receivers = z_r;
+    geom->scale_factor = scale_factor;
+    geom->rv_x = rv_x;
+    geom->rv_y = rv_y;
+    geom->rv_z = rv_z;
+    
+    return geom;
+}
+
+// Read the optional double field opts.<name> into *value.
+// Returns false (leaving *value untouched) when the field is absent; a field
+// of the wrong type or an empty field is reported with a warning and ignored.
+static bool opt_read_double(const mxArray *opts, const char *name, double *value) {
+    const mxArray *field = mxGetField(opts, 0, name);
+    if(field == NULL)
+        return false;
+    if(!mxIsDouble(field)) {
+        mexPrintf("Warning: opts.%s is not of type double (ignored).\n", name);
+        return false;
+    }
+    // mxGetPr() has nothing to point at for an empty array.
+    if(mxIsEmpty(field)) {
+        mexPrintf("Warning: opts.%s is empty (ignored).\n", name);
+        return false;
+    }
+    *value = *mxGetPr(field);
+    return true;
+}
+
+// Read the optional logical field opts.<name> into *value; same rules as
+// opt_read_double().
+static bool opt_read_logical(const mxArray *opts, const char *name, bool *value) {
+    const mxArray *field = mxGetField(opts, 0, name);
+    if(field == NULL)
+        return false;
+    if(!mxIsLogical(field)) {
+        mexPrintf("Warning: opts.%s is not of type logical (ignored).\n", name);
+        return false;
+    }
+    if(mxIsEmpty(field)) {
+        mexPrintf("Warning: opts.%s is empty (ignored).\n", name);
+        return false;
+    }
+    *value = (bool) *mxGetLogicals(field);
+    return true;
+}
+
+// Create a tval3_options struct from the MATLAB struct optsi.
+// Every option starts with the value set by tval3_options' own
+// initialisation and is overwritten only if optsi has a usable field of the
+// same name. Unusable fields produce a warning and are ignored; if optsi is
+// not a struct at all, the untouched defaults are returned.
+// The result is allocated with new and owned by the caller.
+tval3_options *getOptions(const mxArray *optsi) {
+
+    tval3_options *optso = new tval3_options;
+
+    if(!mxIsStruct(optsi)) {
+        mexPrintf("Warning: opts is not a structure.\n");
+        return optso;
+    }
+
+    double num;
+    bool flag;
+
+    // floating-point options
+    if(opt_read_double(optsi, "mu", &num))       optso->mu = (float) num;
+    if(opt_read_double(optsi, "mu0", &num))      optso->mu0 = (float) num;
+    if(opt_read_double(optsi, "beta", &num))     optso->beta = (float) num;
+    if(opt_read_double(optsi, "beta0", &num))    optso->beta0 = (float) num;
+    if(opt_read_double(optsi, "rate_cnt", &num)) optso->rate_cnt = (float) num;
+    if(opt_read_double(optsi, "tol", &num))      optso->tol = (float) num;
+    if(opt_read_double(optsi, "tol_inn", &num))  optso->tol_inn = (float) num;
+
+    // iteration limits (passed as double, stored as int)
+    if(opt_read_double(optsi, "maxcnt", &num))   optso->maxcnt = (int) num;
+    if(opt_read_double(optsi, "maxit", &num))    optso->maxit = (int) num;
+
+    if(opt_read_double(optsi, "c", &num))        optso->c = (float) num;
+    if(opt_read_double(optsi, "gamma", &num))    optso->gamma = (float) num;
+    if(opt_read_double(optsi, "gam", &num))      optso->gam = (float) num;
+    if(opt_read_double(optsi, "rate_gam", &num)) optso->rate_gam = (float) num;
+
+    // flags
+    if(opt_read_logical(optsi, "isreal", &flag)) optso->isreal = flag;
+    if(opt_read_logical(optsi, "nonneg", &flag)) optso->nonneg = flag;
+
+    return optso;
+}
+
+// Create a 1x1 MATLAB struct from a tval3_info object. The struct has the
+// fields total_iters, outer_iters, rel_chg, rel_error and secs, each stored
+// as a real double scalar.
+mxArray *setInfo(tval3_info &info) {
+    const int n_fields = 5;
+    const char *field_names[] = {"total_iters", "outer_iters", "rel_chg", "rel_error", "secs"};
+
+    // Same order as field_names.
+    double field_values[5];
+    field_values[0] = info.total_iters;
+    field_values[1] = info.outer_iters;
+    field_values[2] = info.rel_chg;
+    field_values[3] = info.rel_error;
+    field_values[4] = info.secs;
+
+    mxArray *output = mxCreateStructMatrix(1, 1, n_fields, field_names);
+
+    for(int k = 0; k < n_fields; k++) {
+        mxArray *scalar = mxCreateDoubleMatrix(1,1,mxREAL);
+        *mxGetPr(scalar) = field_values[k];
+        mxSetField(output, 0, field_names[k], scalar);
+    }
+
+    return output;
+}
+
+// Create a real single-precision mxArray of size dim_y x dim_x x dim_z from
+// a mat_host object. The element data is copied verbatim (mi.len floats),
+// without any reordering.
+mxArray *setMatrix(const mat_host &mi) {
+
+    mwSize shape[3];
+    shape[0] = mi.dim_y;
+    shape[1] = mi.dim_x;
+    shape[2] = mi.dim_z;
+
+    mxArray *result = mxCreateNumericArray(3, shape, mxSINGLE_CLASS, mxREAL);
+
+    float *dst = (float *) mxGetData(result);
+    const size_t n_bytes = mi.len * sizeof(float);
+    memcpy(dst, mi.data(), n_bytes);
+
+    return result;
+}
+
+// Resources acquired during one TVALGPU() call. Collected in one place so
+// that the normal and the error path release exactly the same things.
+struct tvalgpu_resources {
+    void *plb_A, *plb_b, *plb_Ut;   // page-locked regions (NULL if none)
+    mat_host *mA, *mb, *mUt;        // wrappers around MATLAB buffers
+    sparse_mat_host *mA_sparse;     // converted copy of a sparse A
+    tval3_options *options;
+
+    tvalgpu_resources(): plb_A(NULL), plb_b(NULL), plb_Ut(NULL), mA(NULL), mb(NULL), mUt(NULL),
+        mA_sparse(NULL), options(NULL) {}
+};
+
+// Unregister all page-locked regions and delete all objects held in res.
+// Returns the first error reported by cudaHostUnregister(), or cudaSuccess.
+static cudaError_t tvalgpu_release(tvalgpu_resources &res) {
+    cudaError_t status = cudaSuccess;
+
+    void *regions[3];
+    regions[0] = res.plb_A;
+    regions[1] = res.plb_b;
+    regions[2] = res.plb_Ut;
+    for(int i = 0; i < 3; i++) {
+        if(regions[i] == NULL)
+            continue;
+        const cudaError_t err = cudaHostUnregister(regions[i]);
+        if(status == cudaSuccess)
+            status = err;
+    }
+    res.plb_A = res.plb_b = res.plb_Ut = NULL;
+
+    delete res.mA;        res.mA = NULL;
+    delete res.mA_sparse; res.mA_sparse = NULL;
+    delete res.mb;        res.mb = NULL;
+    delete res.mUt;       res.mUt = NULL;
+    delete res.options;   res.options = NULL;
+
+    return status;
+}
+
+// MEX gateway.
+// Inputs (prhs):  A, b, p, q, r, opts [, device [, pagelocked]]
+//   A          struct (geometry), dense single matrix or sparse double matrix
+//   b          real single array
+//   p, q, r    double scalars: dimensions of the result U
+//   opts       options struct; opts.Ut (real single) is optional
+//   device     double scalar, CUDA device number (default 0)
+//   pagelocked logical scalar (default true)
+// Outputs (plhs): U [, info]
+void TVALGPU(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
+
+    // check number of arguments...
+    if ((nrhs < 6) || (nrhs > 9))
+        mexErrMsgTxt("Wrong number of input arguments.");
+    if(nlhs > 2)
+        mexErrMsgTxt("Too many output arguments.");
+
+    // assign input arguments...
+    const mxArray *A = prhs[0];
+    const mxArray *b = prhs[1];
+    const mxArray *p = prhs[2];
+    const mxArray *q = prhs[3];
+    const mxArray *r = prhs[4];
+    const mxArray *opts = prhs[5];
+    const mxArray *Ut = mxGetField(opts, 0, "Ut");
+    const mxArray *device = (nrhs < 7) ? NULL : prhs [6];
+    const mxArray *pagelocked = (nrhs < 8) ? NULL : prhs [7];
+
+    // assign output arguments...
+    mxArray **U = &plhs[0];
+    mxArray **info = (nlhs < 2) ? NULL : &plhs[1];
+
+    // check data types...
+    if((!mxIsStruct(A) && !mxIsSingle(A) && !(mxIsDouble(A) && mxIsSparse(A)))
+            || mxIsComplex(A))
+        mexErrMsgTxt("A must be a struct or a non complex dense matrix of type single or a non complex sparse matrix.");
+    if(!mxIsSingle(b) || mxIsComplex(b))
+        mexErrMsgTxt("b must be non complex single.");
+    if((Ut != NULL) && (!mxIsSingle(Ut) || mxIsComplex(Ut)))
+        mexErrMsgTxt("opts.Ut must be non complex single.");
+    if(!mxIsDouble(p))
+        mexErrMsgTxt("p must be of type double.");
+    if(!mxIsDouble(q))
+        mexErrMsgTxt("q must be of type double.");
+    if(!mxIsDouble(r))
+        mexErrMsgTxt("r must be of type double.");
+    // mxGetPr() of an empty array must not be dereferenced.
+    if(mxIsEmpty(p) || mxIsEmpty(q) || mxIsEmpty(r))
+        mexErrMsgTxt("p, q and r must not be empty.");
+    if((device != NULL) && (!mxIsDouble(device)))
+        mexErrMsgTxt("device must be of type double.");
+    if((pagelocked != NULL) && (!mxIsLogical(pagelocked)))
+        mexErrMsgTxt("pagelocked is not a logical value.");
+
+    // ...and read the scalar arguments; an empty optional argument is
+    // treated like a missing one.
+    int ip = (int)(*mxGetPr(p));
+    int iq = (int)(*mxGetPr(q));
+    int ir = (int)(*mxGetPr(r));
+    int i_device = (device == NULL || mxIsEmpty(device)) ? 0 : (int)(*mxGetPr(device));
+    bool b_pagelocked = (pagelocked == NULL || mxIsEmpty(pagelocked)) ? true : (bool) mxGetLogicals(pagelocked)[0];
+
+    // NOTE(review): the geometry_host object is never freed. Whether it can
+    // simply be deleted depends on its definition (not visible here) --
+    // confirm and release it after use.
+    geometry_host *geo = mxIsStruct(A) ? get_geometry(A) : NULL;
+
+    tvalgpu_resources res;
+
+    try {
+        HANDLE_ERROR(cudaSetDevice(i_device));
+
+        res.mb = getMatrix(b, b_pagelocked, &res.plb_b);
+        mat_host mU(ip, iq, ir, mat_col_major, false, b_pagelocked);
+        res.mUt = getMatrix(Ut, b_pagelocked, &res.plb_Ut);
+
+        res.options = getOptions(opts);
+
+        tval3_info ti_info;
+
+        if(geo != NULL) {
+            ti_info = tval3_gpu(mU, *geo, *res.mb, *res.options, *res.mUt, b_pagelocked);
+        } else if(mxIsSparse(A)) {
+            res.mA_sparse = getSparseMatrix(A, b_pagelocked);
+            ti_info = tval3_gpu(mU, *res.mA_sparse, *res.mb, *res.options, *res.mUt, b_pagelocked);
+        } else {
+            res.mA = getMatrix(A, b_pagelocked, &res.plb_A);
+            ti_info = tval3_gpu(mU, *res.mA, *res.mb, *res.options, *res.mUt, b_pagelocked);
+        }
+
+        *U = setMatrix(mU);
+        if(info != NULL) *info = setInfo(ti_info);
+
+    } catch(const std::exception &ex) {
+        // Best-effort cleanup: the original error is the one worth reporting.
+        try { tvalgpu_release(res); } catch(...) {}
+        mexErrMsgTxt(ex.what());
+    }
+
+    // Normal path. The region registered for opts.Ut is released here as
+    // well: res.plb_Ut is non-NULL only if getMatrix() registered memory, and
+    // leaving it registered would keep the pages pinned after the call.
+    try {
+        const cudaError_t status = tvalgpu_release(res);
+        HANDLE_ERROR(status);
+    } catch(const std::exception &ex) {
+        mexErrMsgTxt(ex.what());
+    }
+}