Add CudaMatrix.
This commit is contained in:
@@ -3,6 +3,11 @@ project(Aurora)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
|
||||
enable_language(CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
find_package (OpenMP REQUIRED)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
|
||||
@@ -25,6 +30,14 @@ target_include_directories(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_IN
|
||||
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)
|
||||
target_link_libraries(Aurora PUBLIC OpenMP::OpenMP_CXX)
|
||||
target_link_libraries(Aurora PUBLIC matio)
|
||||
|
||||
target_include_directories(Aurora PRIVATE ./src /usr/local/cuda/include)
|
||||
set_target_properties(Aurora PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
-arch=sm_75
|
||||
>)
|
||||
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||
|
||||
find_package(GTest REQUIRED)
|
||||
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIRS})
|
||||
include_directories(./src/util)
|
||||
@@ -41,5 +54,12 @@ target_link_libraries(Aurora_Test PUBLIC $<LINK_ONLY:MKL::MKL>)
|
||||
target_link_libraries(Aurora_Test PUBLIC OpenMP::OpenMP_CXX)
|
||||
target_link_libraries(Aurora_Test PUBLIC matio)
|
||||
target_link_libraries(Aurora_Test PUBLIC ${GTEST_BOTH_LIBRARIES} )
|
||||
|
||||
target_include_directories(Aurora_Test PRIVATE ./src /usr/local/cuda/include)
|
||||
set_target_properties(Aurora_Test PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_compile_options(Aurora_Test PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
-arch=sm_75
|
||||
>)
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||
gtest_discover_tests(Aurora_Test )
|
||||
#target_link_libraries(CreateMatchedFilter PRIVATE TBB::tbb)
|
||||
238
src/CudaMatrix.cpp
Normal file
238
src/CudaMatrix.cpp
Normal file
@@ -0,0 +1,238 @@
|
||||
#include "CudaMatrix.h"
|
||||
|
||||
#include "Function.h"
|
||||
#include "Matrix.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstddef>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
using namespace Aurora;
|
||||
|
||||
CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
|
||||
: mValueType(aValueType)
|
||||
, mData(aData)
|
||||
, mInfo(aInfo)
|
||||
{
|
||||
size_t infoSize = mInfo.size();
|
||||
for(; infoSize<3; ++infoSize)
|
||||
{
|
||||
mInfo.push_back(1);
|
||||
}
|
||||
}
|
||||
|
||||
bool CudaMatrix::isNull() const
|
||||
{
|
||||
return !mData || mInfo.empty();
|
||||
}
|
||||
|
||||
bool CudaMatrix::isNan() const
|
||||
{
|
||||
for(size_t i=0; i<getDataSize(); ++i)
|
||||
{
|
||||
if(mData.get()[i] == mData.get()[i])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CudaMatrix::isScalar() const
|
||||
{
|
||||
return (getDimSize(0) == 1 &&
|
||||
getDimSize(1) == 1 &&
|
||||
getDimSize(2) < 2);
|
||||
}
|
||||
|
||||
float CudaMatrix::getScalar() const
|
||||
{
|
||||
if (isNull()) return 0.0;
|
||||
if (isNull()) return 0.0;
|
||||
return getData()[0];
|
||||
}
|
||||
|
||||
bool CudaMatrix::isVector() const
|
||||
{
|
||||
if (getDimSize(2)>1) return false;
|
||||
if (isScalar()) return false;
|
||||
return getDimSize(0) == 1 ||
|
||||
getDimSize(1) == 1;
|
||||
}
|
||||
|
||||
int CudaMatrix::getDims() const
|
||||
{
|
||||
if(mInfo[2] > 1)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
float *CudaMatrix::getData() const
|
||||
{
|
||||
return mData.get();
|
||||
}
|
||||
|
||||
int CudaMatrix::getDimSize(int aIndex) const
|
||||
{
|
||||
if (aIndex >= 0 && aIndex < 3) {
|
||||
return mInfo.at(aIndex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t CudaMatrix::getDataSize() const
|
||||
{
|
||||
if (!mData.get())return 0;
|
||||
size_t ret = 1;
|
||||
for (auto v: mInfo) {
|
||||
ret *= v;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void CudaMatrix::forceReshape(int rows, int columns, int slices)
|
||||
{
|
||||
mInfo = {rows,columns,slices};
|
||||
}
|
||||
|
||||
bool CudaMatrix::compareShape(const CudaMatrix &other) const
|
||||
{
|
||||
if (mInfo[2] == 1 && other.mInfo[2] == 1) {
|
||||
if (mInfo[0]==1 && other.mInfo[1] == 1 && mInfo[1] == other.mInfo[0]) return true;
|
||||
if (mInfo[1]==1 && other.mInfo[0] == 1 && mInfo[0] == other.mInfo[1]) return true;
|
||||
}
|
||||
for (int i = 0; i < mInfo.size(); ++i) {
|
||||
if (mInfo[i] != other.mInfo[i]) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::fromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
|
||||
{
|
||||
if (!aData)
|
||||
{
|
||||
return CudaMatrix();
|
||||
}
|
||||
std::vector<int> vector{aRows, aCols, aSlices};
|
||||
CudaMatrix ret({aData, gpuFree}, vector, aType);
|
||||
return ret;
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::copyFromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
|
||||
{
|
||||
if (!aData)
|
||||
{
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = aRows * aCols * aSlices * aType;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
cudaMemcpy(data, aData, sizeof(float) * size, cudaMemcpyDeviceToDevice);
|
||||
std::vector<int> vector{aRows, aCols, aSlices};
|
||||
return CudaMatrix({data, gpuFree}, vector, aType);
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::deepCopy() const
|
||||
{
|
||||
float* data = nullptr;
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToDevice);
|
||||
return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
|
||||
}
|
||||
|
||||
Matrix CudaMatrix::toHostMatrix() const
|
||||
{
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
float* data = new float[size];
|
||||
cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost);
|
||||
return Matrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const
|
||||
{
|
||||
if(aDim > 2)
|
||||
{
|
||||
std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (isVector() && aDim == 0 && getDimSize(1)>1)
|
||||
{
|
||||
aDim = 1;
|
||||
}
|
||||
|
||||
if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
|
||||
{
|
||||
std::cerr<<"CudaMatrix block BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
|
||||
{
|
||||
std::cerr<<"CudaMatrix block EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
if (aEndIndex < aBeginIndex)
|
||||
{
|
||||
std::cerr<<"CudaMatrix block EndIndex can not less than BeginIndex ! BeginIndex:"<<aBeginIndex <<", EndIndex:"<<aEndIndex<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
int dimLength = aEndIndex - aBeginIndex + 1;
|
||||
int dataSize = getDataSize()/getDimSize(aDim)*dimLength;
|
||||
float * dataOutput = nullptr;
|
||||
cudaMalloc((void**)&dataOutput, sizeof(float) * dataSize * getValueType());
|
||||
int colStride = getDimSize(0);
|
||||
int sliceStride = getDimSize(0)*getDimSize(1);
|
||||
|
||||
switch (aDim)
|
||||
{
|
||||
case 0:
|
||||
{
|
||||
int colStride2 = dimLength;
|
||||
int sliceStride2 = dimLength*getDimSize(1);
|
||||
for (size_t i = 0; i < getDimSize(2); i++)
|
||||
{
|
||||
for (size_t j = 0; j < getDimSize(1); j++)
|
||||
{
|
||||
cudaMemcpy(dataOutput + (colStride2 * j + i * sliceStride2)*getValueType(),
|
||||
mData.get()+ (aBeginIndex + j * colStride + i * sliceStride)*getValueType(),
|
||||
sizeof(float) * colStride2*getValueType(), cudaMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
return CudaMatrix::fromRawData(dataOutput,dimLength,getDimSize(1),getDimSize(2),getValueType());
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
int colStride2 = getDimSize(0);
|
||||
int copySize = dimLength*getDimSize(0);
|
||||
for (size_t i = 0; i < getDimSize(2); i++)
|
||||
{
|
||||
cudaMemcpy(dataOutput + getValueType()*(i * copySize),
|
||||
mData.get() + getValueType()*(aBeginIndex * colStride + i * sliceStride),
|
||||
sizeof(float) * copySize*getValueType(), cudaMemcpyDeviceToDevice);
|
||||
}
|
||||
return CudaMatrix::fromRawData(dataOutput,getDimSize(0),dimLength,getDimSize(2),getValueType());
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
int copySize = dimLength*sliceStride;
|
||||
cudaMemcpy(dataOutput,
|
||||
mData.get() + aBeginIndex * sliceStride*getValueType(),
|
||||
sizeof(float) * copySize*getValueType(), cudaMemcpyDeviceToDevice);
|
||||
return CudaMatrix::fromRawData(dataOutput,getDimSize(0),getDimSize(1),dimLength,getValueType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float value)
|
||||
{
|
||||
if(aDim>2 )
|
||||
{
|
||||
std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
239
src/CudaMatrix.h
Normal file
239
src/CudaMatrix.h
Normal file
@@ -0,0 +1,239 @@
|
||||
#ifndef CUDAMATRIX_H
|
||||
#define CUDAMATRIX_H
|
||||
|
||||
#include "Matrix.h"
|
||||
|
||||
|
||||
namespace Aurora
|
||||
{
|
||||
class CudaMatrix
|
||||
{
|
||||
public:
|
||||
explicit CudaMatrix(std::shared_ptr<float> aData = std::shared_ptr<float>(),
|
||||
std::vector<int> aInfo = std::vector<int>(),
|
||||
ValueType aValueType = Normal);
|
||||
|
||||
/**
|
||||
* Create from a Raw data(float array).
|
||||
* Use Raw data which create like new float[size]() as a data source
|
||||
* and the share_ptr's deleter will be std::default_delete<float[]>
|
||||
* @param data
|
||||
* @param rows
|
||||
* @param cols
|
||||
* @param slices
|
||||
* @param type
|
||||
* @return
|
||||
*/
|
||||
static CudaMatrix fromRawData(float *data, int rows, int cols = 1, int slices = 1, ValueType type = Normal);
|
||||
|
||||
/**
|
||||
* Create from a Raw data(float array) with copy the data to a new mkl memory.
|
||||
* @param data
|
||||
* @param rows
|
||||
* @param cols
|
||||
* @param slices
|
||||
* @param type
|
||||
* @return
|
||||
*/
|
||||
static CudaMatrix copyFromRawData(float *data, int rows, int cols = 1, int slices = 1, ValueType type = Normal);
|
||||
|
||||
/**
|
||||
* 深拷贝操作
|
||||
* @return 深拷贝的Matrix对象
|
||||
*/
|
||||
CudaMatrix deepCopy() const;
|
||||
|
||||
// add
|
||||
CudaMatrix operator+(float aScalar) const;
|
||||
friend CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix);
|
||||
friend CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix);
|
||||
friend CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar);
|
||||
CudaMatrix operator+(const CudaMatrix &aMatrix) const;
|
||||
CudaMatrix operator+(CudaMatrix &&aMatrix) const;
|
||||
friend CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther);
|
||||
|
||||
// sub
|
||||
CudaMatrix operator-(float aScalar) const;
|
||||
friend CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix);
|
||||
friend CudaMatrix& operator-(float aScalar, CudaMatrix &&aMatrix);
|
||||
friend CudaMatrix& operator-(CudaMatrix &&aMatrix,float aScalar);
|
||||
CudaMatrix operator-(const CudaMatrix &aMatrix) const;
|
||||
CudaMatrix operator-(CudaMatrix &&aMatrix) const;
|
||||
friend CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther);
|
||||
|
||||
//negetive
|
||||
friend CudaMatrix operator-(CudaMatrix &&aMatrix);
|
||||
friend CudaMatrix operator-(const CudaMatrix &aMatrix);
|
||||
|
||||
// mul
|
||||
CudaMatrix operator*(float aScalar) const;
|
||||
friend CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix);
|
||||
friend CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix);
|
||||
friend CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar);
|
||||
CudaMatrix operator*(const CudaMatrix &aMatrix) const;
|
||||
CudaMatrix operator*(CudaMatrix &&aMatrix) const;
|
||||
friend CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther);
|
||||
|
||||
// div
|
||||
CudaMatrix operator/(float aScalar) const;
|
||||
friend CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix);
|
||||
friend CudaMatrix& operator/(float aScalar, CudaMatrix &&aMatrix);
|
||||
friend CudaMatrix& operator/(CudaMatrix &&aMatrix,float aScalar);
|
||||
CudaMatrix operator/(const CudaMatrix &aMatrix) const;
|
||||
CudaMatrix operator/(CudaMatrix &&aMatrix) const;
|
||||
friend CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther);
|
||||
|
||||
// pow
|
||||
CudaMatrix operator^(int times) const;
|
||||
friend CudaMatrix operator^(CudaMatrix &&aMatrix,int times);
|
||||
|
||||
//compare
|
||||
CudaMatrix operator>(float aScalar) const;
|
||||
friend CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator>(const CudaMatrix &aMatrix) const;
|
||||
|
||||
CudaMatrix operator<(float aScalar) const;
|
||||
friend CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator<(const CudaMatrix &aMatrix) const;
|
||||
|
||||
CudaMatrix operator>=(float aScalar) const;
|
||||
friend CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator>=(const CudaMatrix &aMatrix) const;
|
||||
|
||||
CudaMatrix operator<=(float aScalar) const;
|
||||
friend CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator<=(const CudaMatrix &aMatrix) const;
|
||||
|
||||
CudaMatrix operator==(float aScalar) const;
|
||||
friend CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator==(const CudaMatrix &aMatrix) const;
|
||||
|
||||
CudaMatrix operator!=(float aScalar) const;
|
||||
friend CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix);
|
||||
CudaMatrix operator!=(const CudaMatrix &aMatrix) const;
|
||||
|
||||
// sub
|
||||
float& operator[](size_t index);
|
||||
float operator[](size_t index) const;
|
||||
|
||||
/**
|
||||
* 切块操作
|
||||
*
|
||||
* @param aDim 需要切块的维度,
|
||||
* @param aBeginIndx 起始索引,包含
|
||||
* @param aEndIndex 终止索引,包含
|
||||
* @return Matrix 返回矩阵
|
||||
*/
|
||||
CudaMatrix block(int aDim,int aBeginIndx, int aEndIndex) const;
|
||||
|
||||
bool setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float value);
|
||||
bool setBlockComplexValue(int aDim,int aBeginIndx, int aEndIndex,std::complex<float> value);
|
||||
|
||||
|
||||
bool setBlock(int aDim,int aBeginIndx, int aEndIndex,const CudaMatrix& src);
|
||||
|
||||
/**
|
||||
* 矩阵乘法
|
||||
* @attention 目前只支持矩阵乘向量
|
||||
* @param aOther
|
||||
* @return Matrix
|
||||
*/
|
||||
CudaMatrix mul(const CudaMatrix& aOther) const;
|
||||
|
||||
/**
|
||||
* 矩阵乘法
|
||||
* @attention 目前只支持矩阵乘向量
|
||||
* @param aOther
|
||||
* @return Matrix
|
||||
*/
|
||||
CudaMatrix mul(CudaMatrix&& aOther) const;
|
||||
|
||||
/**
|
||||
* print matrix , only support 2d matrix now
|
||||
*/
|
||||
void printf();
|
||||
|
||||
void printfShape();
|
||||
|
||||
bool isScalar() const;
|
||||
|
||||
float getScalar() const;
|
||||
|
||||
/**
|
||||
* Get is the Matrix's data is empty or size is zero.
|
||||
* @return
|
||||
*/
|
||||
bool isNull() const;
|
||||
|
||||
bool isNan() const;
|
||||
|
||||
bool isVector() const;
|
||||
|
||||
bool isMKLAllocated() const;
|
||||
|
||||
/**
|
||||
* Get dimension count of the matrix
|
||||
* @return dimension count
|
||||
*/
|
||||
int getDims() const;
|
||||
|
||||
float *getData() const;
|
||||
|
||||
int getDimSize(int index) const;
|
||||
|
||||
/**
|
||||
* Compare matrix shape
|
||||
* @param other matrix
|
||||
* @return is identity
|
||||
*/
|
||||
bool compareShape(const CudaMatrix& other) const;
|
||||
|
||||
/**
|
||||
* Get the data size
|
||||
* @return the unit count of the matrix
|
||||
*/
|
||||
size_t getDataSize() const;
|
||||
|
||||
/**
|
||||
* Get Value type as normal and complex,
|
||||
* complex use std::complex<float>,
|
||||
* it's contains two float value.
|
||||
* @return
|
||||
*/
|
||||
ValueType getValueType() const {
|
||||
return mValueType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Value type as normal and complex,
|
||||
* @return
|
||||
*/
|
||||
void setValueType(ValueType aValueType) {
|
||||
mValueType = aValueType;
|
||||
}
|
||||
|
||||
/**
|
||||
* return true if the valueType is complex,
|
||||
* @return
|
||||
*/
|
||||
bool isComplex() const {
|
||||
if(mValueType == Complex)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void forceReshape(int rows, int columns, int slices);
|
||||
|
||||
Matrix toHostMatrix() const;
|
||||
|
||||
|
||||
private:
|
||||
ValueType mValueType = Normal;
|
||||
std::shared_ptr<float> mData;
|
||||
std::vector<int> mInfo;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -92,6 +92,11 @@ Aurora::Matrix MatlabReader::read(const std::string& aFieldName)
|
||||
std::copy((float*)matvar->data, (float*)matvar->data + matrixSize, matrixData);
|
||||
break;
|
||||
}
|
||||
case MAT_T_DOUBLE:
|
||||
{
|
||||
std::copy((double*)matvar->data, (double*)matvar->data + matrixSize, matrixData);
|
||||
break;
|
||||
}
|
||||
}
|
||||
result = Aurora::Matrix::fromRawData(matrixData,rows,columns,slices);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "Matrix.h"
|
||||
#include "CudaMatrix.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
@@ -393,24 +394,12 @@ namespace Aurora {
|
||||
return ret;
|
||||
}
|
||||
|
||||
Matrix Matrix::NewCuda(float *data, int rows, int cols, int slices, ValueType type)
|
||||
CudaMatrix Matrix::toDeviceMatrix() const
|
||||
{
|
||||
if (!data) return Matrix();
|
||||
std::vector<int> vector(3);
|
||||
vector[0]=rows;
|
||||
vector[1] = (cols > 0?cols:1);
|
||||
vector[2] = (slices > 0?slices:1);
|
||||
Matrix ret({data, gpuFree}, vector);
|
||||
if (type != Normal)ret.setValueType(type);
|
||||
ret.mCuda_Allocated = true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
Matrix Matrix::toHostMatrix() const
|
||||
{
|
||||
float* data = new float[getDataSize()];
|
||||
cudaMemcpy(data, mData.get(), sizeof(float) * getDataSize() * (mValueType == Normal ? 1 : 2), cudaMemcpyDeviceToHost);
|
||||
return fromRawData(data, mInfo[0], mInfo[1], mInfo[2], getValueType());
|
||||
float* deviceData = nullptr;
|
||||
cudaMalloc((void**)&deviceData, sizeof(float) * getDataSize() * getValueType() );
|
||||
cudaMemcpy(deviceData, mData.get(), sizeof(float) * getDataSize() * getValueType(), cudaMemcpyHostToDevice);
|
||||
return CudaMatrix::fromRawData(deviceData, mInfo[0], mInfo[1], mInfo[2], getValueType());
|
||||
}
|
||||
|
||||
Matrix Matrix::New(float *data, const Matrix &shapeMatrix) {
|
||||
|
||||
@@ -12,6 +12,7 @@ namespace Aurora {
|
||||
Complex
|
||||
};
|
||||
const int $ = -1;
|
||||
class CudaMatrix;
|
||||
|
||||
class Matrix {
|
||||
public:
|
||||
@@ -87,7 +88,6 @@ namespace Aurora {
|
||||
*/
|
||||
static Matrix New(float *data, const Matrix &shapeMatrix);
|
||||
|
||||
static Matrix NewCuda(float *data, int rows, int cols = 1, int slices = 1, ValueType type = Normal);
|
||||
/**
|
||||
* New a mkl calculate based Matrix
|
||||
* @attention Memory are allocate by Aurora:malloc function
|
||||
@@ -288,7 +288,7 @@ namespace Aurora {
|
||||
|
||||
void forceReshape(int rows, int columns, int slices);
|
||||
|
||||
Matrix toHostMatrix() const;
|
||||
CudaMatrix toDeviceMatrix() const;
|
||||
|
||||
|
||||
private:
|
||||
@@ -296,7 +296,6 @@ namespace Aurora {
|
||||
std::shared_ptr<float> mData;
|
||||
std::vector<int> mInfo;
|
||||
bool mMKL_Allocated = false;
|
||||
bool mCuda_Allocated = false;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user