// Host-side member definitions for Aurora::CudaMatrix, compiled only when USE_CUDA is defined.
// NOTE(review): in this copy every "<...>" span appears to have been stripped (include targets,
// template arguments, parts of stream expressions and call heads). Spans affected by that are
// flagged below; restore them from the original file before changing any code.
#ifdef USE_CUDA
#include "CudaMatrix.h"
#include "Function.h"
#include "Matrix.h"
#include
#include
#include
#include "CudaMatrixPrivate.cuh"
namespace Aurora{
// Constructor: stores the data holder, the dimension list and the value type, then appends 1s
// to mInfo until it holds at least three entries.
CudaMatrix::CudaMatrix(std::shared_ptr aData, std::vector aInfo, ValueType aValueType) : mValueType(aValueType) , mData(aData) , mInfo(aInfo) { size_t infoSize = mInfo.size(); for(; infoSize<3; ++infoSize) { mInfo.push_back(1); } }
// True when there is no data pointer or the dimension list is empty.
bool CudaMatrix::isNull() const { return !mData || mInfo.empty(); }
// NOTE(review): this span is incomplete in this copy. The loop of isNan() is cut off and runs
// straight into the tail of another predicate, which returns false for scalars and otherwise
// true when dimension 0 or dimension 1 has size 1. Any definitions in between are missing.
bool CudaMatrix::isNan() const { for(size_t i=0; i1) return false; if (isScalar()) return false; return getDimSize(0) == 1 || getDimSize(1) == 1; }
// Returns 3 when mInfo[2] is greater than 1, otherwise 2.
int CudaMatrix::getDims() const { if(mInfo[2] > 1) { return 3; } return 2; }
// Returns the raw pointer stored in mData.
float *CudaMatrix::getData() const { return mData.get(); }
// Size of dimension aIndex for aIndex in [0, 2]; returns 0 for any other index.
int CudaMatrix::getDimSize(int aIndex) const { if (aIndex >= 0 && aIndex < 3) { return mInfo.at(aIndex); } return 0; }
// Product of all entries of mInfo, or 0 when mData holds no pointer. The value type is not
// factored in here; callers in this file multiply by getValueType() themselves.
size_t CudaMatrix::getDataSize() const { if (!mData.get())return 0; size_t ret = 1; for (auto v: mInfo) { ret *= v; } return ret; }
// Overwrites the dimension list with {rows, columns, slices}. No check is made that the new
// shape describes the same number of elements as the old one.
void CudaMatrix::forceReshape(int rows, int columns, int slices) { mInfo = {rows,columns,slices}; }
// Shape comparison. When both matrices have mInfo[2] == 1, a 1xN / Nx1 pair is accepted as
// matching; otherwise every entry of mInfo must equal the corresponding entry of other.mInfo.
// NOTE(review): the loop compares an int index against mInfo.size() and indexes other.mInfo
// using this object's size; this relies on both lists having the same length — confirm.
bool CudaMatrix::compareShape(const CudaMatrix &other) const { if (mInfo[2] == 1 && other.mInfo[2] == 1) { if (mInfo[0]==1 && other.mInfo[1] == 1 && mInfo[1] == other.mInfo[0]) return true; if (mInfo[1]==1 && other.mInfo[0] == 1 && mInfo[0] == other.mInfo[1]) return true; } for (int i = 0; i < mInfo.size(); ++i) { if (mInfo[i] != other.mInfo[i]) return false; } return true; }
// Wraps an existing buffer without copying it. Returns a default-constructed CudaMatrix when
// aData is null. aData is paired with gpuFree when the data holder is built — presumably as
// its deleter, in which case the caller must not free aData afterwards; confirm in the header.
CudaMatrix CudaMatrix::fromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType) { if (!aData) { return CudaMatrix(); } std::vector vector{aRows, aCols, aSlices}; CudaMatrix ret({aData, gpuFree}, vector, aType); return ret; }
// Allocates a new buffer with cudaMalloc and copies aData into it using
// cudaMemcpyDeviceToDevice, i.e. aData is treated as a device pointer. Returns a
// default-constructed CudaMatrix when aData is null.
// NOTE(review): the element count is multiplied out from the int arguments before it is stored
// in an unsigned long long, so large shapes may overflow first — confirm against ValueType's
// underlying type. The results of cudaMalloc and cudaMemcpy are not checked.
CudaMatrix CudaMatrix::copyFromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType) { if (!aData) { return CudaMatrix(); } float* data = nullptr; unsigned long long size = aRows * aCols * aSlices * aType; cudaMalloc((void**)&data, sizeof(float) * size); 
cudaMemcpy(data, aData, sizeof(float) * size, cudaMemcpyDeviceToDevice); std::vector vector{aRows, aCols, aSlices}; return CudaMatrix({data, gpuFree}, vector, aType); }
// Allocates getDataSize() * getValueType() floats, copies this matrix's buffer into them
// device-to-device and wraps the new buffer through fromRawData with the same dimensions.
// NOTE(review): there is no isNull() guard and the CUDA return codes are not checked.
CudaMatrix CudaMatrix::deepCopy() const { float* data = nullptr; unsigned long long size = getDataSize() * getValueType(); cudaMalloc((void**)&data, sizeof(float) * size); cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToDevice); return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType()); }
// Copies the buffer into a new[]-allocated host array with cudaMemcpyDeviceToHost and passes
// that array to Matrix::fromRawData together with this matrix's dimensions and value type.
// NOTE(review): how the host array is released depends on Matrix::fromRawData, which is not
// visible here. The cudaMemcpy result is not checked.
Matrix CudaMatrix::toHostMatrix() const { unsigned long long size = getDataSize() * getValueType(); float* data = new float[size]; cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost); return Matrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType()); }
// NOTE(review): this span is incomplete in this copy. It starts as block(aDim, aBeginIndex,
// aEndIndex), whose visible checks reject aDim > 2 and begin/end indices outside
// [0, getDimSize(aDim)), and it ends with the tail of an operator+ that writes into a separate
// matrix `out` and returns it. Everything between those points, including any other
// definitions and the name of the call that performs the addition, is missing.
CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const { if(aDim > 2) { std::cerr<<"CudaMatrix block only support 1D-3D data!"<1) { aDim = 1; } if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0) { std::cerr<<"CudaMatrix block BeginIndx error!BeginIndx:"<=getDimSize(aDim) || aEndIndex<0) { std::cerr<<"CudaMatrix block EndIndex error!EndIndex:"<2 ) { std::cerr<<"CudaMatrix block only support 1D-3D data!"<getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize()); return out; }
// Rvalue overload of operator+. It checks that both operands have the same data size and the
// same complex/real kind, then passes aMatrix.getData() as both an input and the output of the
// call that follows, and returns aMatrix, so the result is written into aMatrix's own buffer.
// NOTE(review): what happens after a failed check and the name of that call are missing in
// this copy.
CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{ if (this->getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { std::cerr<<"operator+ 
must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); return aMatrix; }
// NOTE(review): this span is incomplete in this copy. It starts as the free operator+ taking
// (CudaMatrix&&, CudaMatrix&) and ends with the tail of a member operator that writes into a
// separate matrix `out` and returns it — presumably operator*(const CudaMatrix&), given the
// overload that follows; confirm. The diagnostics visible in that tail still say "operator+".
CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){ if (aOther.getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize()); return out; }
// Rvalue overload of operator*. Same structure as the rvalue operator+: size and complex/real
// checks, then aMatrix's buffer is used as both input and output and aMatrix is returned.
// NOTE(review): the error messages here say "operator+" although this is operator*, which
// looks like a copy/paste leftover. The name of the call that does the work is missing.
CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{ if (this->getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); return aMatrix; }
// Free operator* taking (CudaMatrix&&, CudaMatrix&). The definition continues past the end of
// the text available here, so it is left exactly as found.
CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){ if (aOther.getDataSize() != aMatrix.getDataSize()) { std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<