// Host-side implementation of Aurora::CudaMatrix member functions and the free
// operators that accompany them. Everything in this file is compiled only when
// USE_CUDA is defined.
//
// NOTE(review): this copy of the file looks damaged. Wherever the original text
// had a '<' directly followed by a letter, everything up to the next '>' seems
// to be gone (header names after #include, template arguments, most stream
// insertions, and whole function bodies/signatures in between). The damaged
// spots are flagged below; restore the file from version control before making
// any functional change. The code tokens below are kept exactly as found.
#ifdef USE_CUDA
#include "CudaMatrix.h"
#include "Function.h"
#include "Matrix.h"
// NOTE(review): the next three directives have no header name left.
#include
#include
#include
#include "CudaMatrixPrivate.cuh"

namespace Aurora{

    // Constructs a matrix from a data handle, a shape vector and a value type.
    // The shape vector is padded with 1s until it holds at least three entries.
    // NOTE(review): the template arguments of std::shared_ptr / std::vector are missing here.
    CudaMatrix::CudaMatrix(std::shared_ptr aData, std::vector aInfo, ValueType aValueType)
        : mValueType(aValueType)
        , mData(aData)
        , mInfo(aInfo)
    {
        size_t infoSize = mInfo.size();
        for(; infoSize<3; ++infoSize)
        {
            mInfo.push_back(1);
        }
    }

    // Returns true when there is no data pointer or the shape vector is empty.
    bool CudaMatrix::isNull() const
    {
        return !mData || mInfo.empty();
    }

    // NOTE(review): damaged. The loop header is cut off after "i" and the
    // remaining statements (isScalar() check, "extent 0 or extent 1 equals 1")
    // read like the tail of a different predicate; presumably several
    // definitions between isNan() and that tail were lost - confirm against the header.
    bool CudaMatrix::isNan() const
    {
        for(size_t i=0; i1) return false;
        if (isScalar()) return false;
        return getDimSize(0) == 1 || getDimSize(1) == 1;
    }

    // Returns 3 when the third shape entry is greater than 1, otherwise 2.
    int CudaMatrix::getDims() const
    {
        if(mInfo[2] > 1)
        {
            return 3;
        }
        return 2;
    }

    // Returns the raw pointer held by mData (may be null).
    float *CudaMatrix::getData() const
    {
        return mData.get();
    }

    // Returns the shape entry for aIndex in [0, 2]; any other index yields 0.
    int CudaMatrix::getDimSize(int aIndex) const
    {
        if (aIndex >= 0 && aIndex < 3)
        {
            return mInfo.at(aIndex);
        }
        return 0;
    }

    // Returns 0 when there is no data pointer, otherwise the product of all
    // shape entries. The value type is not part of this product; callers in
    // this file multiply by getValueType() themselves when sizing buffers.
    size_t CudaMatrix::getDataSize() const
    {
        if (!mData.get())return 0;
        size_t ret = 1;
        for (auto v: mInfo)
        {
            ret *= v;
        }
        return ret;
    }

    // Overwrites the shape with {rows, columns, slices}.
    // No check is made that the new shape matches the existing element count.
    void CudaMatrix::forceReshape(int rows, int columns, int slices)
    {
        mInfo = {rows,columns,slices};
    }

    // Returns true when both shapes are entry-wise equal. When both third
    // entries are 1, a 1xN shape and an Nx1 shape are also accepted as matching.
    // NOTE(review): the loop compares a signed int with mInfo.size() and indexes
    // other.mInfo using this object's length without checking other's length.
    bool CudaMatrix::compareShape(const CudaMatrix &other) const
    {
        if (mInfo[2] == 1 && other.mInfo[2] == 1)
        {
            if (mInfo[0]==1 && other.mInfo[1] == 1 && mInfo[1] == other.mInfo[0]) return true;
            if (mInfo[1]==1 && other.mInfo[0] == 1 && mInfo[0] == other.mInfo[1]) return true;
        }
        for (int i = 0; i < mInfo.size(); ++i)
        {
            if (mInfo[i] != other.mInfo[i]) return false;
        }
        return true;
    }

    // Wraps an existing buffer without copying it. A null aData yields a
    // default-constructed CudaMatrix. The handle is built from {aData, gpuFree};
    // presumably gpuFree is the deleter that releases the buffer - confirm.
    CudaMatrix CudaMatrix::fromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
    {
        if (!aData)
        {
            return CudaMatrix();
        }
        std::vector vector{aRows, aCols, aSlices};
        CudaMatrix ret({aData, gpuFree}, vector, aType);
        return ret;
    }

    // Allocates a new buffer with cudaMalloc, copies aData into it with a
    // device-to-device cudaMemcpy, and wraps the copy. A null aData yields a
    // default-constructed CudaMatrix.
    // NOTE(review): the size product is formed from int operands before it is
    // stored in an unsigned long long (ValueType's underlying type is not
    // visible here) - large shapes may overflow; confirm.
    // NOTE(review): the cudaMalloc / cudaMemcpy return codes are ignored.
    CudaMatrix CudaMatrix::copyFromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
    {
        if (!aData)
        {
            return CudaMatrix();
        }
        float* data = nullptr;
        unsigned long long size = aRows * aCols * aSlices * aType;
        cudaMalloc((void**)&data, sizeof(float) * size);
        cudaMemcpy(data, aData, sizeof(float) * size, cudaMemcpyDeviceToDevice);
        std::vector vector{aRows, aCols, aSlices};
        return CudaMatrix({data, gpuFree}, vector, aType);
    }

    // Returns a new matrix with the same shape and value type whose buffer is a
    // device-to-device copy of this matrix's buffer.
    // NOTE(review): the cudaMalloc / cudaMemcpy return codes are ignored.
    CudaMatrix CudaMatrix::deepCopy() const
    {
        float* data = nullptr;
        unsigned long long size = getDataSize() * getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToDevice);
        return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    }

    // Copies the buffer into a freshly new[]-allocated host array with a
    // device-to-host cudaMemcpy and hands that array to Matrix::fromRawData.
    // Presumably Matrix takes ownership of the array - confirm in Matrix.h.
    // NOTE(review): the cudaMemcpy return code is ignored.
    Matrix CudaMatrix::toHostMatrix() const
    {
        unsigned long long size = getDataSize() * getValueType();
        float* data = new float[size];
        cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost);
        return Matrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    }

    // NOTE(review): from here up to the comparison operators the text is badly
    // damaged. Only argument-validation fragments and their diagnostic strings
    // survive; judging by those strings the region held block(), setBlockValue(),
    // setBlockComplexValue(), setBlock() and the +, *, -, / operators. The
    // fragments are laid out one per line; braces do not balance as found.
    CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const
    {
        if(aDim > 2)
        {
            std::cerr<<"CudaMatrix block only support 1D-3D data!"<=getDimSize(aDim) || aBeginIndex<0)
        {
            std::cerr<<"CudaMatrix block BeginIndx error!BeginIndx:"<=getDimSize(aDim) || aEndIndex<0)
        {
            // The remainder of block() and the signature of the next function are missing.
            std::cerr<<"CudaMatrix block EndIndex error!EndIndex:"<2 )
        {
            std::cerr<<"CudaMatrix setblockValue only support 1D-3D data!"<=getDimSize(aDim) || aBeginIndex<0)
        {
            std::cerr<<"CudaMatrix setblockValue BeginIndx error!BeginIndx:"<=getDimSize(aDim) || aEndIndex<0)
        {
            // Only the last parameter (aValue) of the next signature survives.
            std::cerr<<"CudaMatrix setblockValue EndIndex error!EndIndex:"< aValue)
        {
            // Rejects matrices whose value type is not Complex.
            if(getValueType() != Complex)
            {
                std::cerr<<"CudaMatrix setBlockComplexValue only support complex matrix"<2 )
        {
            std::cerr<<"CudaMatrix setBlockComplexValue only support 1D-3D data!"<=getDimSize(aDim) || aBeginIndex<0)
        {
            std::cerr<<"CudaMatrix setBlockComplexValue BeginIndx error!BeginIndx:"<=getDimSize(aDim) || aEndIndex<0)
        {
            std::cerr<<"CudaMatrix setBlockComplexValue EndIndex error!EndIndex:"<2 )
        {
            std::cerr<<"CudaMatrix setBlock only support 1D-3D data!"<=getDimSize(aDim) || aBeginIndex<0)
        {
            std::cerr<<"CudaMatrix setBlock BeginIndx error!BeginIndx:"<=getDimSize(aDim) || aEndIndex<0)
        {
            // The rest of setBlock() and the head of an operator+ overload are missing.
            std::cerr<<"CudaMatrix setBlock EndIndex error!EndIndex:"<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex())
        {
            // NOTE(review): "Comples" in these messages looks like a typo for "Complex".
            std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize());
        return out;
    }

    // operator+ taking an rvalue right-hand side. The surviving tail passes
    // aMatrix.getData() as both the second and the third pointer argument and
    // returns aMatrix, i.e. the result is written into the argument's buffer.
    // NOTE(review): the error-return paths and the name of the called helper are missing.
    CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{
        if (this->getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex())
        {
            std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
        return aMatrix;
    }

    // Free operator+ with an rvalue left-hand side.
    // NOTE(review): everything after the first size check is missing, through
    // to the middle of the first operator* overload.
    CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){
        if (aOther.getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex())
        {
            std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize());
        return out;
    }

    // operator* taking an rvalue right-hand side; like the rvalue operator+,
    // the surviving tail writes into aMatrix's buffer and returns aMatrix.
    CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{
        if (this->getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex())
        {
            std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
        return aMatrix;
    }

    // Free operator* with an rvalue left-hand side.
    // NOTE(review): everything after the first size check is missing, through
    // to the middle of the first operator- overload.
    CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){
        if (aOther.getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<isComplex())
        {
            std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize());
        return out;
    }

    // operator- taking an rvalue right-hand side; the surviving tail writes
    // into aMatrix's buffer and returns aMatrix.
    CudaMatrix CudaMatrix::operator-(CudaMatrix &&aMatrix) const{
        if (aMatrix.isComplex()!=this->isComplex())
        {
            std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize());
        return aMatrix;
    }

    // Free operator- with an rvalue left-hand side.
    // NOTE(review): everything after the first type check is missing, through
    // to the middle of the first operator/ overload.
    CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther){
        if (aMatrix.isComplex()!=aOther.isComplex())
        {
            std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<isComplex())
        {
            std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize());
        return out;
    }

    // operator/ taking an rvalue right-hand side; the surviving tail writes
    // into aMatrix's buffer and returns aMatrix.
    CudaMatrix CudaMatrix::operator/(CudaMatrix &&aMatrix) const{
        if (aMatrix.isComplex()!=this->isComplex())
        {
            std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize())
        {
            std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize()
            <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize());
        return aMatrix;
    }

    // Free operator/ with an rvalue left-hand side.
    // NOTE(review): the rest of this function and the signature of the next
    // one are missing. The surviving "(float aScalar) const{" and the ::G
    // selector below suggest the next body belongs to
    // CudaMatrix::operator>(float) - confirm against the header.
    CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther){
        if (aMatrix.isComplex()!=aOther.isComplex())
        {
            std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
            <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<(float aScalar) const{
        // Shared pattern of every comparison operator below: allocate an output
        // buffer of getDataSize() * getValueType() floats, wrap it in a
        // CudaMatrix with the operand's shape and value type, then let
        // unaryCompare (declared outside this file) fill it using the given
        // selector (::G, ::L, ::GE, ::LE, ::E, ::NE).
        // NOTE(review): cudaMalloc's return code is ignored. If data stays
        // null, fromRawData returns an empty matrix while the null pointer is
        // still passed to unaryCompare.
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::G);
        return out;
    }

    // scalar > matrix: the scalar is the left operand of unaryCompare, selector ::G.
    CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::G);
        return out;
    }

    // matrix > matrix, selector ::G.
    // NOTE(review): the output is sized and shaped from aMatrix, but the number
    // of elements processed is this->getDataSize(); no check that the two
    // operands have the same size is made. The same holds for every
    // matrix-matrix comparison below.
    CudaMatrix CudaMatrix::operator>(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::G);
        return out;
    }

    // matrix < scalar, selector ::L.
    CudaMatrix CudaMatrix::operator<(float aScalar) const{
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::L);
        return out;
    }

    // scalar < matrix, selector ::L.
    CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::L);
        return out;
    }

    // matrix < matrix, selector ::L.
    CudaMatrix CudaMatrix::operator<(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::L);
        return out;
    }

    // matrix >= scalar, selector ::GE.
    CudaMatrix CudaMatrix::operator>=(float aScalar) const{
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::GE);
        return out;
    }

    // scalar >= matrix, selector ::GE.
    CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::GE);
        return out;
    }

    // matrix >= matrix, selector ::GE.
    CudaMatrix CudaMatrix::operator>=(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::GE);
        return out;
    }

    // matrix <= scalar, selector ::LE.
    CudaMatrix CudaMatrix::operator<=(float aScalar) const{
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::LE);
        return out;
    }

    // scalar <= matrix, selector ::LE.
    CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::LE);
        return out;
    }

    // matrix <= matrix, selector ::LE.
    CudaMatrix CudaMatrix::operator<=(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::LE);
        return out;
    }

    // matrix == scalar, selector ::E. Note that this returns a CudaMatrix, not a bool.
    CudaMatrix CudaMatrix::operator==(float aScalar) const{
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::E);
        return out;
    }

    // scalar == matrix, selector ::E.
    CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::E);
        return out;
    }

    // matrix == matrix, selector ::E.
    CudaMatrix CudaMatrix::operator==(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::E);
        return out;
    }

    // matrix != scalar, selector ::NE.
    CudaMatrix CudaMatrix::operator!=(float aScalar) const{
        float* data = nullptr;
        unsigned long long size = this->getDataSize() * this->getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
        unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::NE);
        return out;
    }

    // scalar != matrix, selector ::NE.
    CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix){
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::NE);
        return out;
    }

    // matrix != matrix, selector ::NE.
    CudaMatrix CudaMatrix::operator!=(const CudaMatrix &aMatrix) const{
        float* data = nullptr;
        unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
        cudaMalloc((void**)&data, sizeof(float) * size);
        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
        unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::NE);
        return out;
    }

    // Reads the single float at offset index from the buffer into host memory
    // with a device-to-host cudaMemcpy. On a CUDA error the message is printed
    // to stderr and NaN is returned.
    // NOTE(review): index is not range-checked and a null buffer is not handled.
    float CudaMatrix::getValue(size_t index) const{
        float result;
        cudaError_t cuda_error = cudaMemcpy(&result, getData() + index, sizeof(float), cudaMemcpyDeviceToHost);
        if (cuda_error != cudaSuccess) {
            fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error));
            return nan("");
        }
        return result;
    }

    // Writes value to offset index of the buffer with a host-to-device
    // cudaMemcpy. A CUDA error is printed to stderr but not reported to the caller.
    // NOTE(review): index is not range-checked and a null buffer is not handled.
    void CudaMatrix::setValue(size_t index, const float& value){
        cudaError_t cuda_error = cudaMemcpy( getData() + index,&value, sizeof(float), cudaMemcpyHostToDevice);
        if (cuda_error != cudaSuccess) {
            fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error));
        }
    }
}
#endif // USE_CUDA