Add CudaMatrix.

This commit is contained in:
sunwen
2023-10-30 10:28:24 +08:00
parent 15c0654c5c
commit e3abe9fabe
6 changed files with 510 additions and 20 deletions

238
src/CudaMatrix.cpp Normal file
View File

@@ -0,0 +1,238 @@
#include "CudaMatrix.h"
#include "Function.h"
#include "Matrix.h"
#include <iostream>
#include <cstddef>
#include <cuda_runtime.h>
using namespace Aurora;
/**
 * Builds a matrix around an existing data buffer.
 *
 * @param aData      shared buffer holding the matrix values
 * @param aInfo      dimension sizes; entries missing below three are filled with 1
 * @param aValueType value type tag stored alongside the data
 */
CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
    : mValueType(aValueType)
    , mData(aData)
    , mInfo(aInfo)
{
    // Normalise the shape so that rows, columns and slices are always present.
    while (mInfo.size() < 3)
    {
        mInfo.push_back(1);
    }
}
/**
 * Tells whether the matrix is empty.
 *
 * @return true when there is no data buffer or no dimension information.
 */
bool CudaMatrix::isNull() const
{
    if (!mData)
    {
        return true;
    }
    return mInfo.empty();
}
bool CudaMatrix::isNan() const
{
for(size_t i=0; i<getDataSize(); ++i)
{
if(mData.get()[i] == mData.get()[i])
{
return false;
}
}
return true;
}
/**
 * Tells whether the shape describes a single element.
 *
 * @return true when the first two dimensions are exactly 1 and the third is below 2.
 */
bool CudaMatrix::isScalar() const
{
    if (getDimSize(0) != 1)
    {
        return false;
    }
    if (getDimSize(1) != 1)
    {
        return false;
    }
    return getDimSize(2) < 2;
}
float CudaMatrix::getScalar() const
{
if (isNull()) return 0.0;
if (isNull()) return 0.0;
return getData()[0];
}
/**
 * Tells whether the shape describes a row or column vector.
 *
 * @return true when there is at most one slice, the matrix is not a scalar,
 *         and either the first or the second dimension equals 1.
 */
bool CudaMatrix::isVector() const
{
    const bool multiSlice = getDimSize(2) > 1;
    if (multiSlice || isScalar())
    {
        return false;
    }
    const bool singleRow = (getDimSize(0) == 1);
    return singleRow || getDimSize(1) == 1;
}
/**
 * Number of dimensions implied by the shape.
 *
 * @return 3 when the third dimension is larger than 1, otherwise 2.
 */
int CudaMatrix::getDims() const
{
    return (mInfo[2] > 1) ? 3 : 2;
}
/**
 * Raw access to the underlying buffer.
 *
 * @return the pointer held by the shared buffer (null when there is none);
 *         ownership stays with the matrix.
 */
float *CudaMatrix::getData() const
{
    float *raw = mData.get();
    return raw;
}
/**
 * Size of one of the first three dimensions.
 *
 * @param aIndex dimension index (0, 1 or 2)
 * @return the stored size for that dimension, or 0 for any other index.
 */
int CudaMatrix::getDimSize(int aIndex) const
{
    const bool outOfRange = (aIndex < 0) || (aIndex >= 3);
    if (outOfRange)
    {
        return 0;
    }
    return mInfo.at(aIndex);
}
/**
 * Number of elements described by the shape.
 *
 * @return the product of all dimension sizes, or 0 when there is no data
 *         buffer. The value-type factor is not included.
 */
size_t CudaMatrix::getDataSize() const
{
    if (mData.get() == nullptr)
    {
        return 0;
    }

    size_t total = 1;
    for (size_t d = 0; d < mInfo.size(); ++d)
    {
        total *= mInfo[d];
    }
    return total;
}
/**
 * Overwrites the shape without touching the data buffer.
 *
 * No check is made that the new shape describes the same number of elements.
 *
 * @param rows    new size of the first dimension
 * @param columns new size of the second dimension
 * @param slices  new size of the third dimension
 */
void CudaMatrix::forceReshape(int rows, int columns, int slices)
{
    mInfo.assign({rows, columns, slices});
}
/**
 * Checks whether two matrices have compatible shapes.
 *
 * With a single slice on both sides, a 1xN matrix and an Nx1 matrix of the
 * same length are accepted as matching. Otherwise every dimension of this
 * matrix must equal the corresponding dimension of the other one.
 *
 * @param other matrix to compare against
 * @return true when the shapes match under the rules above.
 */
bool CudaMatrix::compareShape(const CudaMatrix &other) const
{
    const bool bothSingleSlice = (mInfo[2] == 1) && (other.mInfo[2] == 1);
    if (bothSingleSlice)
    {
        // Row vector on this side, column vector on the other side.
        const bool rowVsColumn =
            mInfo[0] == 1 && other.mInfo[1] == 1 && mInfo[1] == other.mInfo[0];
        // Column vector on this side, row vector on the other side.
        const bool columnVsRow =
            mInfo[1] == 1 && other.mInfo[0] == 1 && mInfo[0] == other.mInfo[1];
        if (rowVsColumn || columnVsRow)
        {
            return true;
        }
    }

    for (size_t d = 0; d < mInfo.size(); ++d)
    {
        if (mInfo[d] != other.mInfo[d])
        {
            return false;
        }
    }
    return true;
}
/**
 * Wraps an existing buffer in a matrix without copying it.
 *
 * The matrix takes over the pointer: it is released through gpuFree once the
 * last matrix sharing it goes away.
 *
 * @param aData   buffer to adopt; a null pointer yields an empty matrix
 * @param aRows   size of the first dimension
 * @param aCols   size of the second dimension
 * @param aSlices size of the third dimension
 * @param aType   value type tag of the data
 * @return the wrapping matrix, or a default-constructed one when aData is null.
 */
CudaMatrix CudaMatrix::fromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
{
    if (aData == nullptr)
    {
        return CudaMatrix();
    }

    std::shared_ptr<float> owner(aData, gpuFree);
    std::vector<int> shape{aRows, aCols, aSlices};
    CudaMatrix wrapped(owner, shape, aType);
    return wrapped;
}
/**
 * Creates a matrix that owns a fresh copy of the given buffer.
 *
 * The source is copied with cudaMemcpyDeviceToDevice, so aData has to be a
 * device pointer holding at least aRows * aCols * aSlices * aType floats.
 * The caller keeps ownership of aData.
 *
 * @param aData   device buffer to copy from; a null pointer yields an empty matrix
 * @param aRows   size of the first dimension
 * @param aCols   size of the second dimension
 * @param aSlices size of the third dimension
 * @param aType   value type tag; its numeric value is the number of floats per element
 * @return the new matrix, or a default-constructed one when aData is null,
 *         a dimension is negative, or a CUDA call fails.
 */
CudaMatrix CudaMatrix::copyFromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
{
    if (!aData)
    {
        return CudaMatrix();
    }
    if (aRows < 0 || aCols < 0 || aSlices < 0)
    {
        std::cerr << "CudaMatrix copyFromRawData got a negative dimension!" << std::endl;
        return CudaMatrix();
    }

    // Widen before multiplying so that large shapes do not overflow int.
    const size_t count = static_cast<size_t>(aRows) * static_cast<size_t>(aCols) *
                         static_cast<size_t>(aSlices) * static_cast<size_t>(aType);
    const size_t bytes = sizeof(float) * count;

    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, bytes);
    if (status != cudaSuccess)
    {
        std::cerr << "CudaMatrix copyFromRawData cudaMalloc failed: "
                  << cudaGetErrorString(status) << std::endl;
        return CudaMatrix();
    }

    // Hand the allocation to its owner right away so it is released on every exit path.
    std::shared_ptr<float> owner(data, gpuFree);

    status = cudaMemcpy(data, aData, bytes, cudaMemcpyDeviceToDevice);
    if (status != cudaSuccess)
    {
        std::cerr << "CudaMatrix copyFromRawData cudaMemcpy failed: "
                  << cudaGetErrorString(status) << std::endl;
        return CudaMatrix();
    }

    std::vector<int> shape{aRows, aCols, aSlices};
    return CudaMatrix(owner, shape, aType);
}
/**
 * Creates an independent copy of this matrix with its own buffer.
 *
 * @return a matrix with the same first three dimensions and value type whose
 *         data is a device-to-device copy of this one; a default-constructed
 *         matrix when this matrix is null, holds no elements, or a CUDA call
 *         fails.
 */
CudaMatrix CudaMatrix::deepCopy() const
{
    if (isNull())
    {
        return CudaMatrix();
    }

    // The value type's numeric value is the number of floats per element.
    const size_t count = getDataSize() * static_cast<size_t>(getValueType());
    if (count == 0)
    {
        return CudaMatrix();
    }
    const size_t bytes = sizeof(float) * count;

    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, bytes);
    if (status != cudaSuccess)
    {
        std::cerr << "CudaMatrix deepCopy cudaMalloc failed: "
                  << cudaGetErrorString(status) << std::endl;
        return CudaMatrix();
    }

    // Hand the allocation to its owner right away so it is released on every exit path.
    std::shared_ptr<float> owner(data, gpuFree);

    status = cudaMemcpy(data, mData.get(), bytes, cudaMemcpyDeviceToDevice);
    if (status != cudaSuccess)
    {
        std::cerr << "CudaMatrix deepCopy cudaMemcpy failed: "
                  << cudaGetErrorString(status) << std::endl;
        return CudaMatrix();
    }

    std::vector<int> shape{getDimSize(0), getDimSize(1), getDimSize(2)};
    return CudaMatrix(owner, shape, getValueType());
}
/**
 * Copies the matrix into a host-side Matrix.
 *
 * A host buffer is allocated with new[], filled by a device-to-host copy and
 * passed to Matrix::fromRawData together with the first three dimensions and
 * the value type.
 * NOTE(review): this presumes Matrix::fromRawData takes ownership of a
 * new[]-allocated buffer — confirm against Matrix.
 *
 * @return the host matrix; a default-constructed Matrix when this matrix is
 *         null or the copy fails.
 */
Matrix CudaMatrix::toHostMatrix() const
{
    if (isNull())
    {
        // Without this guard a null matrix would turn into a 1x1x1 host matrix
        // backed by a zero-length buffer.
        // NOTE(review): assumes Matrix is default-constructible like CudaMatrix.
        return Matrix();
    }

    // The value type's numeric value is the number of floats per element.
    const size_t count = getDataSize() * static_cast<size_t>(getValueType());
    float* data = new float[count];

    const cudaError_t status = cudaMemcpy(data, mData.get(), sizeof(float) * count,
                                          cudaMemcpyDeviceToHost);
    if (status != cudaSuccess)
    {
        std::cerr << "CudaMatrix toHostMatrix cudaMemcpy failed: "
                  << cudaGetErrorString(status) << std::endl;
        delete[] data;
        return Matrix();
    }

    return Matrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
}
/**
 * Extracts a contiguous index range along one dimension into a new matrix.
 *
 * Data is addressed with the first dimension varying fastest, then the
 * second, then the third. For a vector with more than one column, a request
 * on dimension 0 is redirected to dimension 1.
 *
 * @param aDim        dimension to cut along (0, 1 or 2)
 * @param aBeginIndex first index to keep (inclusive)
 * @param aEndIndex   last index to keep (inclusive)
 * @return a new matrix owning a copy of the selected range; a
 *         default-constructed matrix when the arguments are invalid, this
 *         matrix is null, or a CUDA call fails.
 */
CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const
{
    if(aDim > 2 || aDim < 0)
    {
        std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
        return CudaMatrix();
    }
    if (isNull())
    {
        std::cerr<<"CudaMatrix block called on a null matrix!"<<std::endl;
        return CudaMatrix();
    }
    if (isVector() && aDim == 0 && getDimSize(1)>1)
    {
        // A row vector only has extent along its columns.
        aDim = 1;
    }
    if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
    {
        std::cerr<<"CudaMatrix block BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
        return CudaMatrix();
    }
    if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
    {
        std::cerr<<"CudaMatrix block EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
        return CudaMatrix();
    }
    if (aEndIndex < aBeginIndex)
    {
        std::cerr<<"CudaMatrix block EndIndex can not less than BeginIndex ! BeginIndex:"<<aBeginIndex <<", EndIndex:"<<aEndIndex<<std::endl;
        return CudaMatrix();
    }

    // All sizes are kept in size_t so that large matrices do not overflow int.
    const size_t floatsPerValue = static_cast<size_t>(getValueType());
    const size_t rows = static_cast<size_t>(getDimSize(0));
    const size_t cols = static_cast<size_t>(getDimSize(1));
    const size_t slices = static_cast<size_t>(getDimSize(2));
    const size_t begin = static_cast<size_t>(aBeginIndex);
    const size_t dimLength = static_cast<size_t>(aEndIndex - aBeginIndex + 1);

    // getDimSize(aDim) is at least 1 here because aBeginIndex passed the range check.
    const size_t outCount = getDataSize() / static_cast<size_t>(getDimSize(aDim)) * dimLength;
    if (outCount == 0)
    {
        return CudaMatrix();
    }

    float* dataOutput = nullptr;
    cudaError_t status = cudaMalloc((void**)&dataOutput, sizeof(float) * outCount * floatsPerValue);
    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix block cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // Hand the allocation to its owner right away so it is released on every exit path.
    std::shared_ptr<float> owner(dataOutput, gpuFree);

    const float* source = mData.get();
    const size_t colStride = rows;
    const size_t sliceStride = rows * cols;

    switch (aDim)
    {
    case 0:
    {
        // Every column of every slice contributes one run of dimLength values.
        // The runs are colStride values apart in the source and packed back to
        // back in the destination, which is exactly a pitched 2D copy.
        const size_t runBytes = sizeof(float) * dimLength * floatsPerValue;
        const size_t sourcePitch = sizeof(float) * colStride * floatsPerValue;
        status = cudaMemcpy2D(dataOutput, runBytes,
                              source + begin * floatsPerValue, sourcePitch,
                              runBytes, cols * slices, cudaMemcpyDeviceToDevice);
        break;
    }
    case 1:
    {
        // Within a slice the selected columns are contiguous: one copy per slice.
        const size_t copyCount = dimLength * rows * floatsPerValue;
        for (size_t s = 0; s < slices && status == cudaSuccess; ++s)
        {
            status = cudaMemcpy(dataOutput + s * copyCount,
                                source + floatsPerValue * (begin * colStride + s * sliceStride),
                                sizeof(float) * copyCount, cudaMemcpyDeviceToDevice);
        }
        break;
    }
    default: // aDim == 2
    {
        // The selected slices form one contiguous range.
        const size_t copyCount = dimLength * sliceStride * floatsPerValue;
        status = cudaMemcpy(dataOutput,
                            source + begin * sliceStride * floatsPerValue,
                            sizeof(float) * copyCount, cudaMemcpyDeviceToDevice);
        break;
    }
    }

    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix block copy failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    // Same shape as the source, except along the dimension that was cut.
    std::vector<int> shape{getDimSize(0), getDimSize(1), getDimSize(2)};
    shape[aDim] = static_cast<int>(dimLength);
    return CudaMatrix(owner, shape, getValueType());
}
/**
 * Intended to assign one value to an index range along a dimension.
 *
 * Only the dimension check exists so far; no data is modified. Every path
 * returns a value, so callers never observe an indeterminate result.
 *
 * @param aDim       dimension to address (0, 1 or 2)
 * @param aBeginIndx first index of the range (currently unused)
 * @param aEndIndex  last index of the range (currently unused)
 * @param value      value to assign (currently unused)
 * @return false in all cases, because nothing is written.
 */
bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float value)
{
    if(aDim>2 || aDim<0)
    {
        std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
        return false;
    }

    // Silence unused-parameter warnings until the assignment is implemented.
    (void)aBeginIndx;
    (void)aEndIndex;
    (void)value;

    // TODO: implement the actual fill; report failure instead of falling off
    // the end of a non-void function.
    std::cerr<<"CudaMatrix setBlockValue is not implemented!"<<std::endl;
    return false;
}