Merge branch 'dtof' of http://192.168.1.9:3000/Bug/Aurora into dtof
This commit is contained in:
@@ -147,6 +147,10 @@ CudaMatrix CudaMatrix::deepCopy() const
|
|||||||
|
|
||||||
Matrix CudaMatrix::toHostMatrix() const
|
Matrix CudaMatrix::toHostMatrix() const
|
||||||
{
|
{
|
||||||
|
if(!mData.get())
|
||||||
|
{
|
||||||
|
return Matrix();
|
||||||
|
}
|
||||||
unsigned long long size = getDataSize() * getValueType();
|
unsigned long long size = getDataSize() * getValueType();
|
||||||
float* data = new float[size];
|
float* data = new float[size];
|
||||||
cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost);
|
cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost);
|
||||||
|
|||||||
@@ -195,6 +195,11 @@ __global__ void sqrtKernel(float* aInputData, float* aOutput, unsigned int aSize
|
|||||||
|
|
||||||
CudaMatrix Aurora::sqrt(const CudaMatrix& aMatrix)
|
CudaMatrix Aurora::sqrt(const CudaMatrix& aMatrix)
|
||||||
{
|
{
|
||||||
|
if(aMatrix.getValueType() == Aurora::Complex)
|
||||||
|
{
|
||||||
|
std::cerr<<"sqrt not support complex"<<std::endl;
|
||||||
|
return CudaMatrix();
|
||||||
|
}
|
||||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||||
float* data = nullptr;
|
float* data = nullptr;
|
||||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||||
@@ -206,6 +211,11 @@ CudaMatrix Aurora::sqrt(const CudaMatrix& aMatrix)
|
|||||||
|
|
||||||
CudaMatrix Aurora::sqrt(const CudaMatrix&& aMatrix)
|
CudaMatrix Aurora::sqrt(const CudaMatrix&& aMatrix)
|
||||||
{
|
{
|
||||||
|
if(aMatrix.getValueType() == Aurora::Complex)
|
||||||
|
{
|
||||||
|
std::cerr<<"sqrt not support complex"<<std::endl;
|
||||||
|
return CudaMatrix();
|
||||||
|
}
|
||||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||||
float* data = nullptr;
|
float* data = nullptr;
|
||||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||||
@@ -302,3 +312,94 @@ CudaMatrix Aurora::sign(const CudaMatrix&& aMatrix)
|
|||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tiling kernel for 2-D inputs.
//
// Launch layout this kernel relies on:
//   - blockDim.x / blockDim.y are used as the row / column counts of the input,
//     so a single block covers exactly one copy of the input;
//   - blockIdx selects which tile of the output the block writes.
// The output is addressed with rows as the fastest-varying index. For complex
// data every element is a pair of consecutive floats.
// aInputSize is accepted but never read by the kernel.
__global__ void repMatKernel(float* aInputData, float* aOutput, unsigned int aInputSize, bool aIsComplex)
{
    // Extents of the whole output in the row and column directions.
    const unsigned int outRows = blockDim.x * gridDim.x;
    const unsigned int outCols = blockDim.y * gridDim.y;

    // Position of this thread inside the output.
    const unsigned int row   = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int col   = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int slice = blockIdx.z * blockDim.z + threadIdx.z;

    // Number of floats stored per element: 2 for complex, 1 otherwise.
    const unsigned int floatsPerElement = aIsComplex ? 2u : 1u;

    const unsigned int dst = floatsPerElement * (slice * outRows * outCols + col * outRows + row);
    const unsigned int src = floatsPerElement * (threadIdx.y * blockDim.x + threadIdx.x);

    // Copy every float of the element (one for real data, two for complex data).
    for(unsigned int part = 0; part < floatsPerElement; ++part)
    {
        aOutput[dst + part] = aInputData[src + part];
    }
}
|
||||||
|
|
||||||
|
// Tile a device matrix aRowTimes times along dimension 0 and aColumnTimes
// times along dimension 1.
//
// Returns an empty CudaMatrix when a repeat count is below 1, when the input
// has more than two dimensions or is null, or when a CUDA call fails
// (allocation, launch or execution); the failure is reported on std::cerr.
//
// Limitation: repMatKernel is launched with one block per tile and the block
// is shaped like the input, so the input must fit within the device's
// per-block thread limits. Larger inputs are rejected by the launch, which is
// now detected instead of returning uninitialized device memory.
CudaMatrix Aurora::repmat(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes)
{
    if(aRowTimes < 1 || aColumnTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
    {
        return CudaMatrix();
    }

    dim3 blockSize(aMatrix.getDimSize(0), aMatrix.getDimSize(1), 1);
    dim3 gridSize(aRowTimes, aColumnTimes, 1);

    size_t size = aMatrix.getDataSize() * aMatrix.getValueType() * aRowTimes * aColumnTimes;
    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    repMatKernel<<<gridSize, blockSize>>>(aMatrix.getData(), data, aMatrix.getDataSize(), aMatrix.isComplex());
    // Launch-configuration errors are only visible through cudaGetLastError();
    // errors raised while the kernel runs surface at the synchronize call.
    status = cudaGetLastError();
    if(status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat kernel failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);
        return CudaMatrix();
    }
    return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0) * aRowTimes, aMatrix.getDimSize(1) * aColumnTimes, aMatrix.getDimSize(2), aMatrix.getValueType());
}
|
||||||
|
|
||||||
|
// Tile a device matrix aRowTimes times along dimension 0, aColumnTimes times
// along dimension 1 and aSliceTimes times along dimension 2.
//
// Returns an empty CudaMatrix when any repeat count is below 1, when the input
// has more than two dimensions or is null, or when a CUDA call fails
// (allocation, launch or execution); the failure is reported on std::cerr.
//
// Limitation: repMatKernel is launched with one block per tile and the block
// is shaped like the input, so the input must fit within the device's
// per-block thread limits. Larger inputs are rejected by the launch, which is
// now detected instead of returning uninitialized device memory.
CudaMatrix Aurora::repmat(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
    // aSliceTimes is validated as well: zero or a negative value would yield an
    // invalid grid z-dimension and a wrapped-around allocation size.
    if(aRowTimes < 1 || aColumnTimes < 1 || aSliceTimes < 1 || aMatrix.getDims() > 2 || aMatrix.isNull())
    {
        return CudaMatrix();
    }

    dim3 blockSize(aMatrix.getDimSize(0), aMatrix.getDimSize(1), 1);
    dim3 gridSize(aRowTimes, aColumnTimes, aSliceTimes);

    size_t size = aMatrix.getDataSize() * aMatrix.getValueType() * aRowTimes * aColumnTimes * aSliceTimes;
    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    repMatKernel<<<gridSize, blockSize>>>(aMatrix.getData(), data, aMatrix.getDataSize(), aMatrix.isComplex());
    // Launch-configuration errors are only visible through cudaGetLastError();
    // errors raised while the kernel runs surface at the synchronize call.
    status = cudaGetLastError();
    if(status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat kernel failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);
        return CudaMatrix();
    }
    return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0) * aRowTimes, aMatrix.getDimSize(1) * aColumnTimes, aMatrix.getDimSize(2) * aSliceTimes, aMatrix.getValueType());
}
|
||||||
|
|
||||||
|
// Tiling kernel for 3-D inputs.
//
// Launch layout this kernel relies on:
//   - blockDim.x / blockDim.y / blockDim.z are used as the three extents of the
//     input, so a single block covers exactly one copy of the input;
//   - blockIdx selects which tile of the output the block writes.
// For complex data every element is a pair of consecutive floats.
// aInputSize is accepted but never read by the kernel.
__global__ void repMat3DKernel(float* aInputData, float* aOutput, unsigned int aInputSize, bool aIsComplex)
{
    // Extents of the whole output in the row and column directions.
    const unsigned int outRows = blockDim.x * gridDim.x;
    const unsigned int outCols = blockDim.y * gridDim.y;

    // Position of this thread inside the output.
    const unsigned int row   = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int col   = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int slice = blockIdx.z * blockDim.z + threadIdx.z;

    // Number of floats stored per element: 2 for complex, 1 otherwise.
    const unsigned int floatsPerElement = aIsComplex ? 2u : 1u;

    const unsigned int dst = floatsPerElement * (slice * outRows * outCols + col * outRows + row);
    const unsigned int src = floatsPerElement * (threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x);

    // Copy every float of the element (one for real data, two for complex data).
    for(unsigned int part = 0; part < floatsPerElement; ++part)
    {
        aOutput[dst + part] = aInputData[src + part];
    }
}
|
||||||
|
|
||||||
|
// Tile a 3-D device matrix aRowTimes, aColumnTimes and aSliceTimes times along
// dimensions 0, 1 and 2 respectively.
//
// Returns an empty CudaMatrix when any repeat count is below 1, when the input
// has fewer than three dimensions or is null, or when a CUDA call fails
// (allocation, launch or execution); the failure is reported on std::cerr.
//
// Limitation: repMat3DKernel is launched with one block per tile and the block
// is shaped like the input, so the input must fit within the device's
// per-block thread limits. Larger inputs are rejected by the launch, which is
// now detected instead of returning uninitialized device memory.
CudaMatrix Aurora::repmat3d(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes)
{
    // aSliceTimes is validated as well: zero or a negative value would yield an
    // invalid grid z-dimension and a wrapped-around allocation size.
    if(aRowTimes < 1 || aColumnTimes < 1 || aSliceTimes < 1 || aMatrix.getDims() < 3 || aMatrix.isNull())
    {
        return CudaMatrix();
    }

    dim3 blockSize(aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
    dim3 gridSize(aRowTimes, aColumnTimes, aSliceTimes);

    size_t size = aMatrix.getDataSize() * aMatrix.getValueType() * aRowTimes * aColumnTimes * aSliceTimes;
    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat3d cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }

    repMat3DKernel<<<gridSize, blockSize>>>(aMatrix.getData(), data, aMatrix.getDataSize(), aMatrix.isComplex());
    // Launch-configuration errors are only visible through cudaGetLastError();
    // errors raised while the kernel runs surface at the synchronize call.
    status = cudaGetLastError();
    if(status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess)
    {
        std::cerr<<"repmat3d kernel failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);
        return CudaMatrix();
    }
    return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0) * aRowTimes, aMatrix.getDimSize(1) * aColumnTimes, aMatrix.getDimSize(2) * aSliceTimes, aMatrix.getValueType());
}
|
||||||
@@ -39,6 +39,12 @@ namespace Aurora
|
|||||||
CudaMatrix sign(const CudaMatrix& aMatrix);
|
CudaMatrix sign(const CudaMatrix& aMatrix);
|
||||||
|
|
||||||
CudaMatrix sign(const CudaMatrix&& aMatrix);
|
CudaMatrix sign(const CudaMatrix&& aMatrix);
|
||||||
|
|
||||||
|
// Tiles aMatrix on the device: the result's first two dimension sizes are the
// input's multiplied by aRowTimes and aColumnTimes.
// Returns an empty CudaMatrix when aRowTimes or aColumnTimes is below 1, or
// when aMatrix is null or has more than two dimensions.
CudaMatrix repmat(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes);
|
||||||
|
|
||||||
|
// Tiles aMatrix on the device: the result's three dimension sizes are the
// input's multiplied by aRowTimes, aColumnTimes and aSliceTimes.
// Returns an empty CudaMatrix when aRowTimes or aColumnTimes is below 1, or
// when aMatrix is null or has more than two dimensions.
CudaMatrix repmat(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes);
|
||||||
|
|
||||||
|
// Tiles a three-dimensional aMatrix on the device: the result's three dimension
// sizes are the input's multiplied by aRowTimes, aColumnTimes and aSliceTimes.
// Returns an empty CudaMatrix when aRowTimes or aColumnTimes is below 1, or
// when aMatrix is null or has fewer than three dimensions.
CudaMatrix repmat3d(const CudaMatrix& aMatrix,int aRowTimes, int aColumnTimes, int aSliceTimes);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //AURORA_CUDA_FUNCTION1D_H
|
#endif //AURORA_CUDA_FUNCTION1D_H
|
||||||
@@ -163,8 +163,8 @@ TEST_F(Function1D_Cuda_Test, sqrt)
|
|||||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||||
result1 = Aurora::sqrt(hostMatrix);
|
result1 = Aurora::sqrt(hostMatrix);
|
||||||
result2 = Aurora::sqrt(deviceMatrix).toHostMatrix();
|
result2 = Aurora::sqrt(deviceMatrix).toHostMatrix();
|
||||||
EXPECT_EQ(result2.getDataSize(), 4);
|
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(result1[i], result2[i]);
|
EXPECT_EQ(result1[i], result2[i]);
|
||||||
@@ -222,3 +222,51 @@ TEST_F(Function1D_Cuda_Test, sign)
|
|||||||
EXPECT_EQ(result1[i], result2[i]);
|
EXPECT_EQ(result1[i], result2[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that the device implementation of repmat produces the same values as
// the host implementation for real and complex inputs, with and without a
// slice repeat count.
//
// Size and value-type checks are fatal (ASSERT_*): the element-wise loops index
// both results up to the host result's length, so a mismatch must stop the
// test before the loop reads past the end of the device result.
TEST_F(Function1D_Cuda_Test, repmat)
{
    // Real 2x4 input tiled 3x6.
    Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
    Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();

    auto result1 = Aurora::repmat(hostMatrix,3,6);
    auto result2 = Aurora::repmat(deviceMatrix,3,6).toHostMatrix();
    ASSERT_EQ(result2.getDataSize(), 8 * 3 * 6);
    ASSERT_EQ(result2.getDataSize(), result1.getDataSize());
    ASSERT_EQ(result2.getValueType(), Aurora::Normal);
    ASSERT_EQ(result2.getValueType(), result1.getValueType());
    for(size_t i=0; i<result1.getDataSize(); ++i)
    {
        EXPECT_EQ(result1[i], result2[i]);
    }

    // Complex 2x2 input tiled 4x8.
    hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
    deviceMatrix = hostMatrix.toDeviceMatrix();
    result1 = Aurora::repmat(hostMatrix, 4, 8);
    result2 = Aurora::repmat(deviceMatrix, 4, 8).toHostMatrix();
    ASSERT_EQ(result2.getDataSize(), 4 * 4 * 8);
    ASSERT_EQ(result2.getDataSize(), result1.getDataSize());
    ASSERT_EQ(result2.getValueType(), Aurora::Complex);
    ASSERT_EQ(result2.getValueType(), result1.getValueType());
    for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
    {
        EXPECT_EQ(result1[i], result2[i]);
    }

    // Real 3x4 input tiled 4x8x3.
    hostMatrix = Aurora::Matrix::fromRawData(new float[12]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8,9.9,10,11,12}, 3, 4, 1,Aurora::Normal);
    deviceMatrix = hostMatrix.toDeviceMatrix();
    result1 = Aurora::repmat(hostMatrix, 4, 8, 3);
    result2 = Aurora::repmat(deviceMatrix, 4, 8, 3).toHostMatrix();
    ASSERT_EQ(result2.getDataSize(), 3 * 4 * 4 * 8 * 3);
    ASSERT_EQ(result2.getDataSize(), result1.getDataSize());
    ASSERT_EQ(result2.getValueType(), Aurora::Normal);
    ASSERT_EQ(result2.getValueType(), result1.getValueType());
    for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
    {
        EXPECT_EQ(result1[i], result2[i]);
    }

    // Complex 3x2 input tiled 4x8x3.
    hostMatrix = Aurora::Matrix::fromRawData(new float[12]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8,9.9,10,11,12}, 3, 2, 1,Aurora::Complex);
    deviceMatrix = hostMatrix.toDeviceMatrix();
    result1 = Aurora::repmat(hostMatrix, 4, 8, 3);
    result2 = Aurora::repmat(deviceMatrix, 4, 8, 3).toHostMatrix();
    ASSERT_EQ(result2.getDataSize(), 3 * 2 * 4 * 8 * 3);
    ASSERT_EQ(result2.getDataSize(), result1.getDataSize());
    ASSERT_EQ(result2.getValueType(), Aurora::Complex);
    ASSERT_EQ(result2.getValueType(), result1.getValueType());
    for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
    {
        EXPECT_EQ(result1[i], result2[i]);
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user