Add vecnorm and unitest
This commit is contained in:
@@ -1118,3 +1118,97 @@ CudaMatrix Aurora::vertcat(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix
|
|||||||
return Aurora::CudaMatrix::fromRawData(data, outputRows, outputColumns, outputSlices, aMatrix1.getValueType());
|
return Aurora::CudaMatrix::fromRawData(data, outputRows, outputColumns, outputSlices, aMatrix1.getValueType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__global__ void vecnorm1Kernel(float* aInputData, unsigned int aInputRowSize, float* aOutput, bool aIsComplex)
|
||||||
|
{
|
||||||
|
__shared__ float sharedValue[THREADS_PER_BLOCK];
|
||||||
|
sharedValue[threadIdx.x] = 0;
|
||||||
|
if(aIsComplex)
|
||||||
|
{
|
||||||
|
for(unsigned int i=0; i<=aInputRowSize/blockDim.x; ++i)
|
||||||
|
{
|
||||||
|
unsigned int indexByRows = i*blockDim.x + threadIdx.x;
|
||||||
|
if(indexByRows < aInputRowSize)
|
||||||
|
{
|
||||||
|
unsigned int idx = blockIdx.x*aInputRowSize + indexByRows;
|
||||||
|
sharedValue[threadIdx.x] += sqrt(aInputData[2*idx] * aInputData[2*idx] + aInputData[2*idx+1] * aInputData[2*idx+1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(unsigned int i=0; i<=aInputRowSize/blockDim.x; ++i)
|
||||||
|
{
|
||||||
|
unsigned int indexByRows = i*blockDim.x + threadIdx.x;
|
||||||
|
if(indexByRows < aInputRowSize)
|
||||||
|
{
|
||||||
|
sharedValue[threadIdx.x] += abs(aInputData[blockIdx.x*aInputRowSize + indexByRows]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
for(unsigned int i = blockDim.x/2; i>0; i >>= 1)
|
||||||
|
{
|
||||||
|
if(threadIdx.x < i)
|
||||||
|
{
|
||||||
|
sharedValue[threadIdx.x] += sharedValue[threadIdx.x + i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
aOutput[blockIdx.x] = sharedValue[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void vecnorm2Kernel(float* aInputData, unsigned int aInputRowSize, float* aOutput, bool aIsComplex)
|
||||||
|
{
|
||||||
|
__shared__ float sharedValue[THREADS_PER_BLOCK];
|
||||||
|
sharedValue[threadIdx.x] = 0;
|
||||||
|
|
||||||
|
for(unsigned int i=0; i<=aInputRowSize/blockDim.x; ++i)
|
||||||
|
{
|
||||||
|
unsigned int indexByRows = i*blockDim.x + threadIdx.x;
|
||||||
|
if(indexByRows < aInputRowSize)
|
||||||
|
{
|
||||||
|
if(aIsComplex)
|
||||||
|
{
|
||||||
|
unsigned int idx = blockIdx.x*aInputRowSize + indexByRows;
|
||||||
|
sharedValue[threadIdx.x] += aInputData[2 * idx] * aInputData[2 * idx];
|
||||||
|
sharedValue[threadIdx.x] += aInputData[2 * idx + 1] * aInputData[2 * idx + 1];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sharedValue[threadIdx.x] += aInputData[blockIdx.x*aInputRowSize + indexByRows] * aInputData[blockIdx.x*aInputRowSize + indexByRows];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
for(unsigned int i = blockDim.x/2; i>0; i >>= 1)
|
||||||
|
{
|
||||||
|
if(threadIdx.x < i)
|
||||||
|
{
|
||||||
|
sharedValue[threadIdx.x] += sharedValue[threadIdx.x + i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
aOutput[blockIdx.x] = sqrt(sharedValue[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
CudaMatrix Aurora::vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim)
|
||||||
|
{
|
||||||
|
//only surpport aDim = 1 for now.
|
||||||
|
if(aDim != 1 || aNormMethod == NormMethod::NormF || aMatrix.isNull())
|
||||||
|
{
|
||||||
|
return CudaMatrix();
|
||||||
|
}
|
||||||
|
unsigned int column = aMatrix.getDimSize(1);
|
||||||
|
float* data = nullptr;
|
||||||
|
cudaMalloc((void**)&data, sizeof(float) * column);
|
||||||
|
if(aNormMethod == Aurora::Norm1)
|
||||||
|
{
|
||||||
|
vecnorm1Kernel<<<column, THREADS_PER_BLOCK>>>(aMatrix.getData(), aMatrix.getDimSize(0), data, aMatrix.isComplex());
|
||||||
|
}
|
||||||
|
else if(aNormMethod == Aurora::Norm2)
|
||||||
|
{
|
||||||
|
vecnorm2Kernel<<<column, THREADS_PER_BLOCK>>>(aMatrix.getData(), aMatrix.getDimSize(0), data, aMatrix.isComplex());
|
||||||
|
}
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
return Aurora::CudaMatrix::fromRawData(data,column);
|
||||||
|
}
|
||||||
|
|||||||
@@ -69,6 +69,8 @@ namespace Aurora
|
|||||||
|
|
||||||
CudaMatrix vertcat(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
CudaMatrix vertcat(const CudaMatrix& aMatrix1, const CudaMatrix& aMatrix2);
|
||||||
|
|
||||||
|
CudaMatrix vecnorm(const CudaMatrix& aMatrix, NormMethod aNormMethod, int aDim);
|
||||||
|
|
||||||
// ------compareSet----------------------------------------------------
|
// ------compareSet----------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -902,3 +902,41 @@ TEST_F(Function1D_Cuda_Test, vertcat) {
|
|||||||
EXPECT_FLOAT_EQ(result.getDimSize(0),6);
|
EXPECT_FLOAT_EQ(result.getDimSize(0),6);
|
||||||
EXPECT_FLOAT_EQ(result.getDimSize(1),1);
|
EXPECT_FLOAT_EQ(result.getDimSize(1),1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(Function1D_Cuda_Test, vecnorm) {
|
||||||
|
//1Dim
|
||||||
|
float *data = new float[3]{1,2,-3};
|
||||||
|
auto matrix = Aurora::Matrix::fromRawData(data, 3).toDeviceMatrix();
|
||||||
|
auto result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm1,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],6);
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm2,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],3.74166);
|
||||||
|
|
||||||
|
//2Dims
|
||||||
|
data = new float[8]{1,2,-3,6,7,9,22.3,-8.6};
|
||||||
|
matrix = Aurora::Matrix::fromRawData(data, 4,2).toDeviceMatrix();
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm1,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],12);
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[1],46.9);
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm2,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],7.0711);
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[1],26.4811);
|
||||||
|
|
||||||
|
//1Dim Complex
|
||||||
|
data = new float[6]{1,2,-3,4,5,-6};
|
||||||
|
matrix = Aurora::Matrix::fromRawData(data, 3,1,1,Aurora::Complex).toDeviceMatrix();
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm1,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],15.0463);
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm2,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],9.5394);
|
||||||
|
|
||||||
|
//2Dims Complex
|
||||||
|
data = new float[12]{1,2,-3,4,5,-6,7,8,9,22,24,25};
|
||||||
|
matrix = Aurora::Matrix::fromRawData(data, 3,2,1,Aurora::Complex).toDeviceMatrix();
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm1,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],15.0463);
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[1],69.0553);
|
||||||
|
result = Aurora::vecnorm(matrix,Aurora::NormMethod::Norm2,1).toHostMatrix();
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[0],9.5394);
|
||||||
|
EXPECT_FLOAT_AE(result.getData()[1],43.3474);
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user