Add exp, mod, acos, acosd, conj, norm and unittest.
This commit is contained in:
@@ -49,6 +49,8 @@ target_compile_options(Aurora PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
-arch=sm_75 --expt-extended-lambda
|
||||
>)
|
||||
target_link_libraries(Aurora PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||
target_link_libraries(Aurora PRIVATE ${CUDA_cublas_LIBRARY})
|
||||
target_link_libraries(Aurora PRIVATE ${CUDA_cusolver_LIBRARY})
|
||||
endif(Aurora_USE_CUDA)
|
||||
|
||||
find_package(GTest REQUIRED)
|
||||
@@ -75,6 +77,8 @@ target_compile_options(Aurora_Test PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
|
||||
-arch=sm_75 --expt-extended-lambda
|
||||
>)
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_RUNTIME_LIBRARY} CUDA::cufft CUDA::cudart)
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_cublas_LIBRARY})
|
||||
target_link_libraries(Aurora_Test PRIVATE ${CUDA_cusolver_LIBRARY})
|
||||
endif(Aurora_USE_CUDA)
|
||||
gtest_discover_tests(Aurora_Test )
|
||||
#target_link_libraries(CreateMatchedFilter PRIVATE TBB::tbb)
|
||||
@@ -1,14 +1,18 @@
|
||||
#include "CudaMatrix.h"
|
||||
#include "AuroraDefs.h"
|
||||
#include "Function1D.cuh"
|
||||
#include "Function1D.h"
|
||||
#include "Matrix.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/complex.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusolverDn.h>
|
||||
|
||||
using namespace Aurora;
|
||||
using namespace thrust::placeholders;
|
||||
@@ -701,3 +705,251 @@ CudaMatrix Aurora::log(const CudaMatrix& aMatrix, int aBaseNum)
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void expKernel(float* aInputData, float* aOutput, unsigned int aInputSize, bool aIsComplex)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aInputSize)
|
||||
{
|
||||
if(aIsComplex)
|
||||
{
|
||||
unsigned int index = 2 * idx;
|
||||
float expReal = expf(aInputData[index]);
|
||||
aOutput[index] = expReal * cosf(aInputData[index + 1]);
|
||||
aOutput[index + 1] = expReal * sinf(aInputData[index + 1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
aOutput[idx] = expf(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::exp(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size * aMatrix.getValueType());
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
expKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix.getData(), data, size, aMatrix.isComplex());
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void modKernel(float* aInputData, float* aOutput, unsigned int aInputSize, float aModValue)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aInputSize)
|
||||
{
|
||||
aOutput[idx] = fmodf(aInputData[idx], aModValue);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::mod(const CudaMatrix& aMatrix, float aValue)
|
||||
{
|
||||
if(aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"mod not support complex"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
modKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix.getData(), data, size, aValue);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
}
|
||||
|
||||
__global__ void acosKernel(float* aInputData, float* aOutput, unsigned int aInputSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aInputSize)
|
||||
{
|
||||
aOutput[idx] = acosf(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::acos(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"acos not support complex"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
acosKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
}
|
||||
|
||||
__global__ void acosdKernel(float* aInputData, float* aOutput, unsigned int aInputSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aInputSize)
|
||||
{
|
||||
aOutput[idx] = acosf(aInputData[idx]) * 180 / PI;
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::acosd(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"acos not support complex"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
acosdKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
}
|
||||
|
||||
__global__ void conjKernel(float* aInputData, float* aOutput, unsigned int aInputSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aInputSize)
|
||||
{
|
||||
unsigned int index = idx * 2;
|
||||
aOutput[index] = aInputData[index];
|
||||
aOutput[index + 1] = -aInputData[index + 1];
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::conj(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(!aMatrix.isComplex())
|
||||
{
|
||||
return CudaMatrix::copyFromRawData(aMatrix.getData(),aMatrix.getDimSize(0),aMatrix.getDimSize(1),aMatrix.getDimSize(2));
|
||||
}
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size * aMatrix.getValueType());
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
conjKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
|
||||
float Aurora::norm(const CudaMatrix& aMatrix, NormMethod aNormMethod)
|
||||
{
|
||||
float resultValue = 0;
|
||||
if(aMatrix.getDims() > 2)
|
||||
{
|
||||
std::cerr<<"norm not support 3d matrix"<<std::endl;
|
||||
return 0;
|
||||
}
|
||||
//for 1 dims
|
||||
if(aMatrix.getDimSize(0) == 1 || aMatrix.getDimSize(1) == 1 )
|
||||
{
|
||||
if(aNormMethod == Aurora::Norm1)
|
||||
{
|
||||
CudaMatrix result = abs(aMatrix);
|
||||
resultValue = thrust::reduce(thrust::device, result.getData(), result.getData() + result.getDataSize(), 0.0, thrust::plus<float>());
|
||||
return resultValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
CudaMatrix result = aMatrix.deepCopy();
|
||||
thrust::transform(thrust::device, result.getData(), result.getData() + result.getDataSize() * result.getValueType(), result.getData(), thrust::square<float>());
|
||||
resultValue = thrust::reduce(thrust::device, result.getData(), result.getData() + result.getDataSize() * result.getValueType(), 0.0, thrust::plus<float>());
|
||||
return std::sqrt(resultValue);
|
||||
}
|
||||
}
|
||||
//for 2 dims
|
||||
if(aNormMethod == Aurora::NormF)
|
||||
{
|
||||
CudaMatrix result = aMatrix.deepCopy();
|
||||
thrust::transform(thrust::device, result.getData(), result.getData() + result.getDataSize() * result.getValueType(), result.getData(), thrust::square<float>());
|
||||
resultValue = thrust::reduce(thrust::device, result.getData(), result.getData() + result.getDataSize() * result.getValueType(), 0.0, thrust::plus<float>());
|
||||
return std::sqrt(resultValue);
|
||||
}
|
||||
else if(aNormMethod == Aurora::Norm1)
|
||||
{
|
||||
for(int i=0; i<aMatrix.getDimSize(1); ++i)
|
||||
{
|
||||
CudaMatrix result = abs(aMatrix.block(1, i, i));
|
||||
float tempValue = thrust::reduce(thrust::device, result.getData(), result.getData() + result.getDataSize(), 0.0, thrust::plus<float>());
|
||||
if(resultValue < tempValue)
|
||||
{
|
||||
resultValue = tempValue;
|
||||
}
|
||||
}
|
||||
return resultValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"norm2 not support 2d complex matrix"<<std::endl;
|
||||
return 0;
|
||||
}
|
||||
cusolverDnHandle_t cusolverH = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
float* d_S = NULL;
|
||||
float* d_U = NULL;
|
||||
float* d_VT = NULL;
|
||||
int* devInfo = NULL;
|
||||
float* d_work = NULL;
|
||||
int lwork = 0;
|
||||
const int m = aMatrix.getDimSize(0);
|
||||
const int n = aMatrix.getDimSize(1);
|
||||
const int lda = m;
|
||||
const int ldu = m;
|
||||
const int ldvt = n;
|
||||
|
||||
cusolverDnCreate(&cusolverH);
|
||||
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
cusolverDnSetStream(cusolverH, stream);
|
||||
cudaMalloc((void**)&d_S, sizeof(float)*n);
|
||||
cudaMalloc((void**)&d_U, sizeof(float)*ldu*m);
|
||||
cudaMalloc((void**)&d_VT, sizeof(float)*ldvt*n);
|
||||
cudaMalloc((void**)&devInfo, sizeof(int));
|
||||
|
||||
cusolverDnSgesvd_bufferSize(cusolverH, m, n, &lwork);
|
||||
|
||||
cudaMalloc((void**)&d_work, sizeof(float)*lwork);
|
||||
auto matrix = aMatrix.deepCopy();
|
||||
cusolverDnSgesvd(cusolverH, 'A', 'A', m, n, matrix.getData(), lda, d_S,d_U, ldu, d_VT, ldvt, d_work, lwork, NULL, devInfo);
|
||||
|
||||
int devInfo_h = 0;
|
||||
cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
|
||||
|
||||
if (devInfo_h != 0)
|
||||
{
|
||||
printf("Unsuccessful SVD execution\n");
|
||||
printf("Error code: %d\n", devInfo_h);
|
||||
}
|
||||
|
||||
float S[n] = {0};
|
||||
cudaMemcpy(S, d_S, sizeof(float)*n, cudaMemcpyDeviceToHost);
|
||||
|
||||
float resultValue = S[0];
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
if (S[i] > resultValue)
|
||||
{
|
||||
resultValue = S[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (d_S ) cudaFree(d_S);
|
||||
if (d_U ) cudaFree(d_U);
|
||||
if (d_VT) cudaFree(d_VT);
|
||||
if (devInfo) cudaFree(devInfo);
|
||||
if (d_work) cudaFree(d_work);
|
||||
if (cusolverH) cusolverDnDestroy(cusolverH);
|
||||
if (stream) cudaStreamDestroy(stream);
|
||||
return resultValue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,7 +51,17 @@ namespace Aurora
|
||||
|
||||
CudaMatrix log(const CudaMatrix& aMatrix, int aBaseNum = -1);
|
||||
|
||||
CudaMatrix exp(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix mod(const CudaMatrix& aMatrix, float aValue);
|
||||
|
||||
CudaMatrix acos(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix acosd(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix conj(const CudaMatrix& aMatrix);
|
||||
|
||||
float norm(const CudaMatrix& aMatrix, NormMethod aNormMethod);
|
||||
|
||||
// ------compareSet----------------------------------------------------
|
||||
|
||||
|
||||
@@ -647,3 +647,156 @@ TEST_F(Function1D_Cuda_Test, compareSet)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, exp)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::exp(hostMatrix);
|
||||
auto result2 = Aurora::exp(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[12]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8,9.9,10,11,12}, 3, 2, 1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::exp(hostMatrix);
|
||||
result2 = Aurora::exp(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, mod)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::mod(hostMatrix, 2);
|
||||
auto result2 = Aurora::mod(deviceMatrix, 2).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
result1 = Aurora::mod(hostMatrix, 3);
|
||||
result2 = Aurora::mod(deviceMatrix, 3).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, acos)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::acos(hostMatrix);
|
||||
auto result2 = Aurora::acos(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, acosd)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::acosd(hostMatrix);
|
||||
auto result2 = Aurora::acosd(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, conj)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::conj(hostMatrix);
|
||||
auto result2 = Aurora::conj(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[12]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8,9.9,10,11,12}, 3, 2, 1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::conj(hostMatrix);
|
||||
result2 = Aurora::conj(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), result1.getDataSize());
|
||||
EXPECT_EQ(result2.getValueType(), result1.getValueType());
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_FLOAT_AE(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, norm) {
|
||||
//1Dim
|
||||
float *data = new float[3]{1,2,-3};
|
||||
auto matrix = Aurora::Matrix::fromRawData(data, 3);
|
||||
auto deviceMatrix = matrix.toDeviceMatrix();
|
||||
auto result = Aurora::norm(matrix.toDeviceMatrix(), Aurora::NormMethod::Norm1);
|
||||
EXPECT_FLOAT_AE(result,6);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm2);
|
||||
EXPECT_FLOAT_AE(result,3.74166);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::NormF);
|
||||
EXPECT_FLOAT_AE(result,3.74166);
|
||||
|
||||
//2Dims
|
||||
data = new float[8]{1,2,-3,6,7,9,22.3,-8.6};
|
||||
matrix = Aurora::Matrix::fromRawData(data, 4,2);
|
||||
deviceMatrix = matrix.toDeviceMatrix();
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm1);
|
||||
EXPECT_FLOAT_AE(result,46.9);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm2);
|
||||
EXPECT_FLOAT_AE(result,26.7284);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::NormF);
|
||||
EXPECT_FLOAT_AE(result,27.4089);
|
||||
|
||||
//1Dim Complex
|
||||
data = new float[6]{1,2,-3,4,5,-6};
|
||||
matrix = Aurora::Matrix::fromRawData(data, 3,1,1,Aurora::Complex);
|
||||
deviceMatrix = matrix.toDeviceMatrix();
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm1);
|
||||
EXPECT_FLOAT_AE(result,15.0463);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm2);
|
||||
EXPECT_FLOAT_AE(result,9.5394);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::NormF);
|
||||
EXPECT_FLOAT_AE(result,9.5394);
|
||||
|
||||
//2Dims Complex
|
||||
data = new float[12]{1,2,-3,4,5,-6,7,8,9,22,24,25};
|
||||
matrix = Aurora::Matrix::fromRawData(data, 3,2,1,Aurora::Complex);
|
||||
deviceMatrix = matrix.toDeviceMatrix();
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::Norm1);
|
||||
EXPECT_FLOAT_AE(result,69.0553);
|
||||
//not support
|
||||
//result = Aurora::norm(matrix,Aurora::NormMethod::Norm2);
|
||||
//EXPECT_FLOAT_AE(result,43.5314);
|
||||
result = Aurora::norm(deviceMatrix,Aurora::NormMethod::NormF);
|
||||
EXPECT_FLOAT_AE(result,44.3847);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user