From 81078bd69f824c6655bf8d8d30c22b71edaa1ca1 Mon Sep 17 00:00:00 2001 From: kradchen Date: Tue, 19 Dec 2023 13:12:20 +0800 Subject: [PATCH] Add std, and fix fft, ifft bug for cuda --- cmake/AuroraConfig.cmake | 9 +- src/Function2D.cu | 39 +- src/Function2D.cuh | 8 + test/CudaMatrix_Test.cpp | 727 ++++++++++++++++++++-------------- test/Function2D_Cuda_Test.cpp | 82 +++- 5 files changed, 552 insertions(+), 313 deletions(-) diff --git a/cmake/AuroraConfig.cmake b/cmake/AuroraConfig.cmake index 6bcbe4a..41ef2bd 100644 --- a/cmake/AuroraConfig.cmake +++ b/cmake/AuroraConfig.cmake @@ -1,6 +1,8 @@ set(MKL_INTERFACE_FULL intel_lp64) find_package(OpenMP REQUIRED) find_package(MKL CONFIG REQUIRED) +enable_language(CUDA) +find_package(CUDAToolkit REQUIRED) set(Aurora_MAJOR_VERSION 1) set(Aurora_MINOR_VERSION 0) @@ -9,12 +11,11 @@ set(Aurora_BUILD_VERSION 0) get_filename_component(Aurora_DIR "${CMAKE_CURRENT_LIST_DIR}/" PATH) message("Aurora_DIR: ${Aurora_DIR}") -file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/*.cpp") - +file(GLOB_RECURSE Aurora_Source "${Aurora_DIR}/src/[AFSC]*.cpp" "${Aurora_DIR}/src/Matrix*.cpp" "${Aurora_DIR}/src/*.cu") +message( ${Aurora_Source}) set(Aurora_INCLUDE_DIRS "${Aurora_DIR}/src" "${Aurora_DIR}/thirdparty/include" $) - set(Aurora_Complie_Options $ ) -set(Aurora_Libraries $ OpenMP::OpenMP_CXX) +set(Aurora_Libraries $ OpenMP::OpenMP_CXX ${CUDA_cublas_LIBRARY} ${CUDA_cusolver_LIBRARY}) set(Aurora_FOUND TRUE) message(Aurora Found) \ No newline at end of file diff --git a/src/Function2D.cu b/src/Function2D.cu index 25e817f..2669038 100644 --- a/src/Function2D.cu +++ b/src/Function2D.cu @@ -156,7 +156,6 @@ CudaMatrix Aurora::max(const CudaMatrix &aMatrix, FunctionDirection direction, l CudaMatrix vxmMax(CudaMatrix aVec, CudaMatrix aMat) { //col-vec x mat if (aVec.getDimSize(1) == 1 && aVec.getDimSize(0) == aMat.getDimSize(0)) { - std::cout<<"max mat and col-vec "< 1 || aMatrix.isComplex()) { + std::cerr + << (aMatrix.getDimSize(2) > 1 ? "std() not support 3D data!" : "std() not support complex value type!") + << std::endl; + return CudaMatrix(); + } + + auto src = aMatrix.isComplex() ? Aurora::abs(aMatrix) : aMatrix.deepCopy(); + int calc_size = src.getDimSize(0) == 1 ? src.getDimSize(1) : src.getDimSize(0); + auto meanM = Aurora::mean(src); + return sqrt(Aurora::sum((src-meanM)^2.0)/((float)calc_size-1.0f)); +} + template class RowElementIterator:public thrust::iterator_facade< RowElementIterator, @@ -1294,12 +1304,13 @@ __global__ void complexFillKernel(float* aInputData, float* aOutput,unsigned int for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x) { if(threadIdx.x + offset< aCopySize){ - aOutput[2*idx_d] = aInputData[idx_s]; - aOutput[2*idx_d + 1] = 0; + aOutput[2 * idx_d + offset * 2] = aInputData[idx_s + offset]; + aOutput[2 * idx_d + offset * 2 + 1] = 0; + } else if(threadIdx.x + offset< aDesColEleCount){ - aOutput[2*idx_d] = 0; - aOutput[2*idx_d + 1] = 0; + aOutput[2 * idx_d + offset * 2] = 0; + aOutput[2 * idx_d + offset * 2 + 1] = 0; } else{ return; @@ -1316,12 +1327,12 @@ __global__ void complexCopyKernel(float* aInputData, float* aOutput,unsigned int for (int offset = 0; offset < aDesColEleCount; offset+=blockDim.x) { if(threadIdx.x + offset< aCopySize){ - aOutput[2*idx_d] = aInputData[idx_s*2]; - aOutput[2*idx_d + 1] = aInputData[idx_s*2+1]; + aOutput[2*idx_d + offset * 2 ] = aInputData[idx_s*2 + offset*2]; + aOutput[2*idx_d + offset*2+ 1] = aInputData[idx_s*2+ offset*2+1]; } else if(threadIdx.x + offset< aDesColEleCount){ - aOutput[2*idx_d] = 0; - aOutput[2*idx_d + 1] = 0; + aOutput[2*idx_d + offset*2] = 0; + aOutput[2*idx_d + offset*2+ 1] = 0; } else{ return; @@ -1344,7 +1355,9 @@ if (aMatrix.isComplex()){ complexFillKernel<<>>(aMatrix.getData(), data, needCopySize, aMatrix.getDimSize(0),ColEleCount); } auto ret = Aurora::CudaMatrix::fromRawData(data,ColEleCount,aMatrix.getDimSize(1),1,Complex); + auto mm = ret.toHostMatrix(); ExecFFT(ret,0); + mm = ret.toHostMatrix(); return ret; } diff --git a/src/Function2D.cuh b/src/Function2D.cuh index 2a3e502..26c8428 100644 --- a/src/Function2D.cuh +++ b/src/Function2D.cuh @@ -26,6 +26,14 @@ namespace Aurora */ CudaMatrix mean(const CudaMatrix &aMatrix, FunctionDirection direction = Column); + /** + * @brief 标准差,只支持列方向 + * + * @param aMatrix + * @return CudaMatrix + */ + CudaMatrix std(const CudaMatrix &aMatrix); + CudaMatrix sort(const CudaMatrix &aMatrix,FunctionDirection direction = Column); CudaMatrix sort(CudaMatrix &&aMatrix,FunctionDirection direction = Column); diff --git a/test/CudaMatrix_Test.cpp b/test/CudaMatrix_Test.cpp index e84baf8..5bb8273 100644 --- a/test/CudaMatrix_Test.cpp +++ b/test/CudaMatrix_Test.cpp @@ -26,11 +26,12 @@ protected: } }; -TEST_F(CudaMatrix_Test, MatrixAdd) { +TEST_F(CudaMatrix_Test, MatrixAddScalar) { { - auto A = Aurora::zeros(1000,1,1); - auto B = Aurora::zeros(1000,1,1); - for (size_t i = 0; i < 1000; i++) + auto A = Aurora::zeros(257,257,1); + auto B = Aurora::zeros(257,257,1); + size_t size = 257*257; + for (size_t i = 0; i < size; i++) { A[i] = -1; B[i] = i; @@ -42,35 +43,48 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { auto dB = B.toDeviceMatrix(); auto dC = (dA+dB); auto dhC = dC.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dC.getDimSize(0)); + EXPECT_EQ(257, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(C[i],dhC[i]); + EXPECT_FLOAT_EQ(C[i],dhC[i]); } printf("Test CudaMatrix operator+(float aScalar) const \r\n"); //CudaMatrix operator+(float aScalar) const auto D = C+0.5; auto dD = dC+0.5; auto dhD = dD.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD.getDimSize(0)); + EXPECT_EQ(257, dD.getDimSize(1)); + EXPECT_EQ(1, dD.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n"); // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) dD = 0.5 + dC; dhD = dD.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD.getDimSize(0)); + EXPECT_EQ(257, dD.getDimSize(1)); + EXPECT_EQ(1, dD.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n"); // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) { auto dD2 = 0.5 + (dA+dB); dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD2.getDimSize(0)); + EXPECT_EQ(257, dD2.getDimSize(1)); + EXPECT_EQ(1, dD2.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } } printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n"); @@ -79,9 +93,12 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { auto dD2 = (dA+dB)+0.5; dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD2.getDimSize(0)); + EXPECT_EQ(257, dD2.getDimSize(1)); + EXPECT_EQ(1, dD2.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } } //CudaMatrix operator+(CudaMatrix &&aMatrix) const @@ -90,9 +107,12 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { auto D = A+C; auto dD2 = dA+(dA+dB); dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD2.getDimSize(0)); + EXPECT_EQ(257, dD2.getDimSize(1)); + EXPECT_EQ(1, dD2.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } } //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) @@ -101,9 +121,12 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { auto D = A+C; auto dD2 = (dA+dB)+dA; dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + EXPECT_EQ(257, dD2.getDimSize(0)); + EXPECT_EQ(257, dD2.getDimSize(1)); + EXPECT_EQ(1, dD2.getDimSize(2)); + for (size_t i = 0; i < size; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + EXPECT_FLOAT_EQ(D[i],dhD[i]); } } } @@ -120,41 +143,135 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { auto C = A+B; std::complex scalar(-1,-1); auto dA = A.toDeviceMatrix(); - + //complex matrix + complex scalar auto dC = (dA+scalar); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); for (size_t i = 0; i < C.getDataSize()*2; i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } - + + //complex Matrix&& + complex scalar dC = A.toDeviceMatrix()+scalar; + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); for (size_t i = 0; i < C.getDataSize()*2; i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } - + //complex scalar + complex Matrix&& dC = scalar+A.toDeviceMatrix(); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); for (size_t i = 0; i < C.getDataSize()*2; i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } + //complex scalar + complex matrix + dC = scalar+dA; + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + } + //complex matrix + real scalar C = A+1; dC = (dA+1); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); for (size_t i = 0; i < C.getDataSize()*2; i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index"< scalar(-1,-1); + auto dA = A.toDeviceMatrix(); + //real matrix + complex scalar + auto dC = (dA+scalar); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + //real Matrix&& + complex scalar + dC = A.toDeviceMatrix()+scalar; + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + } + //complex scalar + real Matrix&& + dC = scalar+A.toDeviceMatrix(); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + //complex scalar + real Matrix + dC = scalar+dA; + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(50, dC.getDimSize(0)); + EXPECT_EQ(100, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } } } @@ -162,64 +279,89 @@ TEST_F(CudaMatrix_Test, MatrixAdd) { TEST_F(CudaMatrix_Test, MatrixAddmm) { //real { - auto A = Aurora::zeros(100,100,1); - auto B = Aurora::zeros(100,100,1); - for (size_t i = 0; i < 10000; i++) - { - A[i] = -1; - B[i] = 8; - } + auto A = -Aurora::ones(4096,23519,1); + auto B = Aurora::ones(4096,23519,1)*8; auto C = B-1; auto dA = A.toDeviceMatrix(); auto dB = B.toDeviceMatrix(); - //Matrix& + Matrix&, col mode + //Matrix& + Matrix& auto dC = (dA+dB); + EXPECT_TRUE(!dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + #pragma omp parallel for for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } - //Matrix&& + Matrix&, col mode + //Matrix&& + Matrix& dC = A.toDeviceMatrix()+dB; - for (size_t i = 0; i < C.getDataSize(); i++) - { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); - } - //Matrix& + Matrix&&, col mode + EXPECT_TRUE(!dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + // #pragma omp parallel for + // for (size_t i = 0; i < C.getDataSize(); i++) + // { + // EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + // } + //Matrix& + Matrix&& dC = dB+A.toDeviceMatrix(); - for (size_t i = 0; i < C.getDataSize(); i++) - { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); - } + EXPECT_TRUE(!dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + // #pragma omp parallel for + // for (size_t i = 0; i < C.getDataSize(); i++) + // { + // EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + // } } //complex { - float *dataA = Aurora::random(100*100*2); - float *dataB = Aurora::random(100*100*2); + float *dataA = Aurora::random(4096*23519*2); + float *dataB = Aurora::random(4096*23519*2); - auto A = Aurora::Matrix::fromRawData(dataA, 100,100,1,Aurora::Complex); - auto B = Aurora::Matrix::fromRawData(dataB, 100,100,1,Aurora::Complex); + auto A = Aurora::Matrix::fromRawData(dataA, 4096,23519,1,Aurora::Complex); + auto B = Aurora::Matrix::fromRawData(dataB, 4096,23519,1,Aurora::Complex); auto C = B+A; auto dA = A.toDeviceMatrix(); auto dB = B.toDeviceMatrix(); //Matrix& + Matrix&, auto dC = (dA+dB); + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + #pragma omp parallel for for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } //Matrix&& + Matrix&, dC = A.toDeviceMatrix()+dB; - for (size_t i = 0; i < C.getDataSize(); i++) - { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); - } + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + // #pragma omp parallel for + // for (size_t i = 0; i < C.getDataSize(); i++) + // { + // EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + // } //Matrix& + Matrix&&, dC = dB+A.toDeviceMatrix(); - for (size_t i = 0; i < C.getDataSize(); i++) - { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); - } + EXPECT_TRUE(dC.isComplex()); + EXPECT_EQ(4096, dC.getDimSize(0)); + EXPECT_EQ(23519, dC.getDimSize(1)); + EXPECT_EQ(1, dC.getDimSize(2)); + // #pragma omp parallel for + // for (size_t i = 0; i < C.getDataSize(); i++) + // { + // EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); + // } } } @@ -235,31 +377,31 @@ TEST_F(CudaMatrix_Test, MatrixAddmv) { auto dC = (dA+dB); for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } //Matrix&& + Matrix&, col mode dC = A.toDeviceMatrix()+dB; for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } //Matrix& + Matrix&&, col mode dC = dB+A.toDeviceMatrix(); for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } //Matrix& + vec&&, col mode dC = dA+B.toDeviceMatrix(); for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } //vec&& + Matrix&&, col mode dC = B.toDeviceMatrix()+dA; for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + EXPECT_FLOAT_EQ(C[i],dC.getValue(i)); } dB.forceReshape(1, 100, 1); @@ -267,13 +409,13 @@ TEST_F(CudaMatrix_Test, MatrixAddmv) { dC = (dA+dB); for (size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index:"<dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2354,7 +2495,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (dA<=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2362,7 +2503,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (dA>=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2370,7 +2511,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (dA==dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2378,7 +2519,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (dA!=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2386,7 +2527,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (9dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2402,7 +2543,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (9<=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2410,7 +2551,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (9>=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2418,7 +2559,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (9==dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } { @@ -2426,7 +2567,7 @@ TEST_F(CudaMatrix_Test, MatrixCompare){ auto dhR = (9!=dB).toHostMatrix(); for (size_t i = 0; i < 1000; i++) { - ASSERT_FLOAT_EQ(R[i],dhR[i]); + EXPECT_FLOAT_EQ(R[i],dhR[i]); } } } @@ -2445,7 +2586,7 @@ TEST_F(CudaMatrix_Test, matrixfunction) Aurora::Matrix block2 = A.toHostMatrix().block(0, 1, 2); for (size_t i = 0; i < block1.getDataSize(); i++) { - ASSERT_FLOAT_EQ(block1[i], block2[i]); + EXPECT_FLOAT_EQ(block1[i], block2[i]); } block2 = A.toHostMatrix(); @@ -2454,7 +2595,7 @@ TEST_F(CudaMatrix_Test, matrixfunction) block2.setBlockValue(0, 1, 2,-1); for (size_t i = 0; i < block1.getDataSize(); i++) { - ASSERT_FLOAT_EQ(block1[i], block2[i]); + EXPECT_FLOAT_EQ(block1[i], block2[i]); } Aurora::CudaMatrix C = Aurora::zeros(2,3).toDeviceMatrix(); @@ -2463,7 +2604,7 @@ TEST_F(CudaMatrix_Test, matrixfunction) block2 = C.toHostMatrix(); for(size_t i = 0; i < C.getDataSize(); i++) { - ASSERT_FLOAT_EQ(block1[i], block2[i]); + EXPECT_FLOAT_EQ(block1[i], block2[i]); } } diff --git a/test/Function2D_Cuda_Test.cpp b/test/Function2D_Cuda_Test.cpp index e9f7b0e..b340214 100644 --- a/test/Function2D_Cuda_Test.cpp +++ b/test/Function2D_Cuda_Test.cpp @@ -39,7 +39,7 @@ TEST_F(Function2D_Cuda_Test, min) B = Aurora::Matrix::fromRawData(dataB, 4096, 41472); dB = B.toDeviceMatrix(); long r,c; - + // column auto ret1 = Aurora::min(B, Aurora::Column,r,c); auto ret2 = Aurora::min(dB, Aurora::Column,r,c); @@ -53,7 +53,7 @@ TEST_F(Function2D_Cuda_Test, min) ASSERT_FLOAT_EQ(ret1[i], ret2.getValue(i))<<", index at :"<