diff --git a/src/CudaMatrix.cpp b/src/CudaMatrix.cpp index 092a2a7..8df61c0 100644 --- a/src/CudaMatrix.cpp +++ b/src/CudaMatrix.cpp @@ -490,107 +490,8 @@ bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMat } } } - -CudaMatrix CudaMatrix::operator+(float aScalar) const{ - if (isComplex()) - { - std::cerr<<"Complex matrix not support operator+(float aScalar)"<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize()); - return out; -} - -CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{ - if (this->getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); - return aMatrix; -} - - -CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){ - if (aOther.getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { + std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize()); + return out; + } + + CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{ + if (this->getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { + std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); + return aMatrix; + } + + + CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){ + if (aOther.getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() + std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() + std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize() <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<isComplex()) + { + std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize()); + return out; + } + CudaMatrix CudaMatrix::operator-(CudaMatrix &&aMatrix) const{ + if (aMatrix.isComplex()!=this->isComplex()) + { + std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize()); + return aMatrix; + } + CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther){ + if (aMatrix.isComplex()!=aOther.isComplex()) + { + std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<isComplex()) + { + std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize()); + return out; + } + CudaMatrix CudaMatrix::operator/(CudaMatrix &&aMatrix) const{ + if (aMatrix.isComplex()!=this->isComplex()) + { + std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { + std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize() + <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize()); + return aMatrix; + } + CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther){ + if (aMatrix.isComplex()!=aOther.isComplex()) + { + std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real") + <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<{ void setExponent(float v){ exponent = v; } + __host__ __device__ float operator()(const float& x) { return powf(x, exponent); @@ -74,12 +75,12 @@ void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){ void unaryPow(float* in1, float N,float* out, unsigned long length){ if (N == 0.0f) { - thrust::fill(out,out+length,0); + thrust::fill(thrust::device,out,out+length,1); return; } if (N == 1.0f) { - thrust::copy(in1,in1+length,out); + thrust::copy(thrust::device,in1,in1+length,out); return; } if (N == 2.0f){ diff --git a/test/CudaMatrix_Test.cpp b/test/CudaMatrix_Test.cpp index ab0972d..6321dce 100644 --- a/test/CudaMatrix_Test.cpp +++ b/test/CudaMatrix_Test.cpp @@ -186,6 +186,246 @@ TEST_F(CudaMatrix_Test, MatrixMul) { } } +TEST_F(CudaMatrix_Test, MatrixSub) { + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1; + B[i] = i; + } + printf("Test CudaMatrix operator-(const CudaMatrix &aMatrix) const \r\n"); + //CudaMatrix operator+(const CudaMatrix &aMatrix) const + auto C = A-B; + auto dA = A.toDeviceMatrix(); + auto dB = B.toDeviceMatrix(); + auto dC = (dA-dB); + auto dhC = dC.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(C[i],dhC[i]); + } + printf("Test CudaMatrix operator-(float aScalar) const \r\n"); + //CudaMatrix operator+(float aScalar) const + auto D = C-0.5; + auto dD = dC-0.5; + auto dhD = dD.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + printf("Test CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix) \r\n"); + // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) + D = 0.5-C; + dD = 0.5 - dC; + dhD = dD.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + printf("Test CudaMatrix &operator-(float aScalar, CudaMatrix &&aMatrix) \r\n"); + // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) + { + auto dD2 = 0.5 - (dA-dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + printf("Test CudaMatrix &operator-(CudaMatrix &&aMatrix, float aScalar) \r\n"); + // CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) + { + auto E = C-0.5; + auto dE2 = (dA-dB)-0.5; + auto dhE = dE2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(E[i],dhE[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix) const + printf("Test CudaMatrix operator-(CudaMatrix &&aMatrix) const \r\n"); + { + auto D = A-C; + auto dD2 = dA-(dA-dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) + printf("Test CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n"); + { + auto D = C-A; + auto dD2 = (dA-dB)-dA; + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } +} + +TEST_F(CudaMatrix_Test, MatrixDiv) { + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1; + B[i] = i; + } + printf("Test CudaMatrix operator/(const CudaMatrix &aMatrix) const \r\n"); + //CudaMatrix operator+(const CudaMatrix &aMatrix) const + auto C = A/B; + auto dA = A.toDeviceMatrix(); + auto dB = B.toDeviceMatrix(); + auto dC = (dA/dB); + auto dhC = dC.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(C[i],dhC[i]); + } + printf("Test CudaMatrix operator/(float aScalar) const \r\n"); + //CudaMatrix operator+(float aScalar) const + auto D = C/0.5; + auto dD = dC/0.5; + auto dhD = dD.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + printf("Test CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix) \r\n"); + // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) + D = 0.5/C; + dD = 0.5 / dC; + dhD = dD.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + printf("Test CudaMatrix &operator/(float aScalar, CudaMatrix &&aMatrix) \r\n"); + // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) + { + auto dD2 = 0.5 / (dA/dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + printf("Test CudaMatrix &operator/(CudaMatrix &&aMatrix, float aScalar) \r\n"); + // CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) + { + auto E = C/0.5; + auto dE2 = (dA/dB)/0.5; + auto dhE = dE2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(E[i],dhE[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix) const + printf("Test CudaMatrix operator/(CudaMatrix &&aMatrix) const \r\n"); + { + auto D = A/C; + auto dD2 = dA/(dA/dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) + printf("Test CudaMatrix operator/(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n"); + { + auto D = C/A; + auto dD2 = (dA/dB)/dA; + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } +} + +TEST_F(CudaMatrix_Test, MatrixPow){ + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1+0.2*i; + } + auto dA= A.toDeviceMatrix(); + { + auto R = A^0; + auto dR = dA^0; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R = A^1; + auto dR = dA^1; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R = A^2; + auto dR = dA^2; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R = A^3; + auto dR = dA^3; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R = A^5; + auto dR = dA^5; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + +} + +TEST_F(CudaMatrix_Test, MatrixNeg){ + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1+0.2*i; + } + auto dA= A.toDeviceMatrix(); + { + auto R = -A; + auto dR = -dA; + auto dhR = dR.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } +} + + TEST_F(CudaMatrix_Test, matrixfunction) { printf("Test CudaMatrix block(int aDim,int aBeginIndx, int aEndIndex) const\r\n");