diff --git a/src/CudaMatrix.cpp b/src/CudaMatrix.cpp index 2e97443..f21c25e 100644 --- a/src/CudaMatrix.cpp +++ b/src/CudaMatrix.cpp @@ -51,7 +51,7 @@ float CudaMatrix::getScalar() const { if (isNull()) return 0.0; if (isNull()) return 0.0; - return getData()[0]; + return getValue(0); } bool CudaMatrix::isVector() const @@ -916,5 +916,171 @@ bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMat unaryNeg(aMatrix.getData(),out.getData(),aMatrix.getDataSize()); return out; } + + //----compare--------------------------------------------------- + CudaMatrix CudaMatrix::operator>(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::G); + return out; + } + CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::G); + return out; + } + CudaMatrix CudaMatrix::operator>(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::G); + return out; + } + + CudaMatrix CudaMatrix::operator<(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::L); + return out; + } + CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::L); + return out; + } + CudaMatrix CudaMatrix::operator<(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::L); + return out; + } + + CudaMatrix CudaMatrix::operator>=(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::GE); + return out; + } + + CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::GE); + return out; + } + + CudaMatrix CudaMatrix::operator>=(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::GE); + return out; + } + CudaMatrix CudaMatrix::operator<=(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::LE); + return out; + } + CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::LE); + return out; + } + CudaMatrix CudaMatrix::operator<=(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::LE); + return out; + } + CudaMatrix CudaMatrix::operator==(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::E); + return out; + } + CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::E); + return out; + } + CudaMatrix CudaMatrix::operator==(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::E); + return out; + } + CudaMatrix CudaMatrix::operator!=(float aScalar) const{ + float* data = nullptr; + unsigned long long size = this->getDataSize() * this->getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType()); + unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::NE); + return out; + } + CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix){ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::NE); + return out; + } + CudaMatrix CudaMatrix::operator!=(const CudaMatrix &aMatrix) const{ + float* data = nullptr; + unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType(); + cudaMalloc((void**)&data, sizeof(float) * size); + auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType()); + unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::NE); + return out; + } + float CudaMatrix::getValue(size_t index){ + float result; + cudaError_t cuda_error = cudaMemcpy(&result, getData() + index, sizeof(float), cudaMemcpyDeviceToHost); + + if (cuda_error != cudaSuccess) { + fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error)); + return nan(""); + } + return result; + } + void CudaMatrix::setValue(size_t index, const float& value){ + cudaError_t cuda_error = cudaMemcpy( getData() + index,&value, sizeof(float), cudaMemcpyHostToDevice); + if (cuda_error != cudaSuccess) { + fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error)); + } + } } #endif // USE_CUDA diff --git a/src/CudaMatrix.h b/src/CudaMatrix.h index d818526..5daec9a 100644 --- a/src/CudaMatrix.h +++ b/src/CudaMatrix.h @@ -113,8 +113,8 @@ namespace Aurora CudaMatrix operator!=(const CudaMatrix &aMatrix) const; // sub - float& operator[](size_t index); - float operator[](size_t index) const; + float getValue(size_t index); + void setValue(size_t index, const float& value); /** * 切块操作 diff --git a/src/CudaMatrixPrivate.cu b/src/CudaMatrixPrivate.cu index 8feb8b7..b6b5ac0 100644 --- a/src/CudaMatrixPrivate.cu +++ b/src/CudaMatrixPrivate.cu @@ -5,9 +5,9 @@ #include using namespace thrust::placeholders; -struct PowOperator: public thrust::unary_function{ +struct PowOp: public thrust::unary_function{ float exponent; - PowOperator(float v):exponent(v) {} + PowOp(float v):exponent(v) {} void setExponent(float v){ exponent = v; } @@ -18,6 +18,114 @@ struct PowOperator: public thrust::unary_function{ } }; +struct CompareGOp: public thrust::unary_function{ + float exponent; + CompareGOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent{ + float exponent; + CompareGEOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent<=x?1.0:.0); + } +}; + +struct CompareEOp: public thrust::unary_function{ + float exponent; + CompareEOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent==x?1.0:.0); + } +}; + +struct CompareNEOp: public thrust::unary_function{ + float exponent; + CompareNEOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent!=x?1.0:.0); + } +}; + +struct CompareLOp: public thrust::unary_function{ + float exponent; + CompareLOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent>x?1.0:.0); + } +}; + +struct CompareLEOp: public thrust::unary_function{ + float exponent; + CompareLEOp(float v):exponent(v) {} + void setExponent(float v){ + exponent = v; + } + + __host__ __device__ + float operator()(const float& x) { + return (exponent>=x?1.0:.0); + } +}; + + +struct CompareAGOp{ + __host__ __device__ + float operator()(const float& x,const float& y) { + return x>y?1:0; + } +}; + +struct CompareAGEOp{ + __host__ __device__ + float operator()(const float& x,const float& y) { + return x>=y?1:0; + } +}; + +struct CompareAEOp{ + __host__ __device__ + float operator()(const float& x,const float& y) { + return x==y?1:0; + } +}; + +struct CompareANEOp{ + __host__ __device__ + float operator()(const float& x,const float& y) { + return x!=y?1:0; + } +}; + + void unaryAdd(float* in1, float* in2, float* out, unsigned long length) { thrust::plus op; @@ -88,7 +196,83 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){ thrust::transform(thrust::device,in1,in1+length,out,op); return; } - thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N)); + thrust::transform(thrust::device,in1,in1+length,out,PowOp(N)); +} + +void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){ + switch (type) + { + case G: + thrust::transform(thrust::device,in1,in1+length,out,CompareGOp(in2)); + break; + case GE: + thrust::transform(thrust::device,in1,in1+length,out,CompareGEOp(in2)); + break; + case E: + thrust::transform(thrust::device,in1,in1+length,out,CompareEOp(in2)); + break; + case NE: + thrust::transform(thrust::device,in1,in1+length,out,CompareNEOp(in2)); + break; + case LE: + thrust::transform(thrust::device,in1,in1+length,out,CompareLEOp(in2)); + break; + case L: + thrust::transform(thrust::device,in1,in1+length,out,CompareLOp(in2)); + break; + default: + break; + } +} +void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type){ + switch (type) + { + case G: + thrust::transform(thrust::device,in2,in2+length,out,CompareLOp(in1)); + break; + case GE: + thrust::transform(thrust::device,in2,in2+length,out,CompareLEOp(in1)); + break; + case E: + thrust::transform(thrust::device,in2,in2+length,out,CompareEOp(in1)); + break; + case NE: + thrust::transform(thrust::device,in2,in2+length,out,CompareNEOp(in1)); + break; + case LE: + thrust::transform(thrust::device,in2,in2+length,out,CompareGEOp(in1)); + break; + case L: + thrust::transform(thrust::device,in2,in2+length,out,CompareGOp(in1)); + break; + default: + break; + } +} +void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){ + switch (type) + { + case G: + thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGOp()); + break; + case GE: + thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAGEOp()); + break; + case E: + thrust::transform(thrust::device,in1,in1+length,in2,out,CompareAEOp()); + break; + case NE: + thrust::transform(thrust::device,in1,in1+length,in2,out,CompareANEOp()); + break; + case LE: + thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGEOp()); + break; + case L: + thrust::transform(thrust::device,in2,in2+length,in1,out,CompareAGOp()); + break; + default: + break; + } } void thrustFill(float* aBegin, float* aEnd, float aValue) diff --git a/src/CudaMatrixPrivate.cuh b/src/CudaMatrixPrivate.cuh index 2853310..8b60081 100644 --- a/src/CudaMatrixPrivate.cuh +++ b/src/CudaMatrixPrivate.cuh @@ -3,7 +3,12 @@ #define __CUDAMATRIX_CUH__ #include - +namespace{ + enum CompareType + { + G,GE,E,NE,LE,L + }; +} void unaryAdd(float* in1, float* in2, float* out, unsigned long length); void unaryAdd(float* in1, const float& in2, float* out, unsigned long length); void unaryMul(float* in1, float* in2, float* out, unsigned long length); @@ -19,6 +24,12 @@ void unaryDiv(const float& in1, float* in2, float* out, unsigned long length); void unarySub(float* in1, const float& in2, float* out, unsigned long length); void unaryDiv(float* in1, const float& in2, float* out, unsigned long length); + +void unaryCompare(float* in1, const float& in2, float* out, unsigned long length,int type); +void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type); +void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type); + + void thrustFill(float* aBegin, float* aEnd, float aValue); void thrustFill(float* aBegin, float* aEnd, std::complex aValue); diff --git a/test/CudaMatrix_Test.cpp b/test/CudaMatrix_Test.cpp index 6321dce..7e894d0 100644 --- a/test/CudaMatrix_Test.cpp +++ b/test/CudaMatrix_Test.cpp @@ -425,6 +425,113 @@ TEST_F(CudaMatrix_Test, MatrixNeg){ } } +TEST_F(CudaMatrix_Test, MatrixCompare){ + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1+0.2*i; + } + auto dA= A.toDeviceMatrix(); + auto dB= B.toDeviceMatrix(); + { + auto R= (AB); + auto dhR = (dA>dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (A<=B); + auto dhR = (dA<=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (A>=B); + auto dhR = (dA>=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (A==B); + auto dhR = (dA==dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (A!=B); + auto dhR = (dA!=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (9B); + auto dhR = (9>dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (9<=B); + auto dhR = (9<=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (9>=B); + auto dhR = (9>=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (9==B); + auto dhR = (9==dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } + { + auto R= (9!=B); + auto dhR = (9!=dB).toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(R[i],dhR[i]); + } + } +} + TEST_F(CudaMatrix_Test, matrixfunction) {