Cuda matrix compare and value getter and setter

2023-11-21 13:13:28 +08:00
parent 4edb2d133d
commit aaf8c1b193
5 changed files with 474 additions and 6 deletions
--- a/src/CudaMatrix.cpp
+++ b/src/CudaMatrix.cpp
@@ -912,5 +912,171 @@ bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMat
        unaryNeg(aMatrix.getData(),out.getData(),aMatrix.getDataSize());
        return out;
    }
+
+    //----compare---------------------------------------------------
+    CudaMatrix CudaMatrix::operator>(float aScalar) const{
+        // Element-wise "this > aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::G);
+        return result;
+    }
+    CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar > aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::G);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator>(const CudaMatrix &aMatrix) const{
+        // Element-wise "this > aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator>: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::G);
+        return out;
+    }
+
+    CudaMatrix CudaMatrix::operator<(float aScalar) const{
+        // Element-wise "this < aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::L);
+        return result;
+    }
+    CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar < aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::L);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator<(const CudaMatrix &aMatrix) const{
+        // Element-wise "this < aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator<: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::L);
+        return out;
+    }
+
+    CudaMatrix CudaMatrix::operator>=(float aScalar) const{
+        // Element-wise "this >= aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::GE);
+        return result;
+    }
+
+    CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar >= aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::GE);
+        return result;
+    }
+
+    CudaMatrix CudaMatrix::operator>=(const CudaMatrix &aMatrix) const{
+        // Element-wise "this >= aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator>=: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::GE);
+        return out;
+    }
+    CudaMatrix CudaMatrix::operator<=(float aScalar) const{
+        // Element-wise "this <= aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::LE);
+        return result;
+    }
+    CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar <= aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::LE);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator<=(const CudaMatrix &aMatrix) const{
+        // Element-wise "this <= aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator<=: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::LE);
+        return out;
+    }
+    CudaMatrix CudaMatrix::operator==(float aScalar) const{
+        // Element-wise "this == aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::E);
+        return result;
+    }
+    CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar == aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::E);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator==(const CudaMatrix &aMatrix) const{
+        // Element-wise "this == aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator==: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::E);
+        return out;
+    }
+    CudaMatrix CudaMatrix::operator!=(float aScalar) const{
+        // Element-wise "this != aScalar". The result copies this matrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of the getDataSize() elements.
+        const unsigned long long floatCount = getDataSize() * getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, resultBuffer, getDataSize(), ::NE);
+        return result;
+    }
+    CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix){
+        // Element-wise "aScalar != aMatrix". The result copies aMatrix's dimensions and
+        // value type; unaryCompare writes 1 or 0 for each of its getDataSize() elements.
+        const unsigned long long floatCount = aMatrix.getDataSize() * aMatrix.getValueType();
+        float* resultBuffer = nullptr;
+        cudaMalloc((void**)&resultBuffer, sizeof(float) * floatCount);
+        auto result = CudaMatrix::fromRawData(resultBuffer, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), resultBuffer, aMatrix.getDataSize(), ::NE);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator!=(const CudaMatrix &aMatrix) const{
+        // Element-wise "this != aMatrix". The result takes aMatrix's dimensions and value
+        // type; unaryCompare writes 1 where the comparison holds and 0 otherwise.
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        cudaMalloc((void**)&data, sizeof(float) * size);
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        // The output buffer is sized from aMatrix, so only the elements both operands
+        // hold are compared; walking this->getDataSize() elements unconditionally would
+        // overrun aMatrix and the output whenever *this is the larger matrix.
+        const auto lhsCount = this->getDataSize();
+        const auto rhsCount = aMatrix.getDataSize();
+        if (lhsCount != rhsCount) {
+            fprintf(stderr, "CudaMatrix::operator!=: operand sizes differ, comparing the common prefix only\n");
+        }
+        unaryCompare(this->getData(),aMatrix.getData(),data,lhsCount < rhsCount ? lhsCount : rhsCount,::NE);
+        return out;
+    }
+    float CudaMatrix::getValue(size_t index){
+        // Reads one float at element offset `index` of the device buffer with a blocking
+        // device-to-host cudaMemcpy. On failure the CUDA error text goes to stderr and NaN
+        // is returned.
+        float hostValue;
+        const cudaError_t status = cudaMemcpy(&hostValue, getData() + index, sizeof(float), cudaMemcpyDeviceToHost);
+        if (status == cudaSuccess) {
+            return hostValue;
+        }
+
+        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+        return nan("");
+    }
+    void CudaMatrix::setValue(size_t index, const float& value){
+        // Writes `value` to element offset `index` of the device buffer with a blocking
+        // host-to-device cudaMemcpy. A failure is only reported on stderr.
+        const cudaError_t status = cudaMemcpy(getData() + index, &value, sizeof(float), cudaMemcpyHostToDevice);
+        if (status == cudaSuccess) {
+            return;
+        }
+        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+    }
 }
 #endif // USE_CUDA
--- a/src/CudaMatrix.h
+++ b/src/CudaMatrix.h
@@ -113,8 +113,8 @@ namespace Aurora
        CudaMatrix operator!=(const CudaMatrix &aMatrix) const;

        // sub
-        float& operator[](size_t index);
-        float operator[](size_t index) const;
+        // Copies the float at element offset `index` of the device buffer back to the
+        // host and returns it; on a CUDA error the implementation logs to stderr and
+        // returns NaN.
+        float getValue(size_t index);
+        // Copies `value` from the host into the float at element offset `index` of the
+        // device buffer; a CUDA error is logged to stderr.
+        void setValue(size_t index, const float& value);

        /**
         * 切块操作
--- a/src/CudaMatrixPrivate.cu
+++ b/src/CudaMatrixPrivate.cu
@@ -5,9 +5,9 @@
 #include <thrust/execution_policy.h>
 using namespace thrust::placeholders;

-struct PowOperator: public thrust::unary_function<float, float>{
+struct PowOp: public thrust::unary_function<float, float>{
    float exponent;
-    PowOperator(float v):exponent(v) {}
+    PowOp(float v):exponent(v) {}
    void setExponent(float v){
        exponent = v;
    }
@@ -18,6 +18,114 @@ struct PowOperator: public thrust::unary_function<float, float>{
    }
 };

+// Unary functor for "x > scalar": yields 1.0f when the element is strictly greater
+// than the stored scalar, 0.0f otherwise.
+struct CompareGOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareGOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent<x?1.0f:0.0f);
+    }
+};
+
+// Unary functor for "x >= scalar": yields 1.0f when the element is greater than or
+// equal to the stored scalar, 0.0f otherwise.
+struct CompareGEOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareGEOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent<=x?1.0f:0.0f);
+    }
+};
+
+// Unary functor for "x == scalar": yields 1.0f when the element equals the stored
+// scalar, 0.0f otherwise.
+struct CompareEOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareEOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent==x?1.0f:0.0f);
+    }
+};
+
+// Unary functor for "x != scalar": yields 1.0f when the element differs from the
+// stored scalar, 0.0f otherwise.
+struct CompareNEOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareNEOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent!=x?1.0f:0.0f);
+    }
+};
+
+// Unary functor for "x < scalar": yields 1.0f when the element is strictly less
+// than the stored scalar, 0.0f otherwise.
+struct CompareLOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareLOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent>x?1.0f:0.0f);
+    }
+};
+
+// Unary functor for "x <= scalar": yields 1.0f when the element is less than or
+// equal to the stored scalar, 0.0f otherwise.
+struct CompareLEOp: public thrust::unary_function<float, float>{
+    // Scalar the elements are compared against (member name kept for compatibility).
+    float exponent;
+    CompareLEOp(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+
+    // Float literals avoid a double-precision round trip in device code; the call
+    // operator is const because it never modifies the functor.
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent>=x?1.0f:0.0f);
+    }
+};
+
+
+// Binary functor for "x > y": yields 1.0f when the first argument is strictly
+// greater than the second, 0.0f otherwise.
+struct CompareAGOp{
+    // Float literals state the result type directly; const lets the functor be
+    // invoked through a const copy.
+    __host__ __device__
+    float operator()(const float& x,const float& y) const {
+        return x>y?1.0f:0.0f;
+    }
+};
+
+// Binary functor for "x >= y": yields 1.0f when the first argument is greater than
+// or equal to the second, 0.0f otherwise.
+struct CompareAGEOp{
+    // Float literals state the result type directly; const lets the functor be
+    // invoked through a const copy.
+    __host__ __device__
+    float operator()(const float& x,const float& y) const {
+        return x>=y?1.0f:0.0f;
+    }
+};
+
+// Binary functor for "x == y": yields 1.0f when both arguments compare equal,
+// 0.0f otherwise.
+struct CompareAEOp{
+    // Float literals state the result type directly; const lets the functor be
+    // invoked through a const copy.
+    __host__ __device__
+    float operator()(const float& x,const float& y) const {
+        return x==y?1.0f:0.0f;
+    }
+};
+
+// Binary functor for "x != y": yields 1.0f when the arguments differ,
+// 0.0f otherwise.
+struct CompareANEOp{
+    // Float literals state the result type directly; const lets the functor be
+    // invoked through a const copy.
+    __host__ __device__
+    float operator()(const float& x,const float& y) const {
+        return x!=y?1.0f:0.0f;
+    }
+};
+
+
 void unaryAdd(float* in1, float* in2, float* out, unsigned long  length)
 {
    thrust::plus<float> op;
@@ -88,7 +196,83 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
        thrust::transform(thrust::device,in1,in1+length,out,op);
        return;
    }
-    thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
+    thrust::transform(thrust::device,in1,in1+length,out,PowOp(N));
+}
+
+// Compares every element of in1 against the scalar in2 ("in1[i] <op> in2") and stores
+// 1 or 0 in out[i]. `type` selects the operator via the CompareType values; any other
+// value leaves `out` untouched.
+void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){
+    float* const last = in1 + length;
+    if (type == G) {
+        thrust::transform(thrust::device, in1, last, out, CompareGOp(in2));
+    } else if (type == GE) {
+        thrust::transform(thrust::device, in1, last, out, CompareGEOp(in2));
+    } else if (type == E) {
+        thrust::transform(thrust::device, in1, last, out, CompareEOp(in2));
+    } else if (type == NE) {
+        thrust::transform(thrust::device, in1, last, out, CompareNEOp(in2));
+    } else if (type == LE) {
+        thrust::transform(thrust::device, in1, last, out, CompareLEOp(in2));
+    } else if (type == L) {
+        thrust::transform(thrust::device, in1, last, out, CompareLOp(in2));
+    }
+}
+// Compares the scalar in1 against every element of in2 ("in1 <op> in2[i]") and stores
+// 1 or 0 in out[i]. Because the scalar is on the left, each operator is evaluated with
+// the mirrored element-vs-scalar functor (e.g. "s > x" uses the "x < s" functor).
+// Any `type` outside the CompareType values leaves `out` untouched.
+void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type){
+    float* const last = in2 + length;
+    if (type == G) {
+        thrust::transform(thrust::device, in2, last, out, CompareLOp(in1));
+    } else if (type == GE) {
+        thrust::transform(thrust::device, in2, last, out, CompareLEOp(in1));
+    } else if (type == E) {
+        thrust::transform(thrust::device, in2, last, out, CompareEOp(in1));
+    } else if (type == NE) {
+        thrust::transform(thrust::device, in2, last, out, CompareNEOp(in1));
+    } else if (type == LE) {
+        thrust::transform(thrust::device, in2, last, out, CompareGEOp(in1));
+    } else if (type == L) {
+        thrust::transform(thrust::device, in2, last, out, CompareGOp(in1));
+    }
+}
+// Compares in1[i] against in2[i] for `length` elements and stores 1 or 0 in out[i].
+// LE and L reuse the >= and > functors with the two inputs swapped. Any `type`
+// outside the CompareType values leaves `out` untouched.
+void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){
+    if (type == G) {
+        thrust::transform(thrust::device, in1, in1 + length, in2, out, CompareAGOp());
+    } else if (type == GE) {
+        thrust::transform(thrust::device, in1, in1 + length, in2, out, CompareAGEOp());
+    } else if (type == E) {
+        thrust::transform(thrust::device, in1, in1 + length, in2, out, CompareAEOp());
+    } else if (type == NE) {
+        thrust::transform(thrust::device, in1, in1 + length, in2, out, CompareANEOp());
+    } else if (type == LE) {
+        // in1 <= in2 is evaluated as in2 >= in1
+        thrust::transform(thrust::device, in2, in2 + length, in1, out, CompareAGEOp());
+    } else if (type == L) {
+        // in1 < in2 is evaluated as in2 > in1
+        thrust::transform(thrust::device, in2, in2 + length, in1, out, CompareAGOp());
+    }
 }

 void thrustFill(float* aBegin, float* aEnd, float aValue)
--- a/src/CudaMatrixPrivate.cuh
+++ b/src/CudaMatrixPrivate.cuh
@@ -3,7 +3,12 @@
 #define __CUDAMATRIX_CUH__

 #include <complex>
-
+namespace{
+    // Operator selector passed as the `type` argument of unaryCompare.
+    enum CompareType
+    {
+        G,   // greater than
+        GE,  // greater than or equal
+        E,   // equal
+        NE,  // not equal
+        LE,  // less than or equal
+        L    // less than
+    };
+}
 void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
 void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
 void unaryMul(float* in1, float* in2, float* out, unsigned long length);
@@ -19,6 +24,12 @@ void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
 void unarySub(float* in1, const float& in2, float* out, unsigned long length);
 void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);

+
+// Element-wise comparisons writing 1 (true) or 0 (false) into out[0..length).
+// `type` is one of the CompareType values and selects the operator.
+// Array vs. scalar: out[i] = in1[i] <op> in2.
+void unaryCompare(float* in1, const float& in2, float* out, unsigned long length,int type);
+// Scalar vs. array: out[i] = in1 <op> in2[i].
+void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type);
+// Array vs. array: out[i] = in1[i] <op> in2[i].
+void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type);
+
+
 void thrustFill(float* aBegin, float* aEnd, float aValue);
 void thrustFill(float* aBegin, float* aEnd, std::complex<float> aValue);