diff --git a/src/CudaMatrix.cpp b/src/CudaMatrix.cpp
index 2e97443..f21c25e 100644
--- a/src/CudaMatrix.cpp
+++ b/src/CudaMatrix.cpp
@@ -51,7 +51,7 @@ float CudaMatrix::getScalar() const
 {
     if (isNull()) return 0.0;
     if (isNull()) return 0.0;
-    return getData()[0];
+    return getValue(0);
 }
 
 bool CudaMatrix::isVector() const
@@ -916,5 +916,171 @@ bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMat
         unaryNeg(aMatrix.getData(),out.getData(),aMatrix.getDataSize());
         return out;
     }
+
+    //----compare---------------------------------------------------
+    // Element-wise `this > aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator>(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::G);
+        return result;
+    }
+    // Element-wise `aScalar > aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::G);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator>(const CudaMatrix &aMatrix) const{ // element-wise `this > aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::G); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+
+    // Element-wise `this < aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator<(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::L);
+        return result;
+    }
+    // Element-wise `aScalar < aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::L);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator<(const CudaMatrix &aMatrix) const{ // element-wise `this < aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::L); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+
+    // Element-wise `this >= aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator>=(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::GE);
+        return result;
+    }
+
+    // Element-wise `aScalar >= aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::GE);
+        return result;
+    }
+
+    CudaMatrix CudaMatrix::operator>=(const CudaMatrix &aMatrix) const{ // element-wise `this >= aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::GE); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+    // Element-wise `this <= aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator<=(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::LE);
+        return result;
+    }
+    // Element-wise `aScalar <= aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::LE);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator<=(const CudaMatrix &aMatrix) const{ // element-wise `this <= aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::LE); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+    // Element-wise `this == aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator==(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::E);
+        return result;
+    }
+    // Element-wise `aScalar == aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::E);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator==(const CudaMatrix &aMatrix) const{ // element-wise `this == aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::E); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+    // Element-wise `this != aScalar`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix CudaMatrix::operator!=(float aScalar) const{
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (getDataSize() * getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryCompare(getData(), aScalar, deviceOut, getDataSize(), ::NE);
+        return result;
+    }
+    // Element-wise `aScalar != aMatrix`: each result element is 1 where the test holds and 0 elsewhere.
+    CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix){
+        float* deviceOut = nullptr;
+        cudaMalloc((void**)&deviceOut, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto result = CudaMatrix::fromRawData(deviceOut, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(aScalar, aMatrix.getData(), deviceOut, aMatrix.getDataSize(), ::NE);
+        return result;
+    }
+    CudaMatrix CudaMatrix::operator!=(const CudaMatrix &aMatrix) const{ // element-wise `this != aMatrix`: 1 where true, 0 elsewhere; result uses aMatrix's dims
+        const auto count = getDataSize() < aMatrix.getDataSize() ? getDataSize() : aMatrix.getDataSize(); // clamp: output is sized from aMatrix, a larger `this` must not overrun it
+        float* data = nullptr;
+        cudaMalloc((void**)&data, sizeof(float) * (aMatrix.getDataSize() * aMatrix.getValueType())); // return code is not inspected
+        auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryCompare(this->getData(), aMatrix.getData(), data, count, ::NE); // NOTE(review): a size mismatch is not reported; elements past `count` stay unwritten
+        return out;
+    }
+    // Reads element `index` of the buffer returned by getData() back to the host with a one-float cudaMemcpy.
+    // On a failed copy the CUDA error string is printed to stderr and NaN is returned.
+    float CudaMatrix::getValue(size_t index){
+        float hostValue;
+        const cudaError_t status = cudaMemcpy(&hostValue, getData() + index, sizeof(float), cudaMemcpyDeviceToHost);
+        if (status == cudaSuccess) return hostValue;
+        // NOTE(review): `index` is not range-checked here.
+        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+        return nan("");
+    }
+    // Writes `value` into element `index` of the buffer returned by getData() with a one-float cudaMemcpy; a failure is only logged to stderr.
+    void CudaMatrix::setValue(size_t index, const float& value){
+        const cudaError_t status = cudaMemcpy(getData() + index, &value, sizeof(float), cudaMemcpyHostToDevice);
+        if (status == cudaSuccess) return;
+        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
+    }
 }
 #endif // USE_CUDA
diff --git a/src/CudaMatrix.h b/src/CudaMatrix.h
index d818526..5daec9a 100644
--- a/src/CudaMatrix.h
+++ b/src/CudaMatrix.h
@@ -113,8 +113,8 @@ namespace Aurora
         CudaMatrix operator!=(const CudaMatrix &aMatrix) const;
 
         // sub
-        float& operator[](size_t index);
-        float operator[](size_t index) const;
+        float getValue(size_t index); // copies one float at getData()+index to the host; NaN on a CUDA error. NOTE(review): non-const, yet getScalar() const calls it — confirm a const qualifier is not needed
+        void setValue(size_t index, const float& value); // copies `value` from the host to getData()+index; a CUDA error is only logged to stderr
 
         /**
          * 切块操作
diff --git a/src/CudaMatrixPrivate.cu b/src/CudaMatrixPrivate.cu
index 8feb8b7..b6b5ac0 100644
--- a/src/CudaMatrixPrivate.cu
+++ b/src/CudaMatrixPrivate.cu
@@ -5,9 +5,9 @@
 #include <thrust/execution_policy.h>
 using namespace thrust::placeholders;
 
-struct PowOperator: public thrust::unary_function<float, float>{
+struct PowOp: public thrust::unary_function<float, float>{
     float exponent;
-    PowOperator(float v):exponent(v) {}
+    PowOp(float v):exponent(v) {}
     void setExponent(float v){
         exponent = v;
     }
@@ -18,6 +18,114 @@ struct PowOperator: public thrust::unary_function<float, float>{
     }
 };
 
+// Unary functor for thrust::transform: returns 1.0f where x > threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareGOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareGOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent < x) ? 1.0f : 0.0f;
+    }
+};
+
+// Unary functor for thrust::transform: returns 1.0f where x >= threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareGEOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareGEOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent <= x) ? 1.0f : 0.0f;
+    }
+};
+
+// Unary functor for thrust::transform: returns 1.0f where x == threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareEOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareEOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent == x) ? 1.0f : 0.0f;
+    }
+};
+
+// Unary functor for thrust::transform: returns 1.0f where x != threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareNEOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareNEOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent != x) ? 1.0f : 0.0f;
+    }
+};
+
+// Unary functor for thrust::transform: returns 1.0f where x < threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareLOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareLOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent > x) ? 1.0f : 0.0f;
+    }
+};
+
+// Unary functor for thrust::transform: returns 1.0f where x <= threshold and 0.0f otherwise.
+// The threshold lives in `exponent`, the member name PowOp also uses, so setExponent() keeps its name.
+struct CompareLEOp: public thrust::unary_function<float, float>{
+    float exponent;
+    CompareLEOp(float v):exponent(v) {}
+    void setExponent(float v){ exponent = v; }
+
+    __host__ __device__
+    float operator()(const float& x) const {
+        return (exponent >= x) ? 1.0f : 0.0f;
+    }
+};
+
+
+// Binary functor for thrust::transform over two arrays: 1.0f where x > y, otherwise 0.0f.
+struct CompareAGOp{
+    __host__ __device__ float operator()(const float& x, const float& y) const {
+        return (x > y) ? 1.0f : 0.0f;
+    }
+};
+
+// Binary functor for thrust::transform over two arrays: 1.0f where x >= y, otherwise 0.0f.
+struct CompareAGEOp{
+    __host__ __device__ float operator()(const float& x, const float& y) const {
+        return (x >= y) ? 1.0f : 0.0f;
+    }
+};
+
+// Binary functor for thrust::transform over two arrays: 1.0f where x == y, otherwise 0.0f.
+struct CompareAEOp{
+    __host__ __device__ float operator()(const float& x, const float& y) const {
+        return (x == y) ? 1.0f : 0.0f;
+    }
+};
+
+// Binary functor for thrust::transform over two arrays: 1.0f where x != y, otherwise 0.0f.
+struct CompareANEOp{
+    __host__ __device__ float operator()(const float& x, const float& y) const {
+        return (x != y) ? 1.0f : 0.0f;
+    }
+};
+
+
 void unaryAdd(float* in1, float* in2, float* out, unsigned long  length)
 {
     thrust::plus<float> op;
@@ -88,7 +196,83 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
         thrust::transform(thrust::device,in1,in1+length,out,op);
         return;
     }
-    thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
+    thrust::transform(thrust::device,in1,in1+length,out,PowOp(N));
+}
+
+// Compares each element of an array with a scalar and writes the outcome as 1.0f / 0.0f.
+//
+//   in1    - start of the input range [in1, in1 + length)
+//   in2    - scalar on the right-hand side of the comparison
+//   out    - start of the output range; receives `length` floats
+//   length - number of elements to compare
+//   type   - CompareType selector; out[i] answers in1[i] > in2 (G), >= (GE), == (E), != (NE), <= (LE), < (L)
+//
+// Work is dispatched with the thrust::device execution policy. Each functor stores the scalar and tests
+// it against the element, e.g. CompareGOp(in2) evaluates in2 < x, which is x > in2.
+//
+// An unrecognised `type` does nothing, so `out` is left unwritten.
+void unaryCompare(float* in1, const float& in2, float* out, unsigned long length, int type){
+    float* const last = in1 + length;
+    switch (type)
+    {
+        case G:  thrust::transform(thrust::device, in1, last, out, CompareGOp(in2));  break;
+        case GE: thrust::transform(thrust::device, in1, last, out, CompareGEOp(in2)); break;
+        case E:  thrust::transform(thrust::device, in1, last, out, CompareEOp(in2));  break;
+        case NE: thrust::transform(thrust::device, in1, last, out, CompareNEOp(in2)); break;
+        case LE: thrust::transform(thrust::device, in1, last, out, CompareLEOp(in2)); break;
+        case L:  thrust::transform(thrust::device, in1, last, out, CompareLOp(in2));  break;
+        default: break;
+    }
+}
+// Compares a scalar with each element of an array and writes the outcome as 1.0f / 0.0f.
+//
+//   in1    - scalar on the left-hand side of the comparison
+//   in2    - start of the input range [in2, in2 + length)
+//   out    - start of the output range; receives `length` floats
+//   length - number of elements to compare
+//   type   - CompareType selector; out[i] answers in1 > in2[i] (G), >= (GE), == (E), != (NE), <= (LE), < (L)
+//
+// Work is dispatched with the thrust::device execution policy. Because the scalar is on the left, the
+// mirrored functor is used: e.g. G picks CompareLOp(in1), which evaluates in1 > x.
+//
+// An unrecognised `type` does nothing, so `out` is left unwritten.
+void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type){
+    float* const last = in2 + length;
+    switch (type)
+    {
+        case G:  thrust::transform(thrust::device, in2, last, out, CompareLOp(in1));  break;
+        case GE: thrust::transform(thrust::device, in2, last, out, CompareLEOp(in1)); break;
+        case E:  thrust::transform(thrust::device, in2, last, out, CompareEOp(in1));  break;
+        case NE: thrust::transform(thrust::device, in2, last, out, CompareNEOp(in1)); break;
+        case LE: thrust::transform(thrust::device, in2, last, out, CompareGEOp(in1)); break;
+        case L:  thrust::transform(thrust::device, in2, last, out, CompareGOp(in1));  break;
+        default: break;
+    }
+}
+// Compares two arrays element by element and writes the outcome as 1.0f / 0.0f.
+//
+//   in1    - start of the left-hand input range; `length` elements are read
+//   in2    - start of the right-hand input range; `length` elements are read
+//   out    - start of the output range; receives `length` floats
+//   length - number of element pairs to compare
+//   type   - CompareType selector; out[i] answers in1[i] > in2[i] (G), >= (GE), == (E), != (NE), <= (LE), < (L)
+//
+// Work is dispatched with the thrust::device execution policy. LE and L have no functor of their own:
+// they swap the two inputs and reuse CompareAGEOp / CompareAGOp (in2[i] >= in1[i], in2[i] > in1[i]).
+//
+// An unrecognised `type` does nothing, so `out` is left unwritten.
+void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type){
+    float* const last1 = in1 + length;
+    float* const last2 = in2 + length;
+    switch (type)
+    {
+        case G:  thrust::transform(thrust::device, in1, last1, in2, out, CompareAGOp());  break;
+        case GE: thrust::transform(thrust::device, in1, last1, in2, out, CompareAGEOp()); break;
+        case E:  thrust::transform(thrust::device, in1, last1, in2, out, CompareAEOp());  break;
+        case NE: thrust::transform(thrust::device, in1, last1, in2, out, CompareANEOp()); break;
+        case LE: thrust::transform(thrust::device, in2, last2, in1, out, CompareAGEOp()); break;
+        case L:  thrust::transform(thrust::device, in2, last2, in1, out, CompareAGOp());  break;
+        default: break; }
 }
 
 void thrustFill(float* aBegin, float* aEnd, float aValue)
diff --git a/src/CudaMatrixPrivate.cuh b/src/CudaMatrixPrivate.cuh
index 2853310..8b60081 100644
--- a/src/CudaMatrixPrivate.cuh
+++ b/src/CudaMatrixPrivate.cuh
@@ -3,7 +3,12 @@
 #define __CUDAMATRIX_CUH__
 
 #include <complex>
-
+namespace{ // anonymous namespace in a header: each translation unit that includes it gets its own CompareType
+    enum CompareType // comparison selector passed as the `type` argument of unaryCompare()
+    {
+        G,GE,E,NE,LE,L // greater, greater-or-equal, equal, not-equal, less-or-equal, less
+    };
+}
 void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
 void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
 void unaryMul(float* in1, float* in2, float* out, unsigned long length);
@@ -19,6 +24,12 @@ void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
 void unarySub(float* in1, const float& in2, float* out, unsigned long length);
 void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);
 
+
+void unaryCompare(float* in1, const float& in2, float* out, unsigned long length,int type); // array vs scalar: out[i] = 1 if in1[i] <type> in2, else 0
+void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type); // scalar vs array: out[i] = 1 if in1 <type> in2[i], else 0
+void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type); // array vs array: out[i] = 1 if in1[i] <type> in2[i], else 0; `type` is a CompareType value
+
+
 void thrustFill(float* aBegin, float* aEnd, float aValue);
 void thrustFill(float* aBegin, float* aEnd, std::complex<float> aValue);
 
diff --git a/test/CudaMatrix_Test.cpp b/test/CudaMatrix_Test.cpp
index 6321dce..7e894d0 100644
--- a/test/CudaMatrix_Test.cpp
+++ b/test/CudaMatrix_Test.cpp
@@ -425,6 +425,113 @@ TEST_F(CudaMatrix_Test, MatrixNeg){
     }
 }
 
+TEST_F(CudaMatrix_Test, MatrixCompare){ // every device comparison operator must agree element-wise with its host counterpart
+    auto A = Aurora::zeros(1000,1,1);
+    auto B = Aurora::zeros(1000,1,1);
+    for (size_t i = 0; i < 1000; i++)
+    {
+        A[i] = -1+0.2*i; // ramp starting at -1: has values on both sides of 0 (B's value) and of 9 (the scalar used below)
+    }
+    auto dA= A.toDeviceMatrix();
+    auto dB= B.toDeviceMatrix();
+    { // matrix < matrix
+        auto R= (A<B);
+        auto dhR = (dA<dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // matrix > matrix
+        auto R= (A>B);
+        auto dhR = (dA>dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // matrix <= matrix
+        auto R= (A<=B);
+        auto dhR = (dA<=dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // matrix >= matrix
+        auto R= (A>=B);
+        auto dhR = (dA>=dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // matrix == matrix
+        auto R= (A==B);
+        auto dhR = (dA==dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // matrix != matrix
+        auto R= (A!=B);
+        auto dhR = (dA!=dB).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar < matrix: run against the ramp A so both outcomes of the predicate occur
+        auto R= (9<A);
+        auto dhR = (9<dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar > matrix
+        auto R= (9>A);
+        auto dhR = (9>dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar <= matrix
+        auto R= (9<=A);
+        auto dhR = (9<=dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar >= matrix
+        auto R= (9>=A);
+        auto dhR = (9>=dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar == matrix
+        auto R= (9==A);
+        auto dhR = (9==dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+    { // scalar != matrix
+        auto R= (9!=A);
+        auto dhR = (9!=dA).toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            ASSERT_FLOAT_EQ(R[i],dhR[i]);
+        }
+    }
+}
+
 
 TEST_F(CudaMatrix_Test, matrixfunction)
 {