Fix unit test; add CudaMatrix add and mul operators

2023-11-01 14:31:29 +08:00
parent fe0abf8ee6
commit 029b86013e
13 changed files with 1108 additions and 617 deletions
--- a/src/CudaMatrix.cpp
+++ b/src/CudaMatrix.cpp
@@ -9,7 +9,7 @@
 #include <cuda_runtime.h>
 #include "CudaMatrixPrivate.cuh"

-using namespace Aurora;
+namespace Aurora{

 CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
    : mValueType(aValueType)
@@ -241,8 +241,66 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
    return true;
 }

+
+/**
+ * Adds a scalar to every element of a real matrix and returns the sums in a new matrix.
+ *
+ * The result buffer is allocated with cudaMalloc and wrapped via fromRawData using this
+ * matrix's three dimension sizes and value type.
+ *
+ * @param aScalar value added to each element.
+ * @return the new matrix; a default-constructed CudaMatrix if this matrix is complex or
+ *         the device allocation fails (a message is written to std::cerr in both cases).
+ */
+CudaMatrix CudaMatrix::operator+(float aScalar) const{
+    if (isComplex())
+    {
+        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
+        return  CudaMatrix();
+    }
+    float* data = nullptr;
+    unsigned long long size =  getDataSize() * getValueType();
+    // cudaMalloc reports failure only through its return code; do not launch on a bad buffer.
+    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
+    if (status != cudaSuccess)
+    {
+        std::cerr<<"operator+(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
+        return  CudaMatrix();
+    }
+    auto out =  CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+    unaryAdd(getData(),aScalar,out.getData(),getDataSize());
+    return out;
+}
+
+/**
+ * Scalar-on-the-left addition: returns a new matrix holding aScalar + each element of
+ * a real matrix.
+ *
+ * The result buffer is allocated with cudaMalloc and wrapped via fromRawData using
+ * aMatrix's three dimension sizes and value type.
+ *
+ * @param aScalar value added to each element.
+ * @param aMatrix source matrix; it is not modified.
+ * @return the new matrix; a default-constructed CudaMatrix if aMatrix is complex or the
+ *         device allocation fails (a message is written to std::cerr in both cases).
+ */
+CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix){
+    if (aMatrix.isComplex())
+    {
+        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
+        return  CudaMatrix();
+    }
+    float* data = nullptr;
+    unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+    // cudaMalloc reports failure only through its return code; do not launch on a bad buffer.
+    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
+    if (status != cudaSuccess)
+    {
+        std::cerr<<"operator+(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
+        return  CudaMatrix();
+    }
+    auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+    unaryAdd(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
+    return out;
+}
+
+/**
+ * Scalar-on-the-left addition for a temporary matrix: the sums are written back into
+ * aMatrix's own buffer, so no new allocation is made.
+ *
+ * A complex aMatrix is left untouched and a message is written to std::cerr.
+ *
+ * @param aScalar value added to each element.
+ * @param aMatrix temporary whose buffer is updated in place.
+ * @return a reference to aMatrix itself.
+ *         NOTE(review): the reference is only valid while the temporary bound to
+ *         aMatrix is alive - callers are presumably expected to copy it immediately.
+ */
+CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix){
+    if (!aMatrix.isComplex())
+    {
+        // Input and output buffers are the same: in-place update.
+        unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
+    }
+    else
+    {
+        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
+    }
+    return aMatrix;
+}
+
+/**
+ * Scalar-on-the-right addition for a temporary matrix: the sums are written back into
+ * aMatrix's own buffer, so no new allocation is made.
+ *
+ * A complex aMatrix is left untouched and a message is written to std::cerr.
+ *
+ * @param aMatrix temporary whose buffer is updated in place.
+ * @param aScalar value added to each element.
+ * @return a reference to aMatrix itself.
+ *         NOTE(review): the reference is only valid while the temporary bound to
+ *         aMatrix is alive - callers are presumably expected to copy it immediately.
+ */
+CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar){
+    if (!aMatrix.isComplex())
+    {
+        // Input and output buffers are the same: in-place update.
+        unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
+    }
+    else
+    {
+        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
+    }
+    return aMatrix;
+}
+
 CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
-    if (this->getDataSize() != aMatrix.getDataSize()) return CudaMatrix();
+    if (this->getDataSize() != aMatrix.getDataSize()) {
+        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
+        <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
+        return CudaMatrix();
+    }
+    if (this->isComplex() != aMatrix.isComplex()) {
+        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
+        <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
+        return CudaMatrix();
+    }
    float* data = nullptr;
    unsigned long long size =  getDataSize() * getValueType();
    cudaMalloc((void**)&data, sizeof(float) * size);
@@ -250,4 +308,128 @@ CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
    unaryAdd(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
    return out;
 }
+
+/**
+ * Element-wise addition with a temporary right-hand operand. The sums are written into
+ * aMatrix's buffer (reused instead of allocating) and aMatrix is returned by value.
+ *
+ * @param aMatrix temporary operand; must have the same data size and the same
+ *                real/complex kind as this matrix.
+ * @return aMatrix holding the sums, or a default-constructed CudaMatrix when the sizes
+ *         or kinds differ (a message is written to std::cerr).
+ */
+CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{
+    if (this->getDataSize() != aMatrix.getDataSize()) {
+        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
+        <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
+        return CudaMatrix();
+    }
+    if (this->isComplex() != aMatrix.isComplex()) {
+        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
+        <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
+        return CudaMatrix();
+    }
+    // The out-of-place operators size their buffers as getDataSize() * getValueType()
+    // floats, so the same count is processed here; with getDataSize() alone a complex
+    // operand would only have part of its floats added.
+    // NOTE(review): assumes getDataSize() counts elements, not floats - confirm.
+    unaryAdd(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize() * this->getValueType());
+    return aMatrix;
+}
+
+
+/**
+ * Element-wise addition with a temporary left-hand operand. The sums are written into
+ * aMatrix's buffer (reused instead of allocating) and aMatrix is returned by value.
+ *
+ * @param aMatrix temporary operand whose buffer receives the result.
+ * @param aOther  second operand; must have the same data size and the same real/complex
+ *                kind as aMatrix. Its buffer is only read.
+ * @return aMatrix holding the sums, or a default-constructed CudaMatrix when the sizes
+ *         or kinds differ (a message is written to std::cerr).
+ */
+CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){
+    if (aOther.getDataSize() != aMatrix.getDataSize()) {
+        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
+        <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
+        return CudaMatrix();
+    }
+    if (aOther.isComplex() != aMatrix.isComplex()) {
+        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
+        <<" and the matrix1 type is "<<(aOther.isComplex()?"Comples":"Real")<<std::endl;
+        return CudaMatrix();
+    }
+    // The out-of-place operators size their buffers as getDataSize() * getValueType()
+    // floats, so the same count is processed here; with getDataSize() alone a complex
+    // operand would only have part of its floats added.
+    // NOTE(review): assumes getDataSize() counts elements, not floats - confirm.
+    unaryAdd(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize() * aOther.getValueType());
+    return aMatrix;
+}
+
+        // mul
+    /**
+     * Multiplies every element of a real matrix by a scalar and returns the products in
+     * a new matrix.
+     *
+     * The result buffer is allocated with cudaMalloc and wrapped via fromRawData using
+     * this matrix's three dimension sizes and value type.
+     *
+     * @param aScalar factor applied to each element.
+     * @return the new matrix; a default-constructed CudaMatrix if this matrix is complex
+     *         or the device allocation fails (a message is written to std::cerr).
+     */
+    CudaMatrix CudaMatrix::operator*(float aScalar) const{
+        if (isComplex())
+        {
+            // The message names operator* (it previously reported operator+).
+            std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
+            return  CudaMatrix();
+        }
+        float* data = nullptr;
+        unsigned long long size =  getDataSize() * getValueType();
+        // cudaMalloc reports failure only through its return code; do not launch on a bad buffer.
+        cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
+        if (status != cudaSuccess)
+        {
+            std::cerr<<"operator*(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
+            return  CudaMatrix();
+        }
+        auto out =  CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryMul(getData(),aScalar,out.getData(),getDataSize());
+        return out;
+    }
+    /**
+     * Scalar-on-the-left multiplication: returns a new matrix holding aScalar times each
+     * element of a real matrix.
+     *
+     * The result buffer is allocated with cudaMalloc and wrapped via fromRawData using
+     * aMatrix's three dimension sizes and value type.
+     *
+     * @param aScalar factor applied to each element.
+     * @param aMatrix source matrix; it is not modified.
+     * @return the new matrix; a default-constructed CudaMatrix if aMatrix is complex or
+     *         the device allocation fails (a message is written to std::cerr).
+     */
+    CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix){
+        if (aMatrix.isComplex())
+        {
+            // The message names operator* (it previously reported operator+).
+            std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
+            return  CudaMatrix();
+        }
+        float* data = nullptr;
+        unsigned long long size =  aMatrix.getDataSize() * aMatrix.getValueType();
+        // cudaMalloc reports failure only through its return code; do not launch on a bad buffer.
+        cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
+        if (status != cudaSuccess)
+        {
+            std::cerr<<"operator*(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
+            return  CudaMatrix();
+        }
+        auto out =  CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
+        unaryMul(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
+        return out;
+    }
+    /**
+     * Scalar-on-the-left multiplication for a temporary matrix: the products are written
+     * back into aMatrix's own buffer, so no new allocation is made.
+     *
+     * A complex aMatrix is left untouched and a message is written to std::cerr.
+     *
+     * @param aScalar factor applied to each element.
+     * @param aMatrix temporary whose buffer is updated in place.
+     * @return a reference to aMatrix itself.
+     *         NOTE(review): the reference is only valid while the temporary bound to
+     *         aMatrix is alive - callers are presumably expected to copy it immediately.
+     */
+    CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix){
+        if (aMatrix.isComplex())
+        {
+            // The message names operator* (it previously reported operator+).
+            std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
+            return  aMatrix;
+        }
+        unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
+        return aMatrix;
+    }
+    /**
+     * Scalar-on-the-right multiplication for a temporary matrix: the products are written
+     * back into aMatrix's own buffer, so no new allocation is made.
+     *
+     * A complex aMatrix is left untouched and a message is written to std::cerr.
+     *
+     * @param aMatrix temporary whose buffer is updated in place.
+     * @param aScalar factor applied to each element.
+     * @return a reference to aMatrix itself.
+     *         NOTE(review): the reference is only valid while the temporary bound to
+     *         aMatrix is alive - callers are presumably expected to copy it immediately.
+     */
+    CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar){
+        if (aMatrix.isComplex())
+        {
+            // The message names operator* (it previously reported operator+).
+            std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
+            return  aMatrix;
+        }
+        unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
+        return aMatrix;
+    }
+    /**
+     * Element-wise multiplication of two matrices into a newly allocated matrix.
+     *
+     * The result buffer is allocated with cudaMalloc and wrapped via fromRawData using
+     * this matrix's three dimension sizes and value type.
+     *
+     * NOTE(review): for complex operands this multiplies the raw float buffers entry by
+     * entry over getDataSize() floats, which is not complex multiplication - confirm
+     * whether complex inputs should be rejected here.
+     *
+     * @param aMatrix right-hand operand; must have the same data size and the same
+     *                real/complex kind as this matrix.
+     * @return the new matrix; a default-constructed CudaMatrix when the sizes or kinds
+     *         differ or the device allocation fails (a message is written to std::cerr).
+     */
+    CudaMatrix CudaMatrix::operator*(const CudaMatrix &aMatrix) const{
+        // Messages below name operator* (they previously reported operator+).
+        if (this->getDataSize() != aMatrix.getDataSize()) {
+            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
+            <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
+            return CudaMatrix();
+        }
+        if (this->isComplex() != aMatrix.isComplex()) {
+            std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
+            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
+            return CudaMatrix();
+        }
+        float* data = nullptr;
+        unsigned long long size =  getDataSize() * getValueType();
+        // cudaMalloc reports failure only through its return code; do not launch on a bad buffer.
+        cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
+        if (status != cudaSuccess)
+        {
+            std::cerr<<"operator* cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
+            return CudaMatrix();
+        }
+        auto out =  CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+        unaryMul(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
+        return out;
+    }
+    /**
+     * Element-wise multiplication with a temporary right-hand operand. The products are
+     * written into aMatrix's buffer (reused instead of allocating) and aMatrix is
+     * returned by value.
+     *
+     * NOTE(review): for complex operands this multiplies the raw float buffers entry by
+     * entry over getDataSize() floats, which is not complex multiplication - confirm
+     * whether complex inputs should be rejected here.
+     *
+     * @param aMatrix temporary operand; must have the same data size and the same
+     *                real/complex kind as this matrix.
+     * @return aMatrix holding the products, or a default-constructed CudaMatrix when the
+     *         sizes or kinds differ (a message is written to std::cerr).
+     */
+    CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{
+        // Messages below name operator* (they previously reported operator+).
+        if (this->getDataSize() != aMatrix.getDataSize()) {
+            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
+            <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
+            return CudaMatrix();
+        }
+        if (this->isComplex() != aMatrix.isComplex()) {
+            std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
+            <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
+            return CudaMatrix();
+        }
+        unaryMul(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
+        return aMatrix;
+    }
+    /**
+     * Element-wise multiplication with a temporary left-hand operand. The products are
+     * written into aMatrix's buffer (reused instead of allocating) and aMatrix is
+     * returned by value.
+     *
+     * NOTE(review): for complex operands this multiplies the raw float buffers entry by
+     * entry over getDataSize() floats, which is not complex multiplication - confirm
+     * whether complex inputs should be rejected here.
+     *
+     * @param aMatrix temporary operand whose buffer receives the result.
+     * @param aOther  second operand; must have the same data size and the same
+     *                real/complex kind as aMatrix. Its buffer is only read.
+     * @return aMatrix holding the products, or a default-constructed CudaMatrix when the
+     *         sizes or kinds differ (a message is written to std::cerr).
+     */
+    CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){
+        // Messages below name operator* (they previously reported operator+).
+        if (aOther.getDataSize() != aMatrix.getDataSize()) {
+            std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
+            <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
+            return CudaMatrix();
+        }
+        if (aOther.isComplex() != aMatrix.isComplex()) {
+            std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
+            <<" and the matrix1 type is "<<(aOther.isComplex()?"Comples":"Real")<<std::endl;
+            return CudaMatrix();
+        }
+        unaryMul(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize());
+        return aMatrix;
+    }
+}
 #endif // USE_CUDA
--- a/src/CudaMatrixPrivate.cu
+++ b/src/CudaMatrixPrivate.cu
@@ -5,7 +5,7 @@
 #include <thrust/execution_policy.h>
 using namespace thrust::placeholders;

-struct PowOperator{
+struct PowOperator: public thrust::unary_function<float, float>{
    float exponent;
    PowOperator(float v):exponent(v) {}
    void setExponent(float v){
@@ -25,7 +25,7 @@ void unaryAdd(float* in1, float* in2, float* out, unsigned long  length)

 void unaryAdd(float* in1, const float& in2, float* out, unsigned long  length)
 {
-    thrust::transform(thrust::device,in1,in1+length,out,in2*_1);
+    thrust::transform(thrust::device,in1,in1+length,out,in2 + _1);
 }

 void unaryMul(float* in1, float* in2, float* out, unsigned long  length)
@@ -34,6 +34,11 @@ void unaryMul(float* in1, float* in2, float* out, unsigned long  length)
    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
 }

+// Computes out[i] = in2 * in1[i] for i in [0, length) with thrust::transform under the
+// thrust::device execution policy.
+// NOTE(review): in1 and out are presumably device pointers given that policy - confirm.
+void unaryMul(float* in1, const float& in2, float* out, unsigned long  length)
+{
+    float* const inputEnd = in1 + length;
+    thrust::transform(thrust::device, in1, inputEnd, out, in2 * _1);
+}
+
 void unaryNeg(float* in1, float* out, unsigned long length){
    thrust::negate<float> op;
    thrust::transform(thrust::device,in1,in1+length,out,op);
@@ -49,6 +54,23 @@ void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
 }

+// Scalar-minus-array: computes out[i] = in1 - in2[i] for i in [0, length) with
+// thrust::transform under the thrust::device execution policy.
+// NOTE(review): in2 and out are presumably device pointers given that policy - confirm.
+void unarySub(const float& in1, float* in2, float* out, unsigned long length){
+    float* const inputEnd = in2 + length;
+    thrust::transform(thrust::device, in2, inputEnd, out, in1 - _1);
+}
+
+// Scalar-over-array: computes out[i] = in1 / in2[i] for i in [0, length) with
+// thrust::transform under the thrust::device execution policy.
+// NOTE(review): in2 and out are presumably device pointers given that policy - confirm.
+void unaryDiv(const float& in1, float* in2, float* out, unsigned long length){
+    float* const inputEnd = in2 + length;
+    thrust::transform(thrust::device, in2, inputEnd, out, in1 / _1);
+}
+
+// Array-minus-scalar: computes out[i] = in1[i] - in2 for i in [0, length) with
+// thrust::transform under the thrust::device execution policy.
+// NOTE(review): in1 and out are presumably device pointers given that policy - confirm.
+void unarySub(float* in1, const float& in2, float* out, unsigned long length){
+    float* const inputEnd = in1 + length;
+    thrust::transform(thrust::device, in1, inputEnd, out, _1 - in2);
+}
+
+// Array-over-scalar: computes out[i] = in1[i] / in2 for i in [0, length) with
+// thrust::transform under the thrust::device execution policy.
+// NOTE(review): in1 and out are presumably device pointers given that policy - confirm.
+void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){
+    float* const inputEnd = in1 + length;
+    thrust::transform(thrust::device, in1, inputEnd, out, _1 / in2);
+}
+
 void unaryPow(float* in1, float N,float* out, unsigned long length){
    if (N == 0.0f)
    {
@@ -65,7 +87,6 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
        thrust::transform(thrust::device,in1,in1+length,out,op);
        return;
    }
-    thrust::transform(thrust::device,in1,in1+length,out,powf(_1,N));
-    
+    thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
 }

--- a/src/CudaMatrixPrivate.cuh
+++ b/src/CudaMatrixPrivate.cuh
@@ -5,11 +5,17 @@
 void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
 void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
 void unaryMul(float* in1, float* in2, float* out, unsigned long length);
+void unaryMul(float* in1, const float& in2, float* out, unsigned long length);
+
 void unaryNeg(float* in1, float* out, unsigned long length);
 void unaryPow(float* in1, float N,float* out, unsigned long length);

 void unarySub(float* in1, float* in2, float* out, unsigned long length);
 void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
+void unarySub(const float& in1, float* in2, float* out, unsigned long length);
+void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
+void unarySub(float* in1, const float& in2, float* out, unsigned long length);
+void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);


 #endif // __CUDAMATRIX_H__
--- a/src/Function.cpp
+++ b/src/Function.cpp
@@ -3,6 +3,7 @@
 //

 #include "Function.h"
+#include <cstddef>

 //必须在mkl.h和Eigen的头之前，<complex>之后
 #define MKL_Complex8 std::complex<float>
@@ -29,6 +30,17 @@ namespace Aurora {
    void free(void* ptr){
        mkl_free(ptr);
    }
+    /**
+     * Returns a new[]-allocated array of `length` floats filled by MKL's uniform
+     * generator (vsRngUniform) with bounds -100 and 10000.
+     *
+     * The generator is an MT19937 stream created with the constant argument 1 on every
+     * call and deleted before returning.
+     * NOTE(review): the buffer comes from new float[], not from Aurora::malloc -
+     * confirm which deallocation callers are expected to use.
+     *
+     * @param length number of floats to generate.
+     * @return pointer to the generated values.
+     */
+    float*  random(size_t length){
+        const float lowerBound = -100.0;
+        const float upperBound = 10000.0;
+        VSLStreamStatePtr rngStream;
+        vslNewStream(&rngStream, VSL_BRNG_MT19937, 1);
+        float* values = new float[length];
+        vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, rngStream, length, values, lowerBound, upperBound);
+        // Release the generator stream.
+        vslDeleteStream(&rngStream);
+        return values;
+    }
 #ifdef USE_CUDA
    void gpuFree(void* ptr)
    {
--- a/src/Function.h
+++ b/src/Function.h
@@ -6,11 +6,13 @@
 #define MATLABFUNCTIONS_MATRIXFUNCTIONS_H

 #include <complex>
+#include <cstddef>

 namespace Aurora{
    float* malloc(size_t size,bool complex = false);
    void free(void* ptr);
    void gpuFree(void* ptr);
+    float* random(size_t length);
 };


--- a/src/main.cxx
+++ b/src/main.cxx
@@ -23,17 +23,97 @@ int main()
        A[i] = -1;
        B[i] =  i;
    }
+    printf("Test CudaMatrix operator+(const CudaMatrix &aMatrix) const \r\n");
+    //CudaMatrix operator+(const CudaMatrix &aMatrix) const
    auto C = A+B;
    auto dA = A.toDeviceMatrix();
    auto dB = B.toDeviceMatrix();
-    auto dC = (dA+dB).toHostMatrix();
+    auto dC = (dA+dB);
+    auto dhC = dC.toHostMatrix();
    for (size_t i = 0; i < 1000; i++)
    {
-        if (C[i]!=dC[i]){
-            printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
+        if (C[i]!=dhC[i]){
+            printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dhC[i]);
            return 9;
        }
    }
-    
+    printf("Test CudaMatrix operator+(float aScalar) const \r\n");
+    //CudaMatrix operator+(float aScalar) const
+    auto D = C+0.5;
+    auto dD = dC+0.5;
+    auto dhD = dD.toHostMatrix();
+    for (size_t i = 0; i < 1000; i++)
+    {
+        if (D[i]!=dhD[i]){
+            printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+            return 9;
+        }
+    }
+    printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n");
+    // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix)
+    dD = 0.5 + dC;
+    dhD = dD.toHostMatrix();
+    for (size_t i = 0; i < 1000; i++)
+    {
+        if (D[i]!=dhD[i]){
+            printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+            return 9;
+        }
+    }
+    printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n");
+    // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix)
+    {
+        auto dD2 = 0.5 + (dA+dB);
+        dhD = dD2.toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            if (D[i]!=dhD[i]){
+                printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+                return 9;
+            }
+        }
+    }
+    printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n");
+    // CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar)
+    {
+        
+        auto dD2 =  (dA+dB)+0.5;
+        dhD = dD2.toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            if (D[i]!=dhD[i]){
+                printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+                return 9;
+            }
+        }
+    }
+    //CudaMatrix operator+(CudaMatrix &&aMatrix) const
+    printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix) const \r\n");
+    {
+        // Host reference for A+(A+B); this D shadows the outer D inside this scope only.
+        auto D = A+C;
+        // dA + (temporary) selects CudaMatrix::operator+(CudaMatrix &&aMatrix) const.
+        auto dD2 = dA+(dA+dB);
+        dhD = dD2.toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            // Compare the values produced by this test (D vs dhD); the previous check
+            // re-compared C and dhC, which never exercised the operator under test.
+            if (D[i]!=dhD[i]){
+                printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+                return 9;
+            }
+        }
+    }
+    //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther)
+    printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n");
+    {
+        // Host reference for (A+B)+A; this D shadows the outer D inside this scope only.
+        auto D = A+C;
+        // (temporary) + dA selects operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther).
+        auto dD2 = (dA+dB)+dA;
+        dhD = dD2.toHostMatrix();
+        for (size_t i = 0; i < 1000; i++)
+        {
+            // Compare the values produced by this test (D vs dhD); the previous check
+            // re-compared C and dhC, which never exercised the operator under test.
+            if (D[i]!=dhD[i]){
+                printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
+                return 9;
+            }
+        }
+    }
    return 0;
 }