diff --git a/src/CudaMatrix.cpp b/src/CudaMatrix.cpp index d19e3c5..3d1cf5c 100644 --- a/src/CudaMatrix.cpp +++ b/src/CudaMatrix.cpp @@ -1,3 +1,7 @@ +#include "AuroraDefs.h" +#include "Function1D.cuh" +#include +#include #ifdef USE_CUDA #include "CudaMatrix.h" @@ -5,10 +9,19 @@ #include "Matrix.h" #include +#include + #include #include #include "CudaMatrixPrivate.cuh" - +namespace { + std::string getMatrixShapeStr(const Aurora::CudaMatrix &aMatrix) { + std::string ret; + return "(" + std::to_string(aMatrix.getDimSize(0)) + "," + + std::to_string(aMatrix.getDimSize(1)) + "," + + std::to_string(aMatrix.getDimSize(1)) + "," + ")"; + } +} namespace Aurora{ CudaMatrix::CudaMatrix(std::shared_ptr aData, std::vector aInfo, ValueType aValueType) @@ -145,6 +158,15 @@ CudaMatrix CudaMatrix::deepCopy() const return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType()); } +CudaMatrix CudaMatrix::deepCopyShape(bool complex) const +{ + float* data = nullptr; + ValueType newType = complex==true?Complex:getValueType(); + unsigned long long size = getDataSize() * newType; + cudaMalloc((void**)&data, sizeof(float) * size); + return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), newType); +} + Matrix CudaMatrix::toHostMatrix() const { if(!mData.get()) @@ -494,419 +516,891 @@ bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMat } } } - //--Add---------------------------------------------------------------- - CudaMatrix CudaMatrix::operator+(float aScalar) const{ - if (isComplex()) - { - std::cerr<<"Complex matrix not support operator+(float aScalar)"<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getData(),aMatrix.getData(),out.getData(),this->getDataSize()); - return out; - } - - CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{ - if (this->getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); - return aMatrix; - } - - - CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){ - if (aOther.getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<isComplex() != aMatrix.isComplex()) { - std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); - } - else{ - unaryMul(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize()); - } - return aMatrix; - } - CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){ - if (aOther.getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "< v(aScalar,0); + return (*this)+v; + } else { + auto out = deepCopyShape(); + unaryAdd(getData(), aScalar, out.getData(), getDataSize()); return out; } - CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix){ - if (aMatrix.isComplex()) - { - std::cerr<<"Complex matrix not support operator-(float aScalar, const CudaMatrix &aMatrix)"<isComplex()) - { - std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize()); - return out; - } - CudaMatrix CudaMatrix::operator-(CudaMatrix &&aMatrix) const{ - if (aMatrix.isComplex()!=this->isComplex()) - { - std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize()); - return aMatrix; - } - CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther){ - if (aMatrix.isComplex()!=aOther.isComplex()) - { - std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<(aMatrix) + aScalar; +} + +CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) { + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + return std::forward(aMatrix)+v; + } else { + unaryAdd(aMatrix.getData(), aScalar, aMatrix.getData(), + aMatrix.getDataSize()); + return aMatrix; + } + +} + +CudaMatrix CudaMatrix::operator+(std::complex aScalar) const{ + + if (isComplex()) { + auto out = deepCopyShape(); + unaryAddc((std::complex*)getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } else { + auto out = Aurora::complex(*this); + unaryAddc((std::complex*)out.getData(), aScalar, (std::complex*)out.getData(), getDataSize()); return out; } - CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix){ - if (aMatrix.isComplex()) - { - std::cerr<<"Complex matrix not support operator/(float aScalar, const CudaMatrix &aMatrix)"< aScalar, const CudaMatrix &aMatrix) { + return aMatrix + aScalar; +} + +CudaMatrix &operator+(std::complex aScalar, CudaMatrix &&aMatrix) { + return std::forward(aMatrix) + aScalar; +} + +CudaMatrix &operator+(CudaMatrix &&aMatrix, std::complex aScalar) { + if (aMatrix.isComplex()) { + unaryAddc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unaryAddc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const { + auto thisMatrix = *this; + if (thisMatrix.isScalar()) { + return thisMatrix.getScalar() + aMatrix; + } + if (aMatrix.isScalar()) { + return thisMatrix + aMatrix.getScalar(); + } + if (thisMatrix.compareShape(aMatrix)) { + //输出需考虑是否需要装变为complex + auto out = thisMatrix.deepCopyShape(aMatrix.isComplex()||thisMatrix.isComplex()); + unaryAddmm(aMatrix.getData(), thisMatrix.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, thisMatrix.mValueType); return out; } - CudaMatrix& operator/(float aScalar, CudaMatrix &&aMatrix){ - if (aMatrix.isComplex()) - { - std::cerr<<"Complex matrix not support operator/(float aScalar, CudaMatrix &&aMatrix)"<(aMatrix)+ *this; +} + +CudaMatrix operator+(CudaMatrix &&aMatrix, CudaMatrix &aOther) { + if (aOther.isScalar()) { + return aOther.getScalar() + std::forward(aMatrix); } - CudaMatrix CudaMatrix::operator/(const CudaMatrix &aMatrix) const{ - if (aMatrix.isComplex()!=this->isComplex()) - { - std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unaryAddmm(aMatrix.getData(), aOther.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, aOther.mValueType); return out; } - CudaMatrix CudaMatrix::operator/(CudaMatrix &&aMatrix) const{ - if (aMatrix.isComplex()!=this->isComplex()) - { - std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<getDataSize() - <<" and the matrix1 size is "<getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize()); + // 尺寸不匹配 + else { + std::cerr + << "M + M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; } - else{ - unaryDiv(this->getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize()); - } + } + return CudaMatrix(); +} + +//--mul----------------------------------------------------- +CudaMatrix CudaMatrix::operator*(float aScalar) const{ + if (isComplex()) { + std::complex v(aScalar,0); + return (*this)*v; + } else { + auto out = deepCopyShape(); + unaryMul(getData(), aScalar, out.getData(), getDataSize()); + return out; + } + +} + +CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix) { + return aMatrix * aScalar; +} + +CudaMatrix &operator*(float aScalar, CudaMatrix &&aMatrix) { + return std::forward(aMatrix) * aScalar; +} + +CudaMatrix &operator*(CudaMatrix &&aMatrix, float aScalar) { + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + return std::forward(aMatrix)*v; + } else { + unaryMul(aMatrix.getData(), aScalar, aMatrix.getData(), + aMatrix.getDataSize()); return aMatrix; } - CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther){ - if (aMatrix.isComplex()!=aOther.isComplex()) - { - std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real") - <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")< aScalar) const{ + + if (isComplex()) { + auto out = deepCopyShape(); + unaryMulc((std::complex*)getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } else { + auto out = Aurora::complex(*this); + unaryMulc((std::complex*)out.getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } +} + +CudaMatrix operator*(std::complex aScalar, const CudaMatrix &aMatrix) { + return aMatrix * aScalar; +} + +CudaMatrix &operator*(std::complex aScalar, CudaMatrix &&aMatrix) { + return std::forward(aMatrix) * aScalar; +} + +CudaMatrix &operator*(CudaMatrix &&aMatrix, std::complex aScalar) { + if (aMatrix.isComplex()) { + unaryMulc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unaryMulc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix CudaMatrix::operator*(const CudaMatrix &aMatrix) const { + auto thisMatrix = *this; + if (thisMatrix.isScalar()) { + return thisMatrix.getScalar() * aMatrix; + } + if (aMatrix.isScalar()) { + return thisMatrix * aMatrix.getScalar(); + } + if (thisMatrix.compareShape(aMatrix)) { + //输出需考虑是否需要装变为complex + auto out = thisMatrix.deepCopyShape(aMatrix.isComplex()||thisMatrix.isComplex()); + unaryMulmm(aMatrix.getData(), thisMatrix.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, thisMatrix.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != thisMatrix.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : (thisMatrix); + auto mat = aMatrix.isVector() ? (thisMatrix) : aMatrix; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + //输出需考虑是否需要装变为complex + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryMulmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row); + return out; } - if (aOther.getDataSize() != aMatrix.getDataSize()) { - std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<(aMatrix)* (*this); +} + +CudaMatrix operator*(CudaMatrix &&aMatrix, CudaMatrix &aOther) { + if (aOther.isScalar()) { + return aOther.getScalar() * std::forward(aMatrix); + } + if (aMatrix.isScalar()) { + return aOther * aMatrix.getScalar(); + } + if (aOther.compareShape(aMatrix)) { + //aMatrix的数据类型与aOther一致或者体积比aOther大时,仍旧用CudaMatrix &&aMatrix作为输出 + auto out = aMatrix.mValueType >= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unaryMulmm(aMatrix.getData(), aOther.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, aOther.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != aOther.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : aOther; + auto mat = aMatrix.isVector() ? aOther : aMatrix; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryMulmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryMulmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M * M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; + } + } + return CudaMatrix(); +} + +//--Sub----------------------------------------------------------------- +CudaMatrix CudaMatrix::operator-(float aScalar) const{ + if (isComplex()) { + std::complex v(aScalar,0); + return (*this)-v; + } else { + auto out = deepCopyShape(); + unarySub(getData(), aScalar, out.getData(), getDataSize()); + return out; + } + +} + +CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix) { + auto out = aMatrix.deepCopyShape(); + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + unarySubc(v, (std::complex *)aMatrix.getData(), + (std::complex *)out.getData(), + aMatrix.getDataSize()); + return out; + } else { + unarySub(aScalar, out.getData(),out.getData(), + out.getDataSize()); + return out; + } +} + +CudaMatrix &operator-(float aScalar, CudaMatrix &&aMatrix) { + if (aMatrix.isComplex()) { + unarySubc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + unarySub(aScalar,aMatrix.getData(),aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix &operator-(CudaMatrix &&aMatrix, float aScalar) { + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + return std::forward(aMatrix)-v; + } else { + unarySub(aMatrix.getData(), aScalar, aMatrix.getData(), + aMatrix.getDataSize()); return aMatrix; } + +} + +CudaMatrix CudaMatrix::operator-(std::complex aScalar) const{ + + if (isComplex()) { + auto out = deepCopyShape(); + unarySubc((std::complex*)getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } else { + auto out = Aurora::complex(*this); + unarySubc((std::complex*)out.getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } +} + +CudaMatrix operator-(std::complex aScalar, const CudaMatrix &aMatrix) { + if (aMatrix.isComplex()) { + auto out = aMatrix.deepCopyShape(); + unarySubc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)out.getData(), + aMatrix.getDataSize()); + return out; + } else { + auto out = Aurora::complex(aMatrix); + unarySubc(aScalar, (std::complex *)out.getData(), + (std::complex *)out.getData(), + out.getDataSize()); + return out; + } +} + +CudaMatrix &operator-(std::complex aScalar, CudaMatrix &&aMatrix) { + if (aMatrix.isComplex()) { + unarySubc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unarySubc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix &operator-(CudaMatrix &&aMatrix, std::complex aScalar) { + if (aMatrix.isComplex()) { + unarySubc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unarySubc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix CudaMatrix::operator-(const CudaMatrix &aMatrix) const { + auto thisMatrix = *this; + if (thisMatrix.isScalar()) { + return thisMatrix.getScalar() - aMatrix; + } + if (aMatrix.isScalar()) { + return thisMatrix - aMatrix.getScalar(); + } + if (thisMatrix.compareShape(aMatrix)) { + //输出需考虑是否需要装变为complex + auto out = thisMatrix.deepCopyShape(aMatrix.isComplex()||thisMatrix.isComplex()); + unarySubmm(thisMatrix.getData(), aMatrix.getData(), out.getData(), + out.getDataSize(), thisMatrix.mValueType, aMatrix.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != thisMatrix.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : (thisMatrix); + auto mat = aMatrix.isVector() ? (thisMatrix) : aMatrix; + int direction = thisMatrix.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + //输出需考虑是否需要装变为complex + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row, direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + //输出需考虑是否需要装变为complex + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Column, direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(thisMatrix)<< std::endl; + } + } + return CudaMatrix(); +} + +CudaMatrix CudaMatrix::operator-(CudaMatrix &&aMatrix) const { + auto aOther = *this; + if (isScalar()) { + return getScalar() - std::forward(aMatrix); + } + if (aMatrix.isScalar()) { + return aOther - aMatrix.getScalar(); + } + if (aOther.compareShape(aMatrix)) { + //aMatrix的数据类型与aOther一致或者体积比aOther大时,仍旧用CudaMatrix &&aMatrix作为输出 + auto out = aMatrix.mValueType >= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unarySubmm(aOther.getData(), aMatrix.getData(), out.getData(), + out.getDataSize(), aOther.mValueType, aMatrix.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != aOther.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : aOther; + auto mat = aMatrix.isVector() ? aOther : aMatrix; + int direction = aOther.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row,direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType,Column,direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; + } + } + return CudaMatrix(); +} + +CudaMatrix operator-(CudaMatrix &&aMatrix, CudaMatrix &aOther) { + if (aOther.isScalar()) { + return std::forward(aMatrix) - aOther.getScalar(); + } + if (aMatrix.isScalar()) { + return aMatrix.getScalar() - aOther; + } + if (aOther.compareShape(aMatrix)) { + //aMatrix的数据类型与aOther一致或者体积比aOther大时,仍旧用CudaMatrix &&aMatrix作为输出 + auto out = aMatrix.mValueType >= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unarySubmm(aMatrix.getData(), aOther.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, aOther.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != aOther.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : aOther; + auto mat = aMatrix.isVector() ? aOther : aMatrix; + int direction = aMatrix.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row,direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unarySubmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType,Column,direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; + } + } + return CudaMatrix(); +} + +// div +CudaMatrix CudaMatrix::operator/(float aScalar) const{ + if (isComplex()) { + std::complex v(aScalar,0); + return (*this)/v; + } else { + auto out = deepCopyShape(); + unaryDiv(getData(), aScalar, out.getData(), getDataSize()); + return out; + } + +} + +CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix) { + auto out = aMatrix.deepCopyShape(); + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + unaryDivc(v, (std::complex *)aMatrix.getData(), + (std::complex *)out.getData(), + aMatrix.getDataSize()); + return out; + } else { + unaryDiv(aScalar, out.getData(),out.getData(), + out.getDataSize()); + return out; + } +} + +CudaMatrix &operator/(float aScalar, CudaMatrix &&aMatrix) { + if (aMatrix.isComplex()) { + unaryDivc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + unaryDiv(aScalar,aMatrix.getData(),aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix &operator/(CudaMatrix &&aMatrix, float aScalar) { + if (aMatrix.isComplex()) { + std::complex v(aScalar,0); + return std::forward(aMatrix)/v; + } else { + unaryDiv(aMatrix.getData(), aScalar, aMatrix.getData(), + aMatrix.getDataSize()); + return aMatrix; + } + +} + +CudaMatrix CudaMatrix::operator/(std::complex aScalar) const{ + + if (isComplex()) { + auto out = deepCopyShape(); + unaryDivc((std::complex*)getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } else { + auto out = Aurora::complex(*this); + unaryDivc((std::complex*)out.getData(), aScalar, (std::complex*)out.getData(), getDataSize()); + return out; + } +} + +CudaMatrix operator/(std::complex aScalar, const CudaMatrix &aMatrix) { + if (aMatrix.isComplex()) { + auto out = aMatrix.deepCopyShape(); + unaryDivc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)out.getData(), + aMatrix.getDataSize()); + return out; + } else { + auto out = Aurora::complex(aMatrix); + unaryDivc(aScalar, (std::complex *)out.getData(), + (std::complex *)out.getData(), + out.getDataSize()); + return out; + } +} + +CudaMatrix &operator/(std::complex aScalar, CudaMatrix &&aMatrix) { + if (aMatrix.isComplex()) { + unaryDivc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unaryDivc(aScalar, (std::complex *)aMatrix.getData(), + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix &operator/(CudaMatrix &&aMatrix, std::complex aScalar) { + if (aMatrix.isComplex()) { + unaryDivc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } else { + aMatrix = Aurora::complex(aMatrix); + unaryDivc((std::complex *)aMatrix.getData(), aScalar, + (std::complex *)aMatrix.getData(), + aMatrix.getDataSize()); + } + return aMatrix; +} + +CudaMatrix CudaMatrix::operator/(const CudaMatrix &aMatrix) const { + auto thisMatrix = *this; + if (thisMatrix.isScalar()) { + return thisMatrix.getScalar() / aMatrix; + } + if (aMatrix.isScalar()) { + return thisMatrix / aMatrix.getScalar(); + } + if (thisMatrix.compareShape(aMatrix)) { + //输出需考虑是否需要装变为complex + auto out = thisMatrix.deepCopyShape(aMatrix.isComplex()||thisMatrix.isComplex()); + unaryDivmm(thisMatrix.getData(), aMatrix.getData(), out.getData(), + out.getDataSize(), thisMatrix.mValueType, aMatrix.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != thisMatrix.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : (thisMatrix); + auto mat = aMatrix.isVector() ? (thisMatrix) : aMatrix; + int direction = thisMatrix.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + //输出需考虑是否需要装变为complex + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row, direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + //输出需考虑是否需要装变为complex + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Column, direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(thisMatrix)<< std::endl; + } + } + return CudaMatrix(); +} + +CudaMatrix CudaMatrix::operator/(CudaMatrix &&aMatrix) const { + auto aOther = *this; + if (isScalar()) { + return getScalar() / std::forward(aMatrix); + } + if (aMatrix.isScalar()) { + return aOther / aMatrix.getScalar(); + } + if (aOther.compareShape(aMatrix)) { + //aMatrix的数据类型与aOther一致或者体积比aOther大时,仍旧用CudaMatrix &&aMatrix作为输出 + auto out = aMatrix.mValueType >= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unaryDivmm(aOther.getData(), aMatrix.getData(), out.getData(), + out.getDataSize(), aOther.mValueType, aMatrix.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != aOther.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : aOther; + auto mat = aMatrix.isVector() ? aOther : aMatrix; + int direction = aOther.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row,direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType,Column,direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; + } + } + return CudaMatrix(); +} + +CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther) { + if (aOther.isScalar()) { + return std::forward(aMatrix) / aOther.getScalar(); + } + if (aMatrix.isScalar()) { + return aMatrix.getScalar() / aOther; + } + if (aOther.compareShape(aMatrix)) { + //aMatrix的数据类型与aOther一致或者体积比aOther大时,仍旧用CudaMatrix &&aMatrix作为输出 + auto out = aMatrix.mValueType >= aOther.mValueType? aMatrix : aOther.deepCopyShape(); + unaryDivmm(aMatrix.getData(), aOther.getData(), out.getData(), + out.getDataSize(), aMatrix.mValueType, aOther.mValueType); + return out; + } + // 一个是向量一个是矩阵 + else if (aMatrix.isVector() != aOther.isVector()) { + auto vec = aMatrix.isVector() ? aMatrix : aOther; + auto mat = aMatrix.isVector() ? aOther : aMatrix; + int direction = aMatrix.isVector()?1:0; + // 行向量 + if (vec.getDimSize(0) == 1 && vec.getDimSize(1) == mat.getDimSize(1)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType, Aurora::Row,direction); + return out; + } + // 列向量 + else if (vec.getDimSize(1) == 1 && + vec.getDimSize(0) == mat.getDimSize(0)) { + // 判定aMatrix是矩阵,且数据类型与输出结果一致时,继续使用CudaMatrix + // &&aMatrix作为输出 + auto out = mat.deepCopyShape(vec.isComplex() || mat.isComplex()); + unaryDivmv(mat.getData(), vec.getData(), out.getData(), + out.getDataSize(), vec.getDataSize(), mat.mValueType, + vec.mValueType,Column,direction); + return out; + } + // 尺寸不匹配 + else { + std::cerr + << "M - M fail, matrix shape not match the operation rule!" + << "with matrix1 shape:" + << getMatrixShapeStr(aMatrix) + << " and matrix2 shape:" + << getMatrixShapeStr(aOther)<< std::endl; + } + } + return CudaMatrix(); +} // -----------pow------------------------------------------------------- CudaMatrix CudaMatrix::operator^(int times) const{ diff --git a/src/CudaMatrix.h b/src/CudaMatrix.h index 32a1fbb..73b08b5 100644 --- a/src/CudaMatrix.h +++ b/src/CudaMatrix.h @@ -43,11 +43,23 @@ namespace Aurora */ CudaMatrix deepCopy() const; + /** + * @brief 深拷贝操作,但不复制数据 + * + * @return CudaMatrix + */ + CudaMatrix deepCopyShape(bool complex = false) const; + + // add CudaMatrix operator+(float aScalar) const; friend CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix); friend CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix); friend CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar); + CudaMatrix operator+(std::complex aScalar) const; + friend CudaMatrix operator+(std::complex aScalar, const CudaMatrix &aMatrix); + friend CudaMatrix& operator+(std::complex aScalar, CudaMatrix &&aMatrix); + friend CudaMatrix& operator+(CudaMatrix &&aMatrix,std::complex aScalar); CudaMatrix operator+(const CudaMatrix &aMatrix) const; CudaMatrix operator+(CudaMatrix &&aMatrix) const; friend CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther); @@ -57,6 +69,10 @@ namespace Aurora friend CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix); friend CudaMatrix& operator-(float aScalar, CudaMatrix &&aMatrix); friend CudaMatrix& operator-(CudaMatrix &&aMatrix,float aScalar); + CudaMatrix operator-(std::complex aScalar) const; + friend CudaMatrix operator-(std::complex aScalar, const CudaMatrix &aMatrix); + friend CudaMatrix& operator-(std::complex aScalar, CudaMatrix &&aMatrix); + friend CudaMatrix& operator-(CudaMatrix &&aMatrix,std::complex aScalar); CudaMatrix operator-(const CudaMatrix &aMatrix) const; CudaMatrix operator-(CudaMatrix &&aMatrix) const; friend CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther); @@ -70,6 +86,10 @@ namespace Aurora friend CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix); friend CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix); friend CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar); + CudaMatrix operator*(std::complex aScalar) const; + friend CudaMatrix operator*(std::complex aScalar, const CudaMatrix &aMatrix); + friend CudaMatrix& operator*(std::complex aScalar, CudaMatrix &&aMatrix); + friend CudaMatrix& operator*(CudaMatrix &&aMatrix,std::complex aScalar); CudaMatrix operator*(const CudaMatrix &aMatrix) const; CudaMatrix operator*(CudaMatrix &&aMatrix) const; friend CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther); @@ -79,9 +99,13 @@ namespace Aurora friend CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix); friend CudaMatrix& operator/(float aScalar, CudaMatrix &&aMatrix); friend CudaMatrix& operator/(CudaMatrix &&aMatrix,float aScalar); + CudaMatrix operator/(std::complex aScalar) const; + friend CudaMatrix operator/(std::complex aScalar, const CudaMatrix &aMatrix); + friend CudaMatrix& operator/(std::complex aScalar, CudaMatrix &&aMatrix); + friend CudaMatrix& operator/(CudaMatrix &&aMatrix,std::complex aScalar); CudaMatrix operator/(const CudaMatrix &aMatrix) const; CudaMatrix operator/(CudaMatrix &&aMatrix) const; - friend CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther); + friend CudaMatrix operator/(CudaMatrix &&aMatrix,CudaMatrix &aOther); // pow CudaMatrix operator^(int times) const; diff --git a/src/CudaMatrixPrivate.cu b/src/CudaMatrixPrivate.cu index 1dbf25e..e5c4d95 100644 --- a/src/CudaMatrixPrivate.cu +++ b/src/CudaMatrixPrivate.cu @@ -4,7 +4,9 @@ #include #include #include + #include "AuroraDefs.h" +#include "AuroraThrustIterator.cuh" using namespace thrust::placeholders; struct PowOp: public thrust::unary_function{ @@ -127,36 +129,224 @@ struct CompareANEOp{ } }; +typedef thrust::complex complexf; -void unaryAdd(float* in1, float* in2, float* out, unsigned long length) + + +void unaryAddmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2) { - thrust::plus op; - thrust::transform(thrust::device,in1,in1+length,in2,out,op); + if (aValType1 == aValType2) + { + thrust::plus op; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op); + } + else if(Aurora::Complex == aValType1){ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()+y,x.imag()); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda); + } + else{ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()+y,x.imag()); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn2,(complexf*)aMatrixIn2+aLength,aMatrixIn1,(complexf*)aMatrixOut,lambda); + } + } -void unaryAdd(float* in1, const float& in2, float* out, unsigned long length) +void unaryAddmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype) { - thrust::transform(thrust::device,in1,in1+length,out,in2 + _1); + if (aVectype == Aurora::Column){ + if(aValType1 == aValType2){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + thrust::plus op; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,op); + } + else if (aValType1 == Aurora::Complex){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()+y,x.imag()); + }; + thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(y.real()+x,y.imag()); + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } + else{ + int colElementCount = aMatrixLength/aVectorLength; + if(aValType1 == aValType2){ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + thrust::plus op; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,op); + } + else if (aValType1 == Aurora::Complex){ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()+y,x.imag()); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(y.real()+x,y.imag()); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } } -void unaryMul(float* in1, float* in2, float* out, unsigned long length) +void unaryAdd(float* aMatrixIn, const float& aVectorIn, float* aMatrixOut, unsigned long aLength) { - thrust::multiplies op; - thrust::transform(thrust::device,in1,in1+length,in2,out,op); + thrust::transform(thrust::device,aMatrixIn,aMatrixIn+aLength,aMatrixOut,aVectorIn + _1); } -void unaryMulc(float* in1, float* in2, float* out, unsigned long length) +void unaryAddc(std::complex* aMatrixIn, const std::complex& aScalarIn, + std::complex* aMatrixOut, unsigned long aLength) { - thrust::complex* _in1 = (thrust::complex*)in1; - thrust::complex* _in2 = (thrust::complex*)in2; - thrust::complex* _out = (thrust::complex*)out;; - thrust::multiplies> op; - thrust::transform(thrust::device,_in1,_in1+length,_in2,_out,op); + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device,(complexf*)aMatrixIn, + (complexf*)aMatrixIn + aLength, + (complexf*)aMatrixOut, value + _1); } -void unaryMul(float* in1, const float& in2, float* out, unsigned long length) +void unaryMul(float* aMatrixIn, const float& aScalarIn, float* aMatrixOut, unsigned long aLength) { - thrust::transform(thrust::device,in1, in1+length, out, in2 * _1); + thrust::transform(thrust::device,aMatrixIn,aMatrixIn+aLength,aMatrixOut,aScalarIn * _1); +} + +void unaryMulc(std::complex *aMatrixIn, + const std::complex &aScalarIn, + std::complex *aMatrixOut, unsigned long aLength) +{ + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device, (complexf *)aMatrixIn, + (complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut, + value * _1); +} + +void unaryMulmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2) +{ + if (aValType1 == aValType2 ) + { + if(Aurora::Normal == aValType1){ + thrust::multiplies op; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op); + } + else{ + thrust::multiplies op; + thrust::transform(thrust::device, (complexf *)aMatrixIn1, + (complexf *)aMatrixIn1 + aLength, + (complexf *)aMatrixIn2, (complexf *)aMatrixOut, + op); + } + } + else if(Aurora::Complex == aValType1){ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()*y,x.imag()*y); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda); + } + else{ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()*y,x.imag()*y); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn2,(complexf*)aMatrixIn2+aLength,aMatrixIn1,(complexf*)aMatrixOut,lambda); + } + +} + +void unaryMulmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype) +{ + if (aVectype == Aurora::Column){ + if(aValType1 == aValType2){ + if(aValType1 == Aurora::Normal){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + thrust::multiplies op; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,op); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + thrust::multiplies op; + thrust::transform(thrust::device, (complexf*)aMatrixIn1, + (complexf*)aMatrixIn1+aMatrixLength,rowVectorIter, + (complexf*)aMatrixOut,op); + } + } + else if (aValType1 == Aurora::Complex){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()*y,x.imag()*y); + }; + thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(y.real()*x,y.imag()*x); + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } + else{ + int colElementCount = aMatrixLength/aVectorLength; + if(aValType1 == aValType2){ + if(aValType1 == Aurora::Normal){ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + thrust::multiplies op; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,op); + } + else{ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + thrust::multiplies op; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,op); + } + } + else if (aValType1 == Aurora::Complex){ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()*y,x.imag()*y); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(y.real()*x,y.imag()*x); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } } void unaryNeg(float* in1, float* out, unsigned long length){ @@ -164,42 +354,280 @@ void unaryNeg(float* in1, float* out, unsigned long length){ thrust::transform(thrust::device,in1,in1+length,out,op); } -void unarySub(float* in1, float* in2, float* out, unsigned long length){ - thrust::minus op; - thrust::transform(thrust::device,in1,in1+length,in2,out,op); -} - -void unaryDiv(float* in1, float* in2, float* out, unsigned long length){ - thrust::divides op; - thrust::transform(thrust::device,in1,in1+length,in2,out,op); -} - -void unaryDivc(float* in1, float* in2, float* out, unsigned long length) -{ - thrust::complex* _in1 = (thrust::complex*)in1; - thrust::complex* _in2 = (thrust::complex*)in2; - thrust::complex* _out = (thrust::complex*)out;; - thrust::divides> op; - thrust::transform(thrust::device,_in1,_in1+length,_in2,_out,op); -} - -void unarySub(const float& in1, float* in2, float* out, unsigned long length){ - thrust::transform(thrust::device,in2,in2+length,out,in1-_1); -} - -void unaryDiv(const float& in1, float* in2, float* out, unsigned long length){ - thrust::transform(thrust::device,in2,in2+length,out,in1/_1); - -} - void unarySub(float* in1, const float& in2, float* out, unsigned long length){ thrust::transform(thrust::device,in1,in1+length,out,_1-in2); } +void unarySub(const float& in2, float* in1, float* out, unsigned long length){ + thrust::transform(thrust::device,in1,in1+length,out,in2-_1); +} + +void unarySubc(std::complex *aMatrixIn, + const std::complex &aScalarIn, + std::complex *aMatrixOut, unsigned long aLength) +{ + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device, (complexf *)aMatrixIn, + (complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut, + _1 - value); +} + +void unarySubc(const std::complex &aScalarIn, + std::complex *aMatrixIn, + std::complex *aMatrixOut, unsigned long aLength) +{ + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device, (complexf *)aMatrixIn, + (complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut, + value - _1); +} + +void unarySubmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1, + Aurora::ValueType aValType2 ) +{ + if (aValType1 == aValType2 ) + { + if(Aurora::Normal == aValType1){ + thrust::minus op; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op); + } + else{ + thrust::minus op; + thrust::transform(thrust::device, (complexf *)aMatrixIn1, + (complexf *)aMatrixIn1 + aLength, + (complexf *)aMatrixIn2, (complexf *)aMatrixOut, + op); + } + } + else if(Aurora::Complex == aValType1){ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return complexf(x.real()-y,x.imag()); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda); + } + else{ + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(x-y.real(),0-y.imag()); + }; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength,(complexf*)aMatrixIn2,(complexf*)aMatrixOut,lambda); + } +} + +void unarySubmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype,int direction) +{ + if (aVectype == Aurora::Column){ + //类型一样时,逐元素处理即可 + if(aValType1 == aValType2){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const float& y){ + return direction==0?x-y:y-x; + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,lambda); + } + else if (aValType1 == Aurora::Complex){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return direction==0?complexf(x.real()-y,x.imag()):complexf(y-x.real(),-x.imag()); + }; + thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return direction==0?complexf(x-y.real(),-y.imag()):complexf(y.real()-x,y.imag()); + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } + else{ + int colElementCount = aMatrixLength/aVectorLength; + if(aValType1 == aValType2){ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const float& y){ + return direction==0?x-y:y-x; + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda); + } + else if (aValType1 == Aurora::Complex){ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + return direction==0?complexf(x.real()-y,x.imag()):complexf(y-x.real(),-x.imag()); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + return direction==0?complexf(x-y.real(),-y.imag()):complexf(y.real()-x,y.imag()); + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } +} + void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){ thrust::transform(thrust::device,in1,in1+length,out,_1/in2); } +void unaryDiv(const float& in2, float* in1, float* out, unsigned long length){ + thrust::transform(thrust::device,in1,in1+length,out,in2/_1); +} + +void unaryDivc(std::complex *aMatrixIn, + const std::complex &aScalarIn, + std::complex *aMatrixOut, unsigned long aLength) +{ + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device, (complexf *)aMatrixIn, + (complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut, + _1 / value); +} + +void unaryDivc(const std::complex &aScalarIn, + std::complex *aMatrixIn, + std::complex *aMatrixOut, unsigned long aLength) +{ + complexf value (aScalarIn.real(), aScalarIn.imag()); + thrust::transform(thrust::device, (complexf *)aMatrixIn, + (complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut, + value / _1); +} + +void unaryDivmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1, + Aurora::ValueType aValType2 ) +{ + if (aValType1 == aValType2 ) + { + if(Aurora::Normal == aValType1){ + thrust::divides op; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op); + } + else{ + thrust::divides op; + thrust::transform(thrust::device, (complexf *)aMatrixIn1, + (complexf *)aMatrixIn1 + aLength, + (complexf *)aMatrixIn2, (complexf *)aMatrixOut, + op); + } + } + else if(Aurora::Complex == aValType1){ + auto lambda = [=] __device__(const complexf& x, const float& y){ + return x/complexf(y,0); + }; + thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda); + } + else{ + auto lambda = [=] __device__(const float& x, const complexf& y){ + return complexf(x,0)/y; + }; + thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength,(complexf*)aMatrixIn2,(complexf*)aMatrixOut,lambda); + } +} + +void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype,int direction) +{ + if (aVectype == Aurora::Column){ + //类型一样时,逐元素处理即可 + if(aValType1 == aValType2){ + if (aValType1==Aurora::Normal){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const float& y){ + return direction==0?x/y:y/x; + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,aMatrixOut,lambda); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const complexf& y){ + return direction==0?x/y:y/x; + }; + thrust::transform(thrust::device, (complexf*)aMatrixIn1, + (complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + + } + else if (aValType1 == Aurora::Complex){ + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + complexf v (y,0); + return direction==0?x/v:v/x; + }; + thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + complexf v (x,0); + return direction==0?v/y:y/v; + }; + thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } + else{ + int colElementCount = aMatrixLength/aVectorLength; + if(aValType1 == aValType2){ + if (aValType1 == Aurora::Normal) + { + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const float& y){ + return direction==0?x/y:y/x; + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda); + } + else{ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const complexf& y){ + return direction==0?x/y:y/x; + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + + } + else if (aValType1 == Aurora::Complex){ + Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const complexf& x, const float& y){ + complexf v (y,0); + return direction==0?x/v:v/x; + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + else{ + Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); + Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); + Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + auto lambda = [=] __device__(const float& x, const complexf& y){ + complexf v (x,0); + return direction==0?v/y:y/v; + }; + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + } + } +} + + + void unaryPow(float* in1, float N,float* out, unsigned long length){ if (N == 0.0f) { @@ -305,3 +733,4 @@ void thrustFill(float* aBegin, float* aEnd, std::complex aValue) thrust::fill(thrust::device, (std::complex*)aBegin, (std::complex*)aEnd, aValue); } + diff --git a/src/CudaMatrixPrivate.cuh b/src/CudaMatrixPrivate.cuh index 89b0fdc..6577a66 100644 --- a/src/CudaMatrixPrivate.cuh +++ b/src/CudaMatrixPrivate.cuh @@ -3,32 +3,290 @@ #define __CUDAMATRIX_CUH__ #include +#include "AuroraDefs.h" namespace{ enum CompareType { G,GE,E,NE,LE,L }; + enum VecType + { + COL,ROW + }; } -void unaryAdd(float* in1, float* in2, float* out, unsigned long length); -void unaryAdd(float* in1, const float& in2, float* out, unsigned long length); -void unaryMul(float* in1, float* in2, float* out, unsigned long length); -void unaryMulc(float* in1, float* in2, float* out, unsigned long length); -void unaryMul(float* in1, const float& in2, float* out, unsigned long length); +/** + * @brief 矩阵逐元素加一个float值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryAdd(float *aMatrixIn, const float &aValue, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素加一个complex值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue complex值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryAddc(std::complex *aMatrixIn, + const std::complex &aValue, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 两相同大小矩阵逐元素相加 + * @param aMatrixIn1 矩阵1数据指针 + * @param aMatrixIn2 矩阵2数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + * @param aValType1 矩阵1数据类型 + * @param aValType2 矩阵2数据类型 + */ +void unaryAddmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1 = Aurora::Normal, + Aurora::ValueType aValType2 = Aurora::Normal); + +/** + * @brief 矩阵与向量点对点相加 + * + * @param aMatrixIn 矩阵数据指针 + * @param aVectorIn 向量数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aMatrixLength 矩阵数据总长度 + * @param aVectorLength 向量数据总长度 + * @param aMatValType 矩阵数据类型,normal or complex + * @param aVecValType 向量数据类型 + * @param aFuncType 函数执行方向,只支持 Row or Column + */ +void unaryAddmv(float *aMatrixIn, float *aVectorIn, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aMatValType, Aurora::ValueType aVecValType, + Aurora::FunctionDirection aFuncType = Aurora::FunctionDirection::Column); + +/** + * @brief 矩阵逐元素乘一个float值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryMul(float *aMatrixIn, const float &aValue, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素乘一个complex值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue complex值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryMulc(std::complex *aMatrixIn, + const std::complex &aValue, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 两相同大小矩阵逐元素相乘 + * + * @param aMatrixIn1 矩阵1数据指针 + * @param aMatrixIn2 矩阵2数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + * @param aValType1 矩阵1数据类型 + * @param aValType2 矩阵2数据类型 + */ +void unaryMulmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1 = Aurora::Normal, + Aurora::ValueType aValType2 = Aurora::Normal); + +/** + * @brief 矩阵与向量点对点相乘 + * + * @param aMatrixIn 矩阵数据指针 + * @param aVectorIn 向量数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aMatrixLength 矩阵数据总长度 + * @param aVectorLength 向量数据总长度 + * @param aMatValType 矩阵数据类型,normal or complex + * @param aVecValType 向量数据类型 + * @param aFuncType 函数执行方向,只支持 Row or Column + */ +void unaryMulmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column); + +/** + * @brief 矩阵逐元素减一个float值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unarySub(float *aMatrixIn, const float &aValue, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素被一个float值减 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unarySub(const float &aValue,float *aMatrixIn, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素减一个complex值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue complex值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unarySubc(std::complex *aMatrixIn, + const std::complex &aValue, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 矩阵逐元素被一个complex值减 + * + * @param aValue complex值 + * @param aMatrixIn 矩阵数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unarySubc(const std::complex &aValue, + std::complex *aMatrixIn, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 两相同大小矩阵逐元素相减 + * + * @param aMatrixIn1 矩阵1数据指针 + * @param aMatrixIn2 矩阵2数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + * @param aValType1 矩阵1数据类型 + * @param aValType2 矩阵2数据类型 + */ +void unarySubmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1 = Aurora::Normal, + Aurora::ValueType aValType2 = Aurora::Normal); + +/** + * @brief 矩阵与向量点对点相减 + * + * @param aMatrixIn 矩阵数据指针 + * @param aVectorIn 向量数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aMatrixLength 矩阵数据总长度 + * @param aVectorLength 向量数据总长度 + * @param aMatValType 矩阵数据类型,normal or complex + * @param aVecValType 向量数据类型 + * @param aFuncType 函数执行方向,只支持 Row or Column + * @param direction 减法执行方向, 0 mat - vec, 1 vec- mat + */ +void unarySubmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column, + int direction = 0); + +/** + * @brief 矩阵逐元素除以一个float值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryDiv(float *aMatrixIn, const float &aValue, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素被一个float值除 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue float值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryDiv(const float &aValue,float *aMatrixIn, float *aMatrixOut, + unsigned long aLength); + +/** + * @brief 矩阵逐元素除以一个complex值 + * + * @param aMatrixIn 矩阵数据指针 + * @param aValue complex值 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryDivc(std::complex *aMatrixIn, + const std::complex &aValue, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 矩阵逐元素被一个complex值除 + * + * @param aValue complex值 + * @param aMatrixIn 矩阵数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + */ +void unaryDivc(const std::complex &aValue, + std::complex *aMatrixIn, + std::complex *aMatrixOut, unsigned long aLength); + +/** + * @brief 两相同大小矩阵逐元素相除 + * + * @param aMatrixIn1 矩阵1数据指针 + * @param aMatrixIn2 矩阵2数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aLength 矩阵总元素长度 + * @param aValType1 矩阵1数据类型 + * @param aValType2 矩阵2数据类型 + */ +void unaryDivmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut, + unsigned long aLength, + Aurora::ValueType aValType1 = Aurora::Normal, + Aurora::ValueType aValType2 = Aurora::Normal); + +/** + * @brief 矩阵与向量点对点相除 + * + * @param aMatrixIn 矩阵数据指针 + * @param aVectorIn 向量数据指针 + * @param aMatrixOut 输出矩阵数据指针 + * @param aMatrixLength 矩阵数据总长度 + * @param aVectorLength 向量数据总长度 + * @param aMatValType 矩阵数据类型,normal or complex + * @param aVecValType 向量数据类型 + * @param aFuncType 函数执行方向,只支持 Row or Column + * @param direction 减法执行方向, 0 mat - vec, 1 vec- mat + */ +void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, + unsigned long aMatrixLength,unsigned long aVectorLength, + Aurora::ValueType aValType1, Aurora::ValueType aValType2, + Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column, + int direction = 0); void unaryNeg(float* in1, float* out, unsigned long length); void unaryPow(float* in1, float N,float* out, unsigned long length); -void unarySub(float* in1, float* in2, float* out, unsigned long length); -void unaryDiv(float* in1, float* in2, float* out, unsigned long length); -void unaryDivc(float* in1, float* in2, float* out, unsigned long length); - -void unarySub(const float& in1, float* in2, float* out, unsigned long length); -void unaryDiv(const float& in1, float* in2, float* out, unsigned long length); -void unarySub(float* in1, const float& in2, float* out, unsigned long length); -void unaryDiv(float* in1, const float& in2, float* out, unsigned long length); - - void unaryCompare(float* in1, const float& in2, float* out, unsigned long length,int type); void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type); void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type); diff --git a/src/Matrix.cpp b/src/Matrix.cpp index b480b58..475cd9d 100644 --- a/src/Matrix.cpp +++ b/src/Matrix.cpp @@ -2,8 +2,10 @@ #include "CudaMatrix.h" #include +#include #include #include +#include #include #include #include @@ -16,6 +18,7 @@ #include "Eigen/src/Core/Map.h" #include "Eigen/src/Core/Matrix.h" #include "Function.h" +#include "Function1D.h" #ifdef USE_CUDA #include @@ -130,7 +133,7 @@ namespace Aurora{ } else{ float zero = 0.0; - aFuncD(size, yC+ 1, 2, &zero, 0, output + 1, 2); + aFuncD(size, &zero, 0, yC+ 1, 2, output + 1, 2); } } @@ -560,7 +563,14 @@ namespace Aurora { } //operation / Matrix Matrix::operator/(float aScalar) const { return operatorMxA(&vsDivI, aScalar, *this);} - Matrix operator/(float aScalar, const Matrix &matrix) {return operatorAxM(&vsDivI, aScalar, matrix);} + Matrix operator/(float aScalar, const Matrix &aMatrix) { + if(!aMatrix.isComplex())return operatorAxM(&vsDivI, aScalar, aMatrix); + float *output = malloc(aMatrix.getDataSize(), true); + std::complex v(aScalar,0); + vcDivI(aMatrix.getDataSize(), &v, 0,(std::complex*)aMatrix.getData(), 1, (std::complex*)output, 1); + return Matrix::New(output, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), + aMatrix.getValueType()); + } Matrix Matrix::operator/(const Matrix &aMatrix) const { if (isScalar()){ return getScalar()/aMatrix; @@ -568,6 +578,9 @@ namespace Aurora { if (aMatrix.isScalar()){ return (*this)/aMatrix.getScalar(); } + if(aMatrix.isComplex()&& !isComplex()){ + return operatorMxM_RR(&vsDivI,&vcDivI,aMatrix,complex(*this),true); + } return operatorMxM(vsDivI, vcDivI, *this, aMatrix); } Matrix &operator/(float aScalar, Matrix &&matrix) { @@ -583,7 +596,8 @@ namespace Aurora { if (aMatrix.isScalar()){ return (*this)/aMatrix.getScalar(); } - return operatorMxM_RR(&vsDivI,&vcDivI,*this,std::forward(aMatrix)); + Matrix leftMat = (aMatrix.isComplex()&& !isComplex())? complex(*this):(*this); + return operatorMxM_RR(&vsDivI,&vcDivI,leftMat,std::forward(aMatrix)); } Matrix operator/(Matrix &&aMatrix, Matrix &aOther) { @@ -593,7 +607,12 @@ namespace Aurora { if (aMatrix.isScalar()){ return aMatrix.getScalar()/aOther; } + if( aOther.isComplex()&& !aMatrix.isComplex()) + { + return operatorMxM_RR(&vsDivI,&vcDivI,aOther,std::forward(complex(aMatrix)),true); + } return operatorMxM_RR(&vsDivI,&vcDivI,aOther,std::forward(aMatrix),true); + } //operator ^ (pow) Matrix Matrix::operator^(int times) const { return operatorMxA(&vsPowI, times, *this);} diff --git a/test/CudaMatrix_Test.cpp b/test/CudaMatrix_Test.cpp index 31dabf6..e84baf8 100644 --- a/test/CudaMatrix_Test.cpp +++ b/test/CudaMatrix_Test.cpp @@ -1,4 +1,5 @@ #include +#include #include #include "AuroraDefs.h" #include "Matrix.h" @@ -26,332 +27,2181 @@ protected: }; TEST_F(CudaMatrix_Test, MatrixAdd) { - auto A = Aurora::zeros(1000,1,1); - auto B = Aurora::zeros(1000,1,1); - for (size_t i = 0; i < 1000; i++) { - A[i] = -1; - B[i] = i; - } - printf("Test CudaMatrix operator+(const CudaMatrix &aMatrix) const \r\n"); - //CudaMatrix operator+(const CudaMatrix &aMatrix) const - auto C = A+B; - auto dA = A.toDeviceMatrix(); - auto dB = B.toDeviceMatrix(); - auto dC = (dA+dB); - auto dhC = dC.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) - { - ASSERT_FLOAT_EQ(C[i],dhC[i]); - } - printf("Test CudaMatrix operator+(float aScalar) const \r\n"); - //CudaMatrix operator+(float aScalar) const - auto D = C+0.5; - auto dD = dC+0.5; - auto dhD = dD.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) - { - ASSERT_FLOAT_EQ(D[i],dhD[i]); - } - printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n"); - // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) - dD = 0.5 + dC; - dhD = dD.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) - { - ASSERT_FLOAT_EQ(D[i],dhD[i]); - } - printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n"); - // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) - { - auto dD2 = 0.5 + (dA+dB); - dhD = dD2.toHostMatrix(); + auto A = Aurora::zeros(1000,1,1); + auto B = Aurora::zeros(1000,1,1); + for (size_t i = 0; i < 1000; i++) + { + A[i] = -1; + B[i] = i; + } + printf("Test CudaMatrix operator+(const CudaMatrix &aMatrix) const \r\n"); + //CudaMatrix operator+(const CudaMatrix &aMatrix) const + auto C = A+B; + auto dA = A.toDeviceMatrix(); + auto dB = B.toDeviceMatrix(); + auto dC = (dA+dB); + auto dhC = dC.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(C[i],dhC[i]); + } + printf("Test CudaMatrix operator+(float aScalar) const \r\n"); + //CudaMatrix operator+(float aScalar) const + auto D = C+0.5; + auto dD = dC+0.5; + auto dhD = dD.toHostMatrix(); for (size_t i = 0; i < 1000; i++) { ASSERT_FLOAT_EQ(D[i],dhD[i]); } - } - printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n"); - // CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) - { - - auto dD2 = (dA+dB)+0.5; - dhD = dD2.toHostMatrix(); + printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n"); + // CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) + dD = 0.5 + dC; + dhD = dD.toHostMatrix(); for (size_t i = 0; i < 1000; i++) { ASSERT_FLOAT_EQ(D[i],dhD[i]); } - } - //CudaMatrix operator+(CudaMatrix &&aMatrix) const - printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix) const \r\n"); - { - auto D = A+C; - auto dD2 = dA+(dA+dB); - dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n"); + // CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + auto dD2 = 0.5 + (dA+dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n"); + // CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) + { + + auto dD2 = (dA+dB)+0.5; + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix) const + printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix) const \r\n"); + { + auto D = A+C; + auto dD2 = dA+(dA+dB); + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } + } + //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) + printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n"); + { + auto D = A+C; + auto dD2 = (dA+dB)+dA; + dhD = dD2.toHostMatrix(); + for (size_t i = 0; i < 1000; i++) + { + ASSERT_FLOAT_EQ(D[i],dhD[i]); + } } } - //CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) - printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n"); { - auto D = A+C; - auto dD2 = (dA+dB)+dA; - dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + float *dataA = new float[10000]; + float *dataB = new float[10000]; + for (size_t i = 0; i < 10000; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + dataA[i] = 8; + dataB[i] = -1; + } + auto A = Aurora::Matrix::fromRawData(dataA, 50,100,1,Aurora::Complex); + auto B = Aurora::Matrix::fromRawData(dataB, 50,100,1,Aurora::Complex); + auto C = A+B; + std::complex scalar(-1,-1); + auto dA = A.toDeviceMatrix(); + + auto dC = (dA+scalar); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = A.toDeviceMatrix()+scalar; + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = scalar+A.toDeviceMatrix(); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = A+1; + dC = (dA+1); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index"< scalar(-1,-1); + auto dA = A.toDeviceMatrix(); - printf("Test CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix) \r\n"); - // CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix) - dD = 0.5 * dC; - dhD = dD.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) - { - ASSERT_FLOAT_EQ(D[i],dhD[i]); - } - printf("Test CudaMatrix &operator*(float aScalar, CudaMatrix &&aMatrix) \r\n"); - // CudaMatrix &operator*(float aScalar, CudaMatrix &&aMatrix) - { - auto dD2 = 0.5 * (dA*dB); - dhD = dD2.toHostMatrix(); - for (size_t i = 0; i < 1000; i++) + auto dC = (dA*scalar); + for (size_t i = 0; i < C.getDataSize()*2; i++) { - ASSERT_FLOAT_EQ(D[i],dhD[i]); + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = A.toDeviceMatrix()*scalar; + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = scalar*A.toDeviceMatrix(); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = A*2; + dC = (dA*2); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index"< scalar(-1,-1); + auto dA = A.toDeviceMatrix(); + + auto dC = (dA-scalar); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = A.toDeviceMatrix()-scalar; + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = B-A; + dC = scalar-A.toDeviceMatrix(); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = A-1; + dC = (dA-1); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index"< scalar(-2,-2); + auto dA = A.toDeviceMatrix(); + + auto dC = (dA/scalar); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + + dC = A.toDeviceMatrix()/scalar; + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = B/A; + dC = scalar/A.toDeviceMatrix(); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i)); + } + C = A/2.0; + dC = (dA/2.0); + for (size_t i = 0; i < C.getDataSize()*2; i++) + { + ASSERT_FLOAT_EQ(C[i],dC.getValue(i))<<"index"<