Cuda +-*/ with mm.mv and scalar(complex and real) support

This commit is contained in:
kradchen
2023-12-13 16:40:11 +08:00
parent f2b81eddc3
commit deaac03704
6 changed files with 3783 additions and 709 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -43,11 +43,23 @@ namespace Aurora
*/
CudaMatrix deepCopy() const;
/**
* @brief 深拷贝操作,但不复制数据
*
* @return CudaMatrix
*/
CudaMatrix deepCopyShape(bool complex = false) const;
// add
CudaMatrix operator+(float aScalar) const;
friend CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar);
CudaMatrix operator+(std::complex<float> aScalar) const;
friend CudaMatrix operator+(std::complex<float> aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator+(std::complex<float> aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator+(CudaMatrix &&aMatrix,std::complex<float> aScalar);
CudaMatrix operator+(const CudaMatrix &aMatrix) const;
CudaMatrix operator+(CudaMatrix &&aMatrix) const;
friend CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther);
@@ -57,6 +69,10 @@ namespace Aurora
friend CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator-(float aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator-(CudaMatrix &&aMatrix,float aScalar);
CudaMatrix operator-(std::complex<float> aScalar) const;
friend CudaMatrix operator-(std::complex<float> aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator-(std::complex<float> aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator-(CudaMatrix &&aMatrix,std::complex<float> aScalar);
CudaMatrix operator-(const CudaMatrix &aMatrix) const;
CudaMatrix operator-(CudaMatrix &&aMatrix) const;
friend CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther);
@@ -70,6 +86,10 @@ namespace Aurora
friend CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar);
CudaMatrix operator*(std::complex<float> aScalar) const;
friend CudaMatrix operator*(std::complex<float> aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator*(std::complex<float> aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator*(CudaMatrix &&aMatrix,std::complex<float> aScalar);
CudaMatrix operator*(const CudaMatrix &aMatrix) const;
CudaMatrix operator*(CudaMatrix &&aMatrix) const;
friend CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther);
@@ -79,6 +99,10 @@ namespace Aurora
friend CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator/(float aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator/(CudaMatrix &&aMatrix,float aScalar);
CudaMatrix operator/(std::complex<float> aScalar) const;
friend CudaMatrix operator/(std::complex<float> aScalar, const CudaMatrix &aMatrix);
friend CudaMatrix& operator/(std::complex<float> aScalar, CudaMatrix &&aMatrix);
friend CudaMatrix& operator/(CudaMatrix &&aMatrix,std::complex<float> aScalar);
CudaMatrix operator/(const CudaMatrix &aMatrix) const;
CudaMatrix operator/(CudaMatrix &&aMatrix) const;
friend CudaMatrix operator/(CudaMatrix &&aMatrix,CudaMatrix &aOther);

View File

@@ -4,7 +4,9 @@
#include <thrust/complex.h>
#include <thrust/functional.h>
#include <thrust/execution_policy.h>
#include "AuroraDefs.h"
#include "AuroraThrustIterator.cuh"
using namespace thrust::placeholders;
struct PowOp: public thrust::unary_function<float, float>{
@@ -127,36 +129,224 @@ struct CompareANEOp{
}
};
typedef thrust::complex<float> complexf;
void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
void unaryAddmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2)
{
if (aValType1 == aValType2)
{
thrust::plus<float> op;
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op);
}
else if(Aurora::Complex == aValType1){
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()+y,x.imag());
};
thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
else{
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()+y,x.imag());
};
thrust::transform(thrust::device,(complexf*)aMatrixIn2,(complexf*)aMatrixIn2+aLength,aMatrixIn1,(complexf*)aMatrixOut,lambda);
}
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
{
thrust::transform(thrust::device,in1,in1+length,out,in2 + _1);
}
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
void unaryAddmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype)
{
if (aVectype == Aurora::Column){
if(aValType1 == aValType2){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
thrust::plus<float> op;
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,op);
}
else if (aValType1 == Aurora::Complex){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()+y,x.imag());
};
thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(y.real()+x,y.imag());
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else{
int colElementCount = aMatrixLength/aVectorLength;
if(aValType1 == aValType2){
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
thrust::plus<float> op;
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,op);
}
else if (aValType1 == Aurora::Complex){
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()+y,x.imag());
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(y.real()+x,y.imag());
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
}
void unaryAdd(float* aMatrixIn, const float& aVectorIn, float* aMatrixOut, unsigned long aLength)
{
thrust::transform(thrust::device,aMatrixIn,aMatrixIn+aLength,aMatrixOut,aVectorIn + _1);
}
void unaryAddc(std::complex<float>* aMatrixIn, const std::complex<float>& aScalarIn,
std::complex<float>* aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device,(complexf*)aMatrixIn,
(complexf*)aMatrixIn + aLength,
(complexf*)aMatrixOut, value + _1);
}
void unaryMul(float* aMatrixIn, const float& aScalarIn, float* aMatrixOut, unsigned long aLength)
{
thrust::transform(thrust::device,aMatrixIn,aMatrixIn+aLength,aMatrixOut,aScalarIn * _1);
}
void unaryMulc(std::complex<float> *aMatrixIn,
const std::complex<float> &aScalarIn,
std::complex<float> *aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device, (complexf *)aMatrixIn,
(complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut,
value * _1);
}
void unaryMulmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2)
{
if (aValType1 == aValType2 )
{
if(Aurora::Normal == aValType1){
thrust::multiplies<float> op;
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op);
}
else{
thrust::multiplies<complexf> op;
thrust::transform(thrust::device, (complexf *)aMatrixIn1,
(complexf *)aMatrixIn1 + aLength,
(complexf *)aMatrixIn2, (complexf *)aMatrixOut,
op);
}
}
else if(Aurora::Complex == aValType1){
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()*y,x.imag()*y);
};
thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
else{
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()*y,x.imag()*y);
};
thrust::transform(thrust::device,(complexf*)aMatrixIn2,(complexf*)aMatrixIn2+aLength,aMatrixIn1,(complexf*)aMatrixOut,lambda);
}
void unaryMulc(float* in1, float* in2, float* out, unsigned long length)
{
thrust::complex<float>* _in1 = (thrust::complex<float>*)in1;
thrust::complex<float>* _in2 = (thrust::complex<float>*)in2;
thrust::complex<float>* _out = (thrust::complex<float>*)out;;
thrust::multiplies<thrust::complex<float>> op;
thrust::transform(thrust::device,_in1,_in1+length,_in2,_out,op);
}
void unaryMul(float* in1, const float& in2, float* out, unsigned long length)
void unaryMulmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype)
{
thrust::transform(thrust::device,in1, in1+length, out, in2 * _1);
if (aVectype == Aurora::Column){
if(aValType1 == aValType2){
if(aValType1 == Aurora::Normal){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
thrust::multiplies<float> op;
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,op);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
thrust::multiplies<complexf> op;
thrust::transform(thrust::device, (complexf*)aMatrixIn1,
(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,
(complexf*)aMatrixOut,op);
}
}
else if (aValType1 == Aurora::Complex){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()*y,x.imag()*y);
};
thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(y.real()*x,y.imag()*x);
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else{
int colElementCount = aMatrixLength/aVectorLength;
if(aValType1 == aValType2){
if(aValType1 == Aurora::Normal){
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
thrust::multiplies<float> op;
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,op);
}
else{
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
thrust::multiplies<complexf> op;
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,op);
}
}
else if (aValType1 == Aurora::Complex){
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()*y,x.imag()*y);
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(y.real()*x,y.imag()*x);
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
}
void unaryNeg(float* in1, float* out, unsigned long length){
@@ -164,42 +354,280 @@ void unaryNeg(float* in1, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,op);
}
void unarySub(float* in1, float* in2, float* out, unsigned long length){
thrust::minus<float> op;
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
}
void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
thrust::divides<float> op;
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
}
void unaryDivc(float* in1, float* in2, float* out, unsigned long length)
{
thrust::complex<float>* _in1 = (thrust::complex<float>*)in1;
thrust::complex<float>* _in2 = (thrust::complex<float>*)in2;
thrust::complex<float>* _out = (thrust::complex<float>*)out;;
thrust::divides<thrust::complex<float>> op;
thrust::transform(thrust::device,_in1,_in1+length,_in2,_out,op);
}
void unarySub(const float& in1, float* in2, float* out, unsigned long length){
thrust::transform(thrust::device,in2,in2+length,out,in1-_1);
}
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length){
thrust::transform(thrust::device,in2,in2+length,out,in1/_1);
}
void unarySub(float* in1, const float& in2, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,_1-in2);
}
void unarySub(const float& in2, float* in1, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,in2-_1);
}
void unarySubc(std::complex<float> *aMatrixIn,
const std::complex<float> &aScalarIn,
std::complex<float> *aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device, (complexf *)aMatrixIn,
(complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut,
_1 - value);
}
void unarySubc(const std::complex<float> &aScalarIn,
std::complex<float> *aMatrixIn,
std::complex<float> *aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device, (complexf *)aMatrixIn,
(complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut,
value - _1);
}
void unarySubmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1,
Aurora::ValueType aValType2 )
{
if (aValType1 == aValType2 )
{
if(Aurora::Normal == aValType1){
thrust::minus<float> op;
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op);
}
else{
thrust::minus<complexf> op;
thrust::transform(thrust::device, (complexf *)aMatrixIn1,
(complexf *)aMatrixIn1 + aLength,
(complexf *)aMatrixIn2, (complexf *)aMatrixOut,
op);
}
}
else if(Aurora::Complex == aValType1){
auto lambda = [=] __device__(const complexf& x, const float& y){
return complexf(x.real()-y,x.imag());
};
thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
else{
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(x-y.real(),0-y.imag());
};
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength,(complexf*)aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
}
void unarySubmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype,int direction)
{
if (aVectype == Aurora::Column){
//类型一样时,逐元素处理即可
if(aValType1 == aValType2){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const float& y){
return direction==0?x-y:y-x;
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength*aValType1,rowVectorIter,aMatrixOut,lambda);
}
else if (aValType1 == Aurora::Complex){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return direction==0?complexf(x.real()-y,x.imag()):complexf(y-x.real(),-x.imag());
};
thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return direction==0?complexf(x-y.real(),-y.imag()):complexf(y.real()-x,y.imag());
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else{
int colElementCount = aMatrixLength/aVectorLength;
if(aValType1 == aValType2){
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const float& y){
return direction==0?x-y:y-x;
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda);
}
else if (aValType1 == Aurora::Complex){
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
return direction==0?complexf(x.real()-y,x.imag()):complexf(y-x.real(),-x.imag());
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
return direction==0?complexf(x-y.real(),-y.imag()):complexf(y.real()-x,y.imag());
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
}
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,_1/in2);
}
void unaryDiv(const float& in2, float* in1, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,in2/_1);
}
void unaryDivc(std::complex<float> *aMatrixIn,
const std::complex<float> &aScalarIn,
std::complex<float> *aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device, (complexf *)aMatrixIn,
(complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut,
_1 / value);
}
void unaryDivc(const std::complex<float> &aScalarIn,
std::complex<float> *aMatrixIn,
std::complex<float> *aMatrixOut, unsigned long aLength)
{
complexf value (aScalarIn.real(), aScalarIn.imag());
thrust::transform(thrust::device, (complexf *)aMatrixIn,
(complexf *)aMatrixIn + aLength, (complexf *)aMatrixOut,
value / _1);
}
void unaryDivmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1,
Aurora::ValueType aValType2 )
{
if (aValType1 == aValType2 )
{
if(Aurora::Normal == aValType1){
thrust::divides<float> op;
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength*aValType1,aMatrixIn2,aMatrixOut,op);
}
else{
thrust::divides<complexf> op;
thrust::transform(thrust::device, (complexf *)aMatrixIn1,
(complexf *)aMatrixIn1 + aLength,
(complexf *)aMatrixIn2, (complexf *)aMatrixOut,
op);
}
}
else if(Aurora::Complex == aValType1){
auto lambda = [=] __device__(const complexf& x, const float& y){
return x/complexf(y,0);
};
thrust::transform(thrust::device,(complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aLength,aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
else{
auto lambda = [=] __device__(const float& x, const complexf& y){
return complexf(x,0)/y;
};
thrust::transform(thrust::device,aMatrixIn1,aMatrixIn1+aLength,(complexf*)aMatrixIn2,(complexf*)aMatrixOut,lambda);
}
}
void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype,int direction)
{
if (aVectype == Aurora::Column){
//类型一样时,逐元素处理即可
if(aValType1 == aValType2){
if (aValType1==Aurora::Normal){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const float& y){
return direction==0?x/y:y/x;
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,aMatrixOut,lambda);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const complexf& y){
return direction==0?x/y:y/x;
};
thrust::transform(thrust::device, (complexf*)aMatrixIn1,
(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else if (aValType1 == Aurora::Complex){
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
complexf v (y,0);
return direction==0?x/v:v/x;
};
thrust::transform(thrust::device, (complexf*)aMatrixIn1,(complexf*)aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
complexf v (x,0);
return direction==0?v/y:y/v;
};
thrust::transform(thrust::device, aMatrixIn1,aMatrixIn1+aMatrixLength,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else{
int colElementCount = aMatrixLength/aVectorLength;
if(aValType1 == aValType2){
if (aValType1 == Aurora::Normal)
{
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const float& y){
return direction==0?x/y:y/x;
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda);
}
else{
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const complexf& y){
return direction==0?x/y:y/x;
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
else if (aValType1 == Aurora::Complex){
Aurora::RowWiseIterator<complexf> rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<complexf> rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<float> rowVectorIter(aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const complexf& x, const float& y){
complexf v (y,0);
return direction==0?x/v:v/x;
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
else{
Aurora::RowWiseIterator<float> rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0);
Aurora::RowWiseIterator<float> rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength);
Aurora::LoopVectorIterator<complexf> rowVectorIter((complexf*)aVectorIn2,aVectorLength);
auto lambda = [=] __device__(const float& x, const complexf& y){
complexf v (x,0);
return direction==0?v/y:y/v;
};
thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda);
}
}
}
void unaryPow(float* in1, float N,float* out, unsigned long length){
if (N == 0.0f)
{
@@ -305,3 +733,4 @@ void thrustFill(float* aBegin, float* aEnd, std::complex<float> aValue)
thrust::fill(thrust::device, (std::complex<float>*)aBegin, (std::complex<float>*)aEnd, aValue);
}

View File

@@ -3,32 +3,290 @@
#define __CUDAMATRIX_CUH__
#include <complex>
#include "AuroraDefs.h"
namespace{
enum CompareType
{
G,GE,E,NE,LE,L
};
enum VecType
{
COL,ROW
};
}
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
void unaryMulc(float* in1, float* in2, float* out, unsigned long length);
void unaryMul(float* in1, const float& in2, float* out, unsigned long length);
/**
* @brief 矩阵逐元素加一个float值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryAdd(float *aMatrixIn, const float &aValue, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素加一个complex<float>值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue complex<float>值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryAddc(std::complex<float> *aMatrixIn,
const std::complex<float> &aValue,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 两相同大小矩阵逐元素相加
* @param aMatrixIn1 矩阵1数据指针
* @param aMatrixIn2 矩阵2数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
* @param aValType1 矩阵1数据类型
* @param aValType2 矩阵2数据类型
*/
void unaryAddmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1 = Aurora::Normal,
Aurora::ValueType aValType2 = Aurora::Normal);
/**
* @brief 矩阵与向量点对点相加
*
* @param aMatrixIn 矩阵数据指针
* @param aVectorIn 向量数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aMatrixLength 矩阵数据总长度
* @param aVectorLength 向量数据总长度
* @param aMatValType 矩阵数据类型normal or complex
* @param aVecValType 向量数据类型
* @param aFuncType 函数执行方向,只支持 Row or Column
*/
void unaryAddmv(float *aMatrixIn, float *aVectorIn, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aMatValType, Aurora::ValueType aVecValType,
Aurora::FunctionDirection aFuncType = Aurora::FunctionDirection::Column);
/**
* @brief 矩阵逐元素乘一个float值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryMul(float *aMatrixIn, const float &aValue, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素乘一个complex<float>值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue complex<float>值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryMulc(std::complex<float> *aMatrixIn,
const std::complex<float> &aValue,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 两相同大小矩阵逐元素相乘
*
* @param aMatrixIn1 矩阵1数据指针
* @param aMatrixIn2 矩阵2数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
* @param aValType1 矩阵1数据类型
* @param aValType2 矩阵2数据类型
*/
void unaryMulmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1 = Aurora::Normal,
Aurora::ValueType aValType2 = Aurora::Normal);
/**
* @brief 矩阵与向量点对点相乘
*
* @param aMatrixIn 矩阵数据指针
* @param aVectorIn 向量数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aMatrixLength 矩阵数据总长度
* @param aVectorLength 向量数据总长度
* @param aMatValType 矩阵数据类型normal or complex
* @param aVecValType 向量数据类型
* @param aFuncType 函数执行方向,只支持 Row or Column
*/
void unaryMulmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column);
/**
* @brief 矩阵逐元素减一个float值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unarySub(float *aMatrixIn, const float &aValue, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素被一个float值减
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unarySub(const float &aValue,float *aMatrixIn, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素减一个complex<float>值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue complex<float>值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unarySubc(std::complex<float> *aMatrixIn,
const std::complex<float> &aValue,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 矩阵逐元素被一个complex<float>值减
*
* @param aValue complex<float>值
* @param aMatrixIn 矩阵数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unarySubc(const std::complex<float> &aValue,
std::complex<float> *aMatrixIn,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 两相同大小矩阵逐元素相减
*
* @param aMatrixIn1 矩阵1数据指针
* @param aMatrixIn2 矩阵2数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
* @param aValType1 矩阵1数据类型
* @param aValType2 矩阵2数据类型
*/
void unarySubmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1 = Aurora::Normal,
Aurora::ValueType aValType2 = Aurora::Normal);
/**
* @brief 矩阵与向量点对点相减
*
* @param aMatrixIn 矩阵数据指针
* @param aVectorIn 向量数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aMatrixLength 矩阵数据总长度
* @param aVectorLength 向量数据总长度
* @param aMatValType 矩阵数据类型normal or complex
* @param aVecValType 向量数据类型
* @param aFuncType 函数执行方向,只支持 Row or Column
* @param direction 减法执行方向, 0 mat - vec, 1 vec- mat
*/
void unarySubmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column,
int direction = 0);
/**
* @brief 矩阵逐元素除以一个float值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryDiv(float *aMatrixIn, const float &aValue, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素被一个float值除
*
* @param aMatrixIn 矩阵数据指针
* @param aValue float值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryDiv(const float &aValue,float *aMatrixIn, float *aMatrixOut,
unsigned long aLength);
/**
* @brief 矩阵逐元素除以一个complex<float>值
*
* @param aMatrixIn 矩阵数据指针
* @param aValue complex<float>值
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryDivc(std::complex<float> *aMatrixIn,
const std::complex<float> &aValue,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 矩阵逐元素被一个complex<float>值除
*
* @param aValue complex<float>值
* @param aMatrixIn 矩阵数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
*/
void unaryDivc(const std::complex<float> &aValue,
std::complex<float> *aMatrixIn,
std::complex<float> *aMatrixOut, unsigned long aLength);
/**
* @brief 两相同大小矩阵逐元素相除
*
* @param aMatrixIn1 矩阵1数据指针
* @param aMatrixIn2 矩阵2数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aLength 矩阵总元素长度
* @param aValType1 矩阵1数据类型
* @param aValType2 矩阵2数据类型
*/
void unaryDivmm(float *aMatrixIn1, float *aMatrixIn2, float *aMatrixOut,
unsigned long aLength,
Aurora::ValueType aValType1 = Aurora::Normal,
Aurora::ValueType aValType2 = Aurora::Normal);
/**
* @brief 矩阵与向量点对点相除
*
* @param aMatrixIn 矩阵数据指针
* @param aVectorIn 向量数据指针
* @param aMatrixOut 输出矩阵数据指针
* @param aMatrixLength 矩阵数据总长度
* @param aVectorLength 向量数据总长度
* @param aMatValType 矩阵数据类型normal or complex
* @param aVecValType 向量数据类型
* @param aFuncType 函数执行方向,只支持 Row or Column
* @param direction 减法执行方向, 0 mat - vec, 1 vec- mat
*/
void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut,
unsigned long aMatrixLength,unsigned long aVectorLength,
Aurora::ValueType aValType1, Aurora::ValueType aValType2,
Aurora::FunctionDirection aVectype = Aurora::FunctionDirection::Column,
int direction = 0);
void unaryNeg(float* in1, float* out, unsigned long length);
void unaryPow(float* in1, float N,float* out, unsigned long length);
void unarySub(float* in1, float* in2, float* out, unsigned long length);
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
void unaryDivc(float* in1, float* in2, float* out, unsigned long length);
void unarySub(const float& in1, float* in2, float* out, unsigned long length);
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
void unarySub(float* in1, const float& in2, float* out, unsigned long length);
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);
void unaryCompare(float* in1, const float& in2, float* out, unsigned long length,int type);
void unaryCompare(const float& in1, float* in2, float* out, unsigned long length, int type);
void unaryCompare(float* in1, float* in2, float* out, unsigned long length, int type);

View File

@@ -2,8 +2,10 @@
#include "CudaMatrix.h"
#include <cmath>
#include <complex>
#include <cstddef>
#include <mkl_cblas.h>
#include <mkl_vml_functions.h>
#include <string>
#include <cstring>
#include <iostream>
@@ -16,6 +18,7 @@
#include "Eigen/src/Core/Map.h"
#include "Eigen/src/Core/Matrix.h"
#include "Function.h"
#include "Function1D.h"
#ifdef USE_CUDA
#include <cuda_runtime.h>
@@ -130,7 +133,7 @@ namespace Aurora{
}
else{
float zero = 0.0;
aFuncD(size, yC+ 1, 2, &zero, 0, output + 1, 2);
aFuncD(size, &zero, 0, yC+ 1, 2, output + 1, 2);
}
}
@@ -560,7 +563,14 @@ namespace Aurora {
}
//operation /
Matrix Matrix::operator/(float aScalar) const { return operatorMxA(&vsDivI, aScalar, *this);}
Matrix operator/(float aScalar, const Matrix &matrix) {return operatorAxM(&vsDivI, aScalar, matrix);}
Matrix operator/(float aScalar, const Matrix &aMatrix) {
if(!aMatrix.isComplex())return operatorAxM(&vsDivI, aScalar, aMatrix);
float *output = malloc(aMatrix.getDataSize(), true);
std::complex<float> v(aScalar,0);
vcDivI(aMatrix.getDataSize(), &v, 0,(std::complex<float>*)aMatrix.getData(), 1, (std::complex<float>*)output, 1);
return Matrix::New(output, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2),
aMatrix.getValueType());
}
Matrix Matrix::operator/(const Matrix &aMatrix) const {
if (isScalar()){
return getScalar()/aMatrix;
@@ -568,6 +578,9 @@ namespace Aurora {
if (aMatrix.isScalar()){
return (*this)/aMatrix.getScalar();
}
if(aMatrix.isComplex()&& !isComplex()){
return operatorMxM_RR(&vsDivI,&vcDivI,aMatrix,complex(*this),true);
}
return operatorMxM(vsDivI, vcDivI, *this, aMatrix);
}
Matrix &operator/(float aScalar, Matrix &&matrix) {
@@ -583,7 +596,8 @@ namespace Aurora {
if (aMatrix.isScalar()){
return (*this)/aMatrix.getScalar();
}
return operatorMxM_RR(&vsDivI,&vcDivI,*this,std::forward<Matrix&&>(aMatrix));
Matrix leftMat = (aMatrix.isComplex()&& !isComplex())? complex(*this):(*this);
return operatorMxM_RR(&vsDivI,&vcDivI,leftMat,std::forward<Matrix&&>(aMatrix));
}
Matrix operator/(Matrix &&aMatrix, Matrix &aOther) {
@@ -593,7 +607,12 @@ namespace Aurora {
if (aMatrix.isScalar()){
return aMatrix.getScalar()/aOther;
}
if( aOther.isComplex()&& !aMatrix.isComplex())
{
return operatorMxM_RR(&vsDivI,&vcDivI,aOther,std::forward<Matrix&&>(complex(aMatrix)),true);
}
return operatorMxM_RR(&vsDivI,&vcDivI,aOther,std::forward<Matrix&&>(aMatrix),true);
}
//operator ^ (pow)
Matrix Matrix::operator^(int times) const { return operatorMxA(&vsPowI, times, *this);}

File diff suppressed because it is too large Load Diff