diff --git a/src/CudaMatrixPrivate.cu b/src/CudaMatrixPrivate.cu index e5c4d95..26c6813 100644 --- a/src/CudaMatrixPrivate.cu +++ b/src/CudaMatrixPrivate.cu @@ -450,28 +450,31 @@ void unarySubmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter(aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const float& x, const float& y){ return direction==0?x-y:y-x; }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } else if (aValType1 == Aurora::Complex){ Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter((complexf*)aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const complexf& x, const float& y){ return direction==0?complexf(x.real()-y,x.imag()):complexf(y-x.real(),-x.imag()); }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } else{ Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter((complexf*)aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const float& x, const complexf& y){ return direction==0?complexf(x-y.real(),-y.imag()):complexf(y.real()-x,y.imag()); }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } } } @@ -587,19 +590,21 @@ void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter(aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const float& x, const float& y){ return direction==0?x/y:y/x; }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } else{ Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter((complexf*)aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const complexf& x, const complexf& y){ return direction==0?x/y:y/x; }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } } @@ -607,21 +612,23 @@ void unaryDivmv(float *aMatrixIn1, float *aVectorIn2, float *aMatrixOut, Aurora::RowWiseIterator rowIter_Begin((complexf*)aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End((complexf*)aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter(aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter((complexf*)aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const complexf& x, const float& y){ complexf v (y,0); return direction==0?x/v:v/x; }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } else{ Aurora::RowWiseIterator rowIter_Begin(aMatrixIn1,aVectorLength,colElementCount,0); Aurora::RowWiseIterator rowIter_End(aMatrixIn1,aVectorLength,colElementCount,aMatrixLength); Aurora::LoopVectorIterator rowVectorIter((complexf*)aVectorIn2,aVectorLength); + Aurora::RowWiseIterator outIter((complexf*)aMatrixOut,aVectorLength,colElementCount,0); auto lambda = [=] __device__(const float& x, const complexf& y){ complexf v (x,0); return direction==0?v/y:y/v; }; - thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,(complexf*)aMatrixOut,lambda); + thrust::transform(thrust::device, rowIter_Begin,rowIter_End,rowVectorIter,outIter,lambda); } } }