// Implementation of Aurora::CudaMatrix; the whole file is compiled only when USE_CUDA is defined.
#ifdef USE_CUDA
|
|
#include "CudaMatrix.h"

#include "Function.h"
#include "Matrix.h"

#include <cstddef>
#include <iostream>
#include <vector>

#include <cuda_runtime.h>

#include "CudaMatrixPrivate.cuh"
|
|
|
|
namespace Aurora{
|
|
|
|
/**
 * Build a matrix from a shared data buffer, a list of dimension sizes and a value type.
 * The dimension list is padded with 1s so that it always holds at least three entries.
 */
CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
    : mValueType(aValueType)
    , mData(aData)
    , mInfo(aInfo)
{
    // Missing trailing dimensions default to size 1.
    if (mInfo.size() < 3)
    {
        mInfo.resize(3, 1);
    }
}
|
|
|
|
bool CudaMatrix::isNull() const
|
|
{
|
|
return !mData || mInfo.empty();
|
|
}
|
|
|
|
bool CudaMatrix::isNan() const
|
|
{
|
|
for(size_t i=0; i<getDataSize(); ++i)
|
|
{
|
|
if(mData.get()[i] == mData.get()[i])
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool CudaMatrix::isScalar() const
|
|
{
|
|
return (getDimSize(0) == 1 &&
|
|
getDimSize(1) == 1 &&
|
|
getDimSize(2) < 2);
|
|
}
|
|
|
|
float CudaMatrix::getScalar() const
|
|
{
|
|
if (isNull()) return 0.0;
|
|
if (isNull()) return 0.0;
|
|
return getValue(0);
|
|
}
|
|
|
|
bool CudaMatrix::isVector() const
|
|
{
|
|
if (getDimSize(2)>1) return false;
|
|
if (isScalar()) return false;
|
|
return getDimSize(0) == 1 ||
|
|
getDimSize(1) == 1;
|
|
}
|
|
|
|
int CudaMatrix::getDims() const
|
|
{
|
|
if(mInfo[2] > 1)
|
|
{
|
|
return 3;
|
|
}
|
|
return 2;
|
|
}
|
|
|
|
float *CudaMatrix::getData() const
|
|
{
|
|
return mData.get();
|
|
}
|
|
|
|
int CudaMatrix::getDimSize(int aIndex) const
|
|
{
|
|
if (aIndex >= 0 && aIndex < 3) {
|
|
return mInfo.at(aIndex);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
size_t CudaMatrix::getDataSize() const
|
|
{
|
|
if (!mData.get())return 0;
|
|
size_t ret = 1;
|
|
for (auto v: mInfo) {
|
|
ret *= v;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
void CudaMatrix::forceReshape(int rows, int columns, int slices)
|
|
{
|
|
mInfo = {rows,columns,slices};
|
|
}
|
|
|
|
bool CudaMatrix::compareShape(const CudaMatrix &other) const
|
|
{
|
|
if (mInfo[2] == 1 && other.mInfo[2] == 1) {
|
|
if (mInfo[0]==1 && other.mInfo[1] == 1 && mInfo[1] == other.mInfo[0]) return true;
|
|
if (mInfo[1]==1 && other.mInfo[0] == 1 && mInfo[0] == other.mInfo[1]) return true;
|
|
}
|
|
for (int i = 0; i < mInfo.size(); ++i) {
|
|
if (mInfo[i] != other.mInfo[i]) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
 * Wrap an existing buffer in a CudaMatrix without copying it.
 * The buffer is handed to a shared_ptr whose deleter is gpuFree.
 *
 * @return a default-constructed CudaMatrix when aData is null.
 */
CudaMatrix CudaMatrix::fromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
{
    if (aData == nullptr)
    {
        return CudaMatrix();
    }
    std::shared_ptr<float> owned(aData, gpuFree);
    std::vector<int> shape{aRows, aCols, aSlices};
    return CudaMatrix(owned, shape, aType);
}
|
|
|
|
/**
 * Create a CudaMatrix that owns a fresh device copy of aData.
 *
 * aRows*aCols*aSlices*aType floats are copied device-to-device into a newly
 * allocated buffer that is released with gpuFree.
 *
 * @return a default-constructed CudaMatrix when aData is null, when the
 *         allocation fails, or when the copy fails.
 */
CudaMatrix CudaMatrix::copyFromRawData(float *aData, int aRows, int aCols, int aSlices, ValueType aType)
{
    if (!aData)
    {
        return CudaMatrix();
    }

    // Widen before multiplying so large shapes do not overflow int arithmetic.
    const unsigned long long size = static_cast<unsigned long long>(aRows) * aCols * aSlices * aType;

    float* data = nullptr;
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix copyFromRawData cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }

    status = cudaMemcpy(data, aData, sizeof(float) * size, cudaMemcpyDeviceToDevice);
    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix copyFromRawData cudaMemcpy failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);   // do not leak the half-initialized buffer
        return CudaMatrix();
    }

    std::vector<int> vector{aRows, aCols, aSlices};
    return CudaMatrix({data, gpuFree}, vector, aType);
}
|
|
|
|
/**
 * Return a matrix with the same shape and value type whose buffer is a fresh
 * device-to-device copy of this matrix's data.
 *
 * @return a default-constructed CudaMatrix when there is nothing to copy,
 *         when the allocation fails, or when the copy fails.
 */
CudaMatrix CudaMatrix::deepCopy() const
{
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();

    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix deepCopy cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }

    status = cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToDevice);
    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix deepCopy cudaMemcpy failed: "<<cudaGetErrorString(status)<<std::endl;
        cudaFree(data);   // ownership was never handed over, release it here
        return CudaMatrix();
    }
    return CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
}
|
|
|
|
/**
 * Copy the device buffer into a newly allocated host array and wrap it in a
 * Matrix with the same dimension sizes and value type.
 *
 * If the device-to-host copy fails, the error is reported on std::cerr and
 * the host array is zero-filled so the caller never sees uninitialized memory.
 */
Matrix CudaMatrix::toHostMatrix() const
{
    unsigned long long size = getDataSize() * getValueType();
    float* data = new float[size];

    const cudaError_t status = cudaMemcpy(data, mData.get(), sizeof(float) * size, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix toHostMatrix cudaMemcpy failed: "<<cudaGetErrorString(status)<<std::endl;
        for (unsigned long long i = 0; i < size; ++i)
        {
            data[i] = 0.0f;
        }
    }
    return Matrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
}
|
|
|
|
/**
 * Extract the sub-matrix covering indices [aBeginIndex, aEndIndex] (inclusive)
 * along dimension aDim into a newly allocated device buffer.
 *
 * @param aDim        dimension to slice (0, 1 or 2).
 * @param aBeginIndex first index kept along aDim.
 * @param aEndIndex   last index kept along aDim; must not be below aBeginIndex.
 * @return the extracted matrix, or a default-constructed CudaMatrix when an
 *         argument is invalid, the allocation fails, or a copy fails.
 */
CudaMatrix CudaMatrix::block(int aDim,int aBeginIndex, int aEndIndex) const
{
    if(aDim > 2 || aDim < 0)
    {
        std::cerr<<"CudaMatrix block only support 1D-3D data!"<<std::endl;
        return CudaMatrix();
    }

    if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
    {
        std::cerr<<"CudaMatrix block BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
        return CudaMatrix();
    }

    if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
    {
        std::cerr<<"CudaMatrix block EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
        return CudaMatrix();
    }

    if (aEndIndex < aBeginIndex)
    {
        std::cerr<<"CudaMatrix block EndIndex can not less than BeginIndex ! BeginIndex:"<<aBeginIndex <<", EndIndex:"<<aEndIndex<<std::endl;
        return CudaMatrix();
    }

    // All sizes and offsets are kept in size_t so large matrices do not
    // overflow int arithmetic.
    const size_t floatsPerValue = static_cast<size_t>(getValueType());
    const size_t dimLength = static_cast<size_t>(aEndIndex - aBeginIndex + 1);
    const size_t begin = static_cast<size_t>(aBeginIndex);
    const size_t rows = static_cast<size_t>(getDimSize(0));
    const size_t cols = static_cast<size_t>(getDimSize(1));
    const size_t slices = static_cast<size_t>(getDimSize(2));
    const size_t colStride = rows;
    const size_t sliceStride = rows * cols;
    const size_t outCount = getDataSize() / static_cast<size_t>(getDimSize(aDim)) * dimLength * floatsPerValue;

    float* dataOutput = nullptr;
    cudaError_t status = cudaMalloc((void**)&dataOutput, sizeof(float) * outCount);
    if (status != cudaSuccess || !dataOutput)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix block cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }

    // Hand the buffer to its owner right away so every exit path releases it.
    int outDims[3] = {getDimSize(0), getDimSize(1), getDimSize(2)};
    outDims[aDim] = aEndIndex - aBeginIndex + 1;
    CudaMatrix out = CudaMatrix::fromRawData(dataOutput, outDims[0], outDims[1], outDims[2], getValueType());

    const float* src = mData.get();
    switch (aDim)
    {
        case 0:
        {
            // One copy per column of every slice: dimLength values each.
            const size_t bytes = sizeof(float) * dimLength * floatsPerValue;
            for (size_t i = 0; i < slices && status == cudaSuccess; i++)
            {
                for (size_t j = 0; j < cols && status == cudaSuccess; j++)
                {
                    status = cudaMemcpy(dataOutput + (dimLength * j + i * dimLength * cols) * floatsPerValue,
                                        src + (begin + j * colStride + i * sliceStride) * floatsPerValue,
                                        bytes, cudaMemcpyDeviceToDevice);
                }
            }
            break;
        }
        case 1:
        {
            // The selected columns of one slice are contiguous: one copy per slice.
            const size_t copySize = dimLength * rows;
            for (size_t i = 0; i < slices && status == cudaSuccess; i++)
            {
                status = cudaMemcpy(dataOutput + floatsPerValue * (i * copySize),
                                    src + floatsPerValue * (begin * colStride + i * sliceStride),
                                    sizeof(float) * copySize * floatsPerValue, cudaMemcpyDeviceToDevice);
            }
            break;
        }
        default:
        {
            // aDim == 2: the selected slices form one contiguous range.
            const size_t copySize = dimLength * sliceStride;
            status = cudaMemcpy(dataOutput,
                                src + begin * sliceStride * floatsPerValue,
                                sizeof(float) * copySize * floatsPerValue, cudaMemcpyDeviceToDevice);
            break;
        }
    }

    if (status != cudaSuccess)
    {
        std::cerr<<"CudaMatrix block cudaMemcpy failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    return out;
}
|
|
|
|
bool CudaMatrix::setBlockValue(int aDim,int aBeginIndex, int aEndIndex,float aValue)
|
|
{
|
|
if(aDim>2 )
|
|
{
|
|
std::cerr<<"CudaMatrix setblockValue only support 1D-3D data!"<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setblockValue BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setblockValue EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aEndIndex < aBeginIndex)
|
|
{
|
|
std::cerr<<"CudaMatrix setblockValue EndIndex can not less than BeginIndex ! BeginIndex:"<<aBeginIndex <<", EndIndex:"<<aEndIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
int dimLength = aEndIndex - aBeginIndex + 1;
|
|
int dataSize = getDataSize()/getDimSize(aDim)*dimLength;
|
|
int colStride = getDimSize(0);
|
|
int sliceStride = getDimSize(0)*getDimSize(1);
|
|
|
|
switch (aDim)
|
|
{
|
|
case 0:
|
|
{
|
|
int colStride2 = dimLength;
|
|
int sliceStride2 = dimLength*getDimSize(1);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
for (size_t j = 0; j < getDimSize(1); j++)
|
|
{
|
|
float* begin = mData.get() + (aBeginIndex + j * colStride + i * sliceStride)*getValueType();
|
|
thrustFill(begin, begin + colStride2*getValueType(), aValue);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
case 1:
|
|
{
|
|
int colStride2 = getDimSize(0);
|
|
int copySize = dimLength*getDimSize(0);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
float* begin = mData.get() + getValueType()*(aBeginIndex * colStride + i * sliceStride);
|
|
thrustFill(begin, begin + copySize * getValueType(), aValue);
|
|
}
|
|
return true;
|
|
}
|
|
case 2:
|
|
{
|
|
int copySize = dimLength*sliceStride;
|
|
float* begin = mData.get() + aBeginIndex * sliceStride*getValueType();
|
|
thrustFill(begin, begin + copySize *getValueType(), aValue);
|
|
return true;
|
|
}
|
|
default:
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool CudaMatrix::setBlockComplexValue(int aDim,int aBeginIndex, int aEndIndex, std::complex<float> aValue)
|
|
{
|
|
if(getValueType() != Complex)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlockComplexValue only support complex matrix"<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if(aDim>2 )
|
|
{
|
|
std::cerr<<"CudaMatrix setBlockComplexValue only support 1D-3D data!"<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlockComplexValue BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlockComplexValue EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aEndIndex < aBeginIndex)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlockComplexValue EndIndex can not less than BeginIndex ! BeginIndex:"<<aBeginIndex <<", EndIndex:"<<aEndIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
int dimLength = aEndIndex - aBeginIndex + 1;
|
|
int dataSize = getDataSize()/getDimSize(aDim)*dimLength;
|
|
int colStride = getDimSize(0);
|
|
int sliceStride = getDimSize(0)*getDimSize(1);
|
|
|
|
switch (aDim)
|
|
{
|
|
case 0:
|
|
{
|
|
int colStride2 = dimLength;
|
|
int sliceStride2 = dimLength*getDimSize(1);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
for (size_t j = 0; j < getDimSize(1); j++)
|
|
{
|
|
float* begin = mData.get() + (aBeginIndex + j * colStride + i * sliceStride)*getValueType();
|
|
thrustFill(begin, begin + colStride2*getValueType(), aValue);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
case 1:
|
|
{
|
|
int colStride2 = getDimSize(0);
|
|
int copySize = dimLength*getDimSize(0);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
float* begin = mData.get() + getValueType()*(aBeginIndex * colStride + i * sliceStride);
|
|
thrustFill(begin, begin + copySize * getValueType(), aValue);
|
|
}
|
|
return true;
|
|
}
|
|
case 2:
|
|
{
|
|
int copySize = dimLength*sliceStride;
|
|
float* begin = mData.get() + aBeginIndex * sliceStride*getValueType();
|
|
thrustFill(begin, begin + copySize *getValueType(), aValue);
|
|
return true;
|
|
}
|
|
default:
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool CudaMatrix::setBlock(int aDim,int aBeginIndex, int aEndIndex, const CudaMatrix& aMatrix)
|
|
{
|
|
if( aDim>2 )
|
|
{
|
|
std::cerr<<"CudaMatrix setBlock only support 1D-3D data!"<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aBeginIndex>=getDimSize(aDim) || aBeginIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlock BeginIndx error!BeginIndx:"<<aBeginIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
if (aEndIndex>=getDimSize(aDim) || aEndIndex<0)
|
|
{
|
|
std::cerr<<"CudaMatrix setBlock EndIndex error!EndIndex:"<<aEndIndex<<std::endl;
|
|
return false;
|
|
}
|
|
|
|
int dimLength = std::abs(aEndIndex-aBeginIndex)+1;
|
|
size_t newdims[3]{0};
|
|
|
|
switch (aDim)
|
|
{
|
|
case 0:
|
|
{
|
|
newdims[0] = dimLength;
|
|
newdims[1] = getDimSize(1);
|
|
newdims[2] = getDimSize(2);
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
newdims[0] = getDimSize(0);
|
|
newdims[1] = dimLength;
|
|
newdims[2] = getDimSize(2);
|
|
break;
|
|
}
|
|
case 2:
|
|
{
|
|
newdims[0] = getDimSize(0);
|
|
newdims[1] = getDimSize(1);
|
|
newdims[2] = dimLength;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (aMatrix.getDimSize(0)!= newdims[0]
|
|
|| aMatrix.getDimSize(1)!= newdims[1]
|
|
|| aMatrix.getDimSize(2)!= newdims[2])
|
|
{
|
|
std::cerr << "CudaMatrix setBlock src Matrix(" << aMatrix.getDimSize(0) << ","
|
|
<< aMatrix.getDimSize(1) << "," << aMatrix.getDimSize(2)
|
|
<< ") not match the des shape(" << newdims[0] << ","
|
|
<< newdims[1] << "," << newdims[2] << ")"
|
|
<< std::endl;
|
|
return false;
|
|
}
|
|
|
|
int dataSize = getDataSize()/getDimSize(aDim)*dimLength;
|
|
int colStride = getDimSize(0);
|
|
int sliceStride = getDimSize(0)*getDimSize(1);
|
|
switch (aDim)
|
|
{
|
|
case 0:
|
|
{
|
|
int colStride2 = dimLength;
|
|
int sliceStride2 = dimLength*getDimSize(1);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
for (size_t j = 0; j < getDimSize(1); j++)
|
|
{
|
|
cudaMemcpy(mData.get() + (aBeginIndex + j * colStride + i * sliceStride)*getValueType(),
|
|
aMatrix.getData() + (colStride2 * j + i * sliceStride2)*getValueType(),
|
|
sizeof(float) * colStride2*getValueType(), cudaMemcpyDeviceToDevice);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
case 1:
|
|
{
|
|
int colStride2 = getDimSize(0);
|
|
int copySize = dimLength*getDimSize(0);
|
|
for (size_t i = 0; i < getDimSize(2); i++)
|
|
{
|
|
cudaMemcpy(mData.get() + getValueType()*(aBeginIndex * colStride + i * sliceStride),
|
|
aMatrix.getData() + getValueType()*(i * copySize),
|
|
sizeof(float) * copySize*getValueType(), cudaMemcpyDeviceToDevice);
|
|
}
|
|
return true;
|
|
}
|
|
case 2:
|
|
{
|
|
int copySize = dimLength*sliceStride;
|
|
cudaMemcpy(mData.get() + aBeginIndex * sliceStride*getValueType(),
|
|
aMatrix.getData(),
|
|
sizeof(float) * copySize*getValueType(), cudaMemcpyDeviceToDevice);
|
|
return true;
|
|
}
|
|
default:
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
//--Add----------------------------------------------------------------
|
|
/**
 * matrix + scalar into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix CudaMatrix::operator+(float aScalar) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // Never run unaryAdd against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator+ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryAdd(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
|
|
|
|
/**
 * scalar + matrix into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // Never run unaryAdd against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator+ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryAdd(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
|
|
/**
 * scalar + temporary matrix, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryAdd(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
    return aMatrix;
}
|
|
|
|
/**
 * temporary matrix + scalar, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryAdd(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
    return aMatrix;
}
|
|
|
|
/**
 * matrix + matrix into a newly allocated result.
 *
 * Both operands must have the same data size and the same value type.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are added completely (assumes unaryAdd works float-by-float over
 * the given length - TODO confirm against its definition).
 *
 * @return the result, or a default-constructed CudaMatrix on a size/type
 *         mismatch, empty input, or when the allocation fails.
 */
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator+ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryAdd(this->getData(),aMatrix.getData(),out.getData(),size);
    return out;
}
|
|
|
|
/**
 * matrix + temporary matrix; the sum is written into aMatrix's own buffer.
 *
 * Both operands must have the same data size and the same value type.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are added completely (assumes unaryAdd works float-by-float over
 * the given length - TODO confirm against its definition).
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a size/type mismatch.
 */
CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    unaryAdd(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize() * this->getValueType());
    return aMatrix;
}
|
|
|
|
|
|
/**
 * temporary matrix + matrix; the sum is written into aMatrix's own buffer.
 *
 * Both operands must have the same data size and the same value type.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are added completely (assumes unaryAdd works float-by-float over
 * the given length - TODO confirm against its definition).
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a size/type mismatch.
 */
CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
                 <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (aOther.isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    unaryAdd(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize() * aOther.getValueType());
    return aMatrix;
}
|
|
|
|
//--mul-----------------------------------------------------
|
|
/**
 * matrix * scalar into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix CudaMatrix::operator*(float aScalar) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // Never run unaryMul against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator* cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryMul(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
|
|
/**
 * scalar * matrix into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        std::cerr<<"Complex matrix not support operator*(float aScalar, const CudaMatrix &aMatrix)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // Never run unaryMul against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator* cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryMul(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
/**
 * scalar * temporary matrix, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryMul(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator*(float aScalar, CudaMatrix &&aMatrix)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * temporary matrix * scalar, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryMul(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * matrix * matrix (via unaryMul) into a newly allocated result.
 *
 * Both operands must have the same data size and the same value type.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryMul although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryMul is meant to treat complex data.
 *
 * @return the result, or a default-constructed CudaMatrix on a size/type
 *         mismatch, empty input, or when the allocation fails.
 */
CudaMatrix CudaMatrix::operator*(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator* cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryMul(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
    return out;
}
|
|
/**
 * matrix * temporary matrix (via unaryMul); the result is written into
 * aMatrix's own buffer.
 *
 * Both operands must have the same data size and the same value type.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryMul although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryMul is meant to treat complex data.
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a size/type mismatch.
 */
CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    unaryMul(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
    return aMatrix;
}
|
|
/**
 * temporary matrix * matrix (via unaryMul); the result is written into
 * aMatrix's own buffer.
 *
 * Both operands must have the same data size and the same value type.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryMul although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryMul is meant to treat complex data.
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a size/type mismatch.
 */
CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
                 <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (aOther.isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    unaryMul(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize());
    return aMatrix;
}
|
|
|
|
//--Sub-----------------------------------------------------------------
|
|
/**
 * matrix - scalar into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix CudaMatrix::operator-(float aScalar) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator-(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // Never run unarySub against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator- cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unarySub(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
|
|
|
|
/**
 * scalar - matrix into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix operator-(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        std::cerr<<"Complex matrix not support operator-(float aScalar, const CudaMatrix &aMatrix)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // Never run unarySub against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator- cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unarySub(aScalar,aMatrix.getData(),out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
|
|
/**
 * scalar - temporary matrix, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator-(float aScalar, CudaMatrix &&aMatrix){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unarySub(aScalar,buffer,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator-(float aScalar, CudaMatrix &&aMatrix)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * temporary matrix - scalar, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator-(CudaMatrix &&aMatrix,float aScalar){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unarySub(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator-(CudaMatrix &&aMatrix, float aScalar)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * matrix - matrix into a newly allocated result.
 *
 * Both operands must have the same value type and the same data size.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are subtracted completely (assumes unarySub works float-by-float
 * over the given length - TODO confirm against its definition).
 *
 * @return the result, or a default-constructed CudaMatrix on a type/size
 *         mismatch, empty input, or when the allocation fails.
 */
CudaMatrix CudaMatrix::operator-(const CudaMatrix &aMatrix) const{
    if (aMatrix.isComplex()!=this->isComplex())
    {
        std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator- cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unarySub(this->getData(),aMatrix.getData(),out.getData(),size);
    return out;
}
|
|
/**
 * matrix - temporary matrix; the difference is written into aMatrix's own
 * buffer.
 *
 * Both operands must have the same value type and the same data size.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are subtracted completely (assumes unarySub works float-by-float
 * over the given length - TODO confirm against its definition).
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a type/size mismatch.
 */
CudaMatrix CudaMatrix::operator-(CudaMatrix &&aMatrix) const{
    if (aMatrix.isComplex()!=this->isComplex())
    {
        std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    unarySub(this->getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize() * aMatrix.getValueType());
    return aMatrix;
}
|
|
/**
 * temporary matrix - matrix; the difference is written into aMatrix's own
 * buffer.
 *
 * Both operands must have the same value type and the same data size.
 * All getDataSize()*getValueType() floats are processed so that complex
 * operands are subtracted completely (assumes unarySub works float-by-float
 * over the given length - TODO confirm against its definition).
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a type/size mismatch.
 */
CudaMatrix operator-(CudaMatrix &&aMatrix,CudaMatrix &aOther){
    if (aMatrix.isComplex()!=aOther.isComplex())
    {
        std::cerr<<"operator- must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator- must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
                 <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    unarySub(aMatrix.getData(),aOther.getData(),aMatrix.getData(),aMatrix.getDataSize() * aMatrix.getValueType());
    return aMatrix;
}
|
|
|
|
// div
|
|
/**
 * matrix / scalar into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix CudaMatrix::operator/(float aScalar) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator/(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // Never run unaryDiv against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator/ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryDiv(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
|
|
/**
 * scalar / matrix into a newly allocated result (real matrices only).
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix operator/(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        std::cerr<<"Complex matrix not support operator/(float aScalar, const CudaMatrix &aMatrix)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // Never run unaryDiv against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator/ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryDiv(aScalar,aMatrix.getData(),out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
/**
 * scalar / temporary matrix, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator/(float aScalar, CudaMatrix &&aMatrix){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryDiv(aScalar,buffer,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator/(float aScalar, CudaMatrix &&aMatrix)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * temporary matrix / scalar, computed in place in aMatrix's own buffer.
 * Complex input is reported on std::cerr and returned unchanged.
 *
 * @return a reference to aMatrix.
 */
CudaMatrix& operator/(CudaMatrix &&aMatrix,float aScalar){
    if (!aMatrix.isComplex())
    {
        float* buffer = aMatrix.getData();
        unaryDiv(buffer,aScalar,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator/(CudaMatrix &&aMatrix, float aScalar)"<<std::endl;
    return aMatrix;
}
|
|
/**
 * matrix / matrix (via unaryDiv) into a newly allocated result.
 *
 * Both operands must have the same value type and the same data size.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryDiv although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryDiv is meant to treat complex data.
 *
 * @return the result, or a default-constructed CudaMatrix on a type/size
 *         mismatch, empty input, or when the allocation fails.
 */
CudaMatrix CudaMatrix::operator/(const CudaMatrix &aMatrix) const{
    if (aMatrix.isComplex()!=this->isComplex())
    {
        std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator/ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryDiv(this->getData(),aMatrix.getData(),out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
/**
 * matrix / temporary matrix (via unaryDiv); the result is written into
 * aMatrix's own buffer.
 *
 * Both operands must have the same value type and the same data size.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryDiv although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryDiv is meant to treat complex data.
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a type/size mismatch.
 */
CudaMatrix CudaMatrix::operator/(CudaMatrix &&aMatrix) const{
    if (aMatrix.isComplex()!=this->isComplex())
    {
        std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    unaryDiv(this->getData(),aMatrix.getData(),aMatrix.getData(),aMatrix.getDataSize());
    return aMatrix;
}
|
|
/**
 * temporary matrix / matrix (via unaryDiv); the result is written into
 * aMatrix's own buffer.
 *
 * Both operands must have the same value type and the same data size.
 *
 * NOTE(review): for complex operands only getDataSize() floats are handed to
 * unaryDiv although the buffers hold getDataSize()*getValueType() floats -
 * confirm how unaryDiv is meant to treat complex data.
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix on a type/size mismatch.
 */
CudaMatrix operator/(CudaMatrix &&aMatrix, CudaMatrix &aOther){
    if (aMatrix.isComplex()!=aOther.isComplex())
    {
        std::cerr<<"operator/ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
                 <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator/ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
                 <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    unaryDiv(aMatrix.getData(),aOther.getData(),aMatrix.getData(),aMatrix.getDataSize());
    return aMatrix;
}
|
|
|
|
// -----------pow-------------------------------------------------------
|
|
/**
 * Apply unaryPow with exponent `times` to a real matrix, writing into a newly
 * allocated result.
 *
 * @return the result, or a default-constructed CudaMatrix for complex input,
 *         empty input, or when the output buffer cannot be allocated.
 */
CudaMatrix CudaMatrix::operator^(int times) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator^(int times)"<<std::endl;
        return CudaMatrix();
    }
    float scalar = (float)times;
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // Never run unaryPow against an output buffer that was not obtained.
    const cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess || !data)
    {
        if (status != cudaSuccess)
        {
            std::cerr<<"CudaMatrix operator^ cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        }
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryPow(getData(),scalar,out.getData(),getDataSize());
    return out;
}
|
|
/**
 * Apply unaryPow with exponent `times` in place on a temporary real matrix.
 *
 * @return a copy of aMatrix after the update, or a default-constructed
 *         CudaMatrix when aMatrix is complex.
 */
CudaMatrix operator^(CudaMatrix &&aMatrix,int times){
    if (!aMatrix.isComplex())
    {
        const float exponent = static_cast<float>(times);
        float* buffer = aMatrix.getData();
        unaryPow(buffer,exponent,buffer,aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator^(int times)"<<std::endl;
    return CudaMatrix();
}
|
|
|
|
//----------negative------------------------------------------------
|
|
// Unary minus for a temporary matrix: unaryNeg is run with aMatrix's buffer
// as both source and destination, then aMatrix is returned.
// NOTE(review): only getDataSize() entries are passed to unaryNeg; confirm
// this is sufficient for complex matrices.
CudaMatrix operator-(CudaMatrix &&aMatrix){
    float* buffer = aMatrix.getData();
    unaryNeg(buffer, buffer, aMatrix.getDataSize());
    return aMatrix;
}
|
|
// Unary minus for a matrix that must stay untouched: unaryNeg writes into a
// newly allocated matrix with the same dimensions and value type as aMatrix.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryNeg.
// NOTE(review): only getDataSize() entries are passed to unaryNeg although
// the buffer holds getDataSize() * getValueType() floats - confirm for complex input.
CudaMatrix operator-(const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator- cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryNeg(aMatrix.getData(),out.getData(),aMatrix.getDataSize());
    return out;
}
|
|
|
|
//----compare---------------------------------------------------
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::G selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
// NOTE(review): the buffer holds getDataSize() * getValueType() floats but
// unaryCompare is given getDataSize() entries - confirm for complex input.
CudaMatrix CudaMatrix::operator>(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator> cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::G);
    return out;
}
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::G selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator>(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator> cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::G);
    return out;
}
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::G
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator>(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator> must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator> cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::G);
    return out;
}
|
|
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::L selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix CudaMatrix::operator<(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator< cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::L);
    return out;
}
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::L selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator<(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator< cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::L);
    return out;
}
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::L
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator<(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator< must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator< cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::L);
    return out;
}
|
|
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::GE selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix CudaMatrix::operator>=(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator>= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::GE);
    return out;
}
|
|
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::GE selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator>=(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator>= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::GE);
    return out;
}
|
|
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::GE
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator>=(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator>= must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator>= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::GE);
    return out;
}
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::LE selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix CudaMatrix::operator<=(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator<= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::LE);
    return out;
}
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::LE selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator<=(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator<= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::LE);
    return out;
}
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::LE
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator<=(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator<= must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator<= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::LE);
    return out;
}
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::E selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix CudaMatrix::operator==(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator== cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::E);
    return out;
}
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::E selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator==(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator== cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::E);
    return out;
}
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::E
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator==(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator== must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator== cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::E);
    return out;
}
|
|
// Compares this matrix with aScalar (matrix on the left) by calling
// unaryCompare with the ::NE selector; the result is returned in a newly
// allocated matrix with this matrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix CudaMatrix::operator!=(float aScalar) const{
    float* data = nullptr;
    unsigned long long size = this->getDataSize() * this->getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator!= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, this->getDimSize(0), this->getDimSize(1), this->getDimSize(2), this->getValueType());
    unaryCompare(this->getData(),aScalar,data,this->getDataSize(),::NE);
    return out;
}
|
|
// Compares aScalar with aMatrix (scalar on the left) by calling unaryCompare
// with the ::NE selector; the result is returned in a newly allocated matrix
// with aMatrix's dimensions and value type.
//
// If the device allocation fails, the error is written to std::cerr and a
// default-constructed CudaMatrix is returned instead of passing a null buffer
// on to unaryCompare.
CudaMatrix operator!=(float aScalar, const CudaMatrix &aMatrix){
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator!= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(aScalar,aMatrix.getData(),data,aMatrix.getDataSize(),::NE);
    return out;
}
|
|
// Compares this matrix with aMatrix by calling unaryCompare with the ::NE
// selector; the result is returned in a newly allocated matrix that takes its
// dimensions and value type from aMatrix.
//
// The output buffer is sized from aMatrix while unaryCompare is given
// this->getDataSize() entries, so the two operands must have the same data
// size. A mismatch is reported on std::cerr and yields a default-constructed
// CudaMatrix, as does a failed device allocation.
CudaMatrix CudaMatrix::operator!=(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator!= must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
                 <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator!= cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryCompare(this->getData(),aMatrix.getData(),data,this->getDataSize(),::NE);
    return out;
}
|
|
// Copies a single float from the matrix buffer to the host and returns it.
//
// index is an offset in floats from getData(). It is checked against
// getDataSize() * getValueType(), the float count the operators in this file
// use when allocating a matrix buffer. Because getDataSize() is 0 for a
// matrix without data, a null matrix is rejected by the same check rather
// than being passed to cudaMemcpy.
//
// Returns NaN, after logging to stderr, when the index is rejected or the
// copy reports an error.
float CudaMatrix::getValue(size_t index) const{
    const size_t floatCount = getDataSize() * getValueType();
    if (index >= floatCount) {
        fprintf(stderr, "CudaMatrix::getValue: index %zu is out of range (float count %zu)\n", index, floatCount);
        return nan("");
    }

    float result = 0.0f;
    cudaError_t cuda_error = cudaMemcpy(&result, getData() + index, sizeof(float), cudaMemcpyDeviceToHost);
    if (cuda_error != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error));
        return nan("");
    }
    return result;
}
|
|
// Copies a single host float into the matrix buffer at the given offset.
//
// index is an offset in floats from getData(). It is checked against
// getDataSize() * getValueType(), the float count the operators in this file
// use when allocating a matrix buffer. Because getDataSize() is 0 for a
// matrix without data, a null matrix is rejected by the same check rather
// than being passed to cudaMemcpy.
//
// A rejected index or a failed copy is logged to stderr; nothing is returned.
void CudaMatrix::setValue(size_t index, const float& value){
    const size_t floatCount = getDataSize() * getValueType();
    if (index >= floatCount) {
        fprintf(stderr, "CudaMatrix::setValue: index %zu is out of range (float count %zu)\n", index, floatCount);
        return;
    }

    cudaError_t cuda_error = cudaMemcpy(getData() + index, &value, sizeof(float), cudaMemcpyHostToDevice);
    if (cuda_error != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cuda_error));
    }
}
|
|
}
|
|
#endif // USE_CUDA
|