Fix UnitTest add cudamatrix add and mul
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include "CudaMatrixPrivate.cuh"
|
||||
|
||||
using namespace Aurora;
|
||||
namespace Aurora{
|
||||
|
||||
CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
|
||||
: mValueType(aValueType)
|
||||
@@ -241,8 +241,66 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
CudaMatrix CudaMatrix::operator+(float aScalar) const{
|
||||
if (isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
|
||||
unaryAdd(getData(),aScalar,out.getData(),getDataSize());
|
||||
return out;
|
||||
}
|
||||
|
||||
CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
unaryAdd(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
|
||||
return out;
|
||||
}
|
||||
|
||||
CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return aMatrix;
|
||||
}
|
||||
unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
|
||||
CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return aMatrix;
|
||||
}
|
||||
unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
|
||||
if (this->getDataSize() != aMatrix.getDataSize()) return CudaMatrix();
|
||||
if (this->getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
|
||||
<<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (this->isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
@@ -250,4 +308,128 @@ CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
|
||||
unaryAdd(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
|
||||
return out;
|
||||
}
|
||||
|
||||
CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{
|
||||
if (this->getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
|
||||
<<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (this->isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
unaryAdd(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
|
||||
|
||||
CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){
|
||||
if (aOther.getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
|
||||
<<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (aOther.isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aOther.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
unaryAdd(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
|
||||
// mul
|
||||
CudaMatrix CudaMatrix::operator*(float aScalar) const{
|
||||
if (isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
|
||||
unaryMul(getData(),aScalar,out.getData(),getDataSize());
|
||||
return out;
|
||||
}
|
||||
CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
unaryMul(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
|
||||
return out;
|
||||
}
|
||||
CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return aMatrix;
|
||||
}
|
||||
unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar){
|
||||
if (aMatrix.isComplex())
|
||||
{
|
||||
std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
|
||||
return aMatrix;
|
||||
}
|
||||
unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
CudaMatrix CudaMatrix::operator*(const CudaMatrix &aMatrix) const{
|
||||
if (this->getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
|
||||
<<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (this->isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
float* data = nullptr;
|
||||
unsigned long long size = getDataSize() * getValueType();
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
|
||||
unaryMul(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
|
||||
return out;
|
||||
}
|
||||
CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{
|
||||
if (this->getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
|
||||
<<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (this->isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
unaryMul(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){
|
||||
if (aOther.getDataSize() != aMatrix.getDataSize()) {
|
||||
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
|
||||
<<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
if (aOther.isComplex() != aMatrix.isComplex()) {
|
||||
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Comples":"Real")
|
||||
<<" and the matrix1 type is "<<(aOther.isComplex()?"Comples":"Real")<<std::endl;
|
||||
return CudaMatrix();
|
||||
}
|
||||
unaryMul(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize());
|
||||
return aMatrix;
|
||||
}
|
||||
}
|
||||
#endif // USE_CUDA
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <thrust/execution_policy.h>
|
||||
using namespace thrust::placeholders;
|
||||
|
||||
struct PowOperator{
|
||||
struct PowOperator: public thrust::unary_function<float, float>{
|
||||
float exponent;
|
||||
PowOperator(float v):exponent(v) {}
|
||||
void setExponent(float v){
|
||||
@@ -25,7 +25,7 @@ void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
|
||||
|
||||
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
|
||||
{
|
||||
thrust::transform(thrust::device,in1,in1+length,out,in2*_1);
|
||||
thrust::transform(thrust::device,in1,in1+length,out,in2 + _1);
|
||||
}
|
||||
|
||||
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
|
||||
@@ -34,6 +34,11 @@ void unaryMul(float* in1, float* in2, float* out, unsigned long length)
|
||||
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
|
||||
}
|
||||
|
||||
void unaryMul(float* in1, const float& in2, float* out, unsigned long length)
|
||||
{
|
||||
thrust::transform(thrust::device,in1, in1+length, out, in2 * _1);
|
||||
}
|
||||
|
||||
void unaryNeg(float* in1, float* out, unsigned long length){
|
||||
thrust::negate<float> op;
|
||||
thrust::transform(thrust::device,in1,in1+length,out,op);
|
||||
@@ -49,6 +54,23 @@ void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
|
||||
}
|
||||
|
||||
void unarySub(const float& in1, float* in2, float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in2,in2+length,out,in1-_1);
|
||||
}
|
||||
|
||||
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in2,in2+length,out,in1/_1);
|
||||
|
||||
}
|
||||
|
||||
void unarySub(float* in1, const float& in2, float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in1,in1+length,out,_1-in2);
|
||||
}
|
||||
|
||||
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in1,in1+length,out,_1/in2);
|
||||
}
|
||||
|
||||
void unaryPow(float* in1, float N,float* out, unsigned long length){
|
||||
if (N == 0.0f)
|
||||
{
|
||||
@@ -65,7 +87,6 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
|
||||
thrust::transform(thrust::device,in1,in1+length,out,op);
|
||||
return;
|
||||
}
|
||||
thrust::transform(thrust::device,in1,in1+length,out,powf(_1,N));
|
||||
|
||||
thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
|
||||
}
|
||||
|
||||
|
||||
@@ -5,11 +5,17 @@
|
||||
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
|
||||
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
|
||||
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
|
||||
void unaryMul(float* in1, const float& in2, float* out, unsigned long length);
|
||||
|
||||
void unaryNeg(float* in1, float* out, unsigned long length);
|
||||
void unaryPow(float* in1, float N,float* out, unsigned long length);
|
||||
|
||||
void unarySub(float* in1, float* in2, float* out, unsigned long length);
|
||||
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
|
||||
void unarySub(const float& in1, float* in2, float* out, unsigned long length);
|
||||
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
|
||||
void unarySub(float* in1, const float& in2, float* out, unsigned long length);
|
||||
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);
|
||||
|
||||
|
||||
#endif // __CUDAMATRIX_H__
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "Function.h"
|
||||
#include <cstddef>
|
||||
|
||||
//必须在mkl.h和Eigen的头之前,<complex>之后
|
||||
#define MKL_Complex8 std::complex<float>
|
||||
@@ -29,6 +30,17 @@ namespace Aurora {
|
||||
void free(void* ptr){
|
||||
mkl_free(ptr);
|
||||
}
|
||||
float* random(size_t length){
|
||||
VSLStreamStatePtr stream;
|
||||
vslNewStream(&stream, VSL_BRNG_MT19937, 1);
|
||||
float * ret = new float[length];
|
||||
float low = -100.0;
|
||||
float high = 10000.0;
|
||||
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, length, ret, low, high);
|
||||
// 释放资源
|
||||
vslDeleteStream(&stream);
|
||||
return ret;
|
||||
}
|
||||
#ifdef USE_CUDA
|
||||
void gpuFree(void* ptr)
|
||||
{
|
||||
|
||||
@@ -6,11 +6,13 @@
|
||||
#define MATLABFUNCTIONS_MATRIXFUNCTIONS_H
|
||||
|
||||
#include <complex>
|
||||
#include <cstddef>
|
||||
|
||||
namespace Aurora{
|
||||
float* malloc(size_t size,bool complex = false);
|
||||
void free(void* ptr);
|
||||
void gpuFree(void* ptr);
|
||||
float* random(size_t length);
|
||||
};
|
||||
|
||||
|
||||
|
||||
88
src/main.cxx
88
src/main.cxx
@@ -23,17 +23,97 @@ int main()
|
||||
A[i] = -1;
|
||||
B[i] = i;
|
||||
}
|
||||
printf("Test CudaMatrix operator+(const CudaMatrix &aMatrix) const \r\n");
|
||||
//CudaMatrix operator+(const CudaMatrix &aMatrix) const
|
||||
auto C = A+B;
|
||||
auto dA = A.toDeviceMatrix();
|
||||
auto dB = B.toDeviceMatrix();
|
||||
auto dC = (dA+dB).toHostMatrix();
|
||||
auto dC = (dA+dB);
|
||||
auto dhC = dC.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (C[i]!=dC[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
|
||||
if (C[i]!=dhC[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dhC[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Test CudaMatrix operator+(float aScalar) const \r\n");
|
||||
//CudaMatrix operator+(float aScalar) const
|
||||
auto D = C+0.5;
|
||||
auto dD = dC+0.5;
|
||||
auto dhD = dD.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (D[i]!=dhD[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n");
|
||||
// CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix)
|
||||
dD = 0.5 + dC;
|
||||
dhD = dD.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (D[i]!=dhD[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n");
|
||||
// CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix)
|
||||
{
|
||||
auto dD2 = 0.5 + (dA+dB);
|
||||
dhD = dD2.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (D[i]!=dhD[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n");
|
||||
// CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar)
|
||||
{
|
||||
|
||||
auto dD2 = (dA+dB)+0.5;
|
||||
dhD = dD2.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (D[i]!=dhD[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
//CudaMatrix operator+(CudaMatrix &&aMatrix) const
|
||||
printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix) const \r\n");
|
||||
{
|
||||
auto D = A+C;
|
||||
auto dD2 = dA+(dA+dB);
|
||||
dhD = dD2.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (C[i]!=dhC[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
//CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther)
|
||||
printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n");
|
||||
{
|
||||
auto D = A+C;
|
||||
auto dD2 = (dA+dB)+dA;
|
||||
dhD = dD2.toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (C[i]!=dhC[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user