Fix unit test; add CudaMatrix add and mul operators

This commit is contained in:
kradchen
2023-11-01 14:31:29 +08:00
parent fe0abf8ee6
commit 029b86013e
13 changed files with 1108 additions and 617 deletions

View File

@@ -9,7 +9,7 @@
#include <cuda_runtime.h>
#include "CudaMatrixPrivate.cuh"
using namespace Aurora;
namespace Aurora{
CudaMatrix::CudaMatrix(std::shared_ptr<float> aData, std::vector<int> aInfo, ValueType aValueType)
: mValueType(aValueType)
@@ -241,8 +241,66 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
return true;
}
// Returns a new matrix whose elements are this matrix's elements plus aScalar.
//
// Only real matrices are supported: for a complex matrix an error is logged
// and a default-constructed CudaMatrix is returned. The result buffer is
// allocated with cudaMalloc and wrapped via fromRawData using this matrix's
// three dimension sizes and value type. If the allocation fails, the CUDA
// error is logged and a default-constructed CudaMatrix is returned.
CudaMatrix CudaMatrix::operator+(float aScalar) const{
    if (isComplex())
    {
        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // cudaMalloc reports failure only through its return code; without this
    // check a null buffer would be wrapped and handed to unaryAdd.
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator+(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // NOTE(review): fromRawData presumably takes ownership of `data` — confirm.
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryAdd(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
// Scalar-on-the-left addition: returns a new matrix holding aScalar + element
// for every element of aMatrix.
//
// Only real matrices are supported: for a complex matrix an error is logged
// and a default-constructed CudaMatrix is returned. The result buffer is
// allocated with cudaMalloc and wrapped via fromRawData with aMatrix's
// dimensions and value type. If the allocation fails, the CUDA error is
// logged and a default-constructed CudaMatrix is returned.
CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // cudaMalloc reports failure only through its return code; without this
    // check a null buffer would be wrapped and handed to unaryAdd.
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator+(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // NOTE(review): fromRawData presumably takes ownership of `data` — confirm.
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryAdd(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
    return out;
}
// Scalar-on-the-left addition for a temporary matrix: the sum is written back
// into aMatrix's own buffer and a reference to aMatrix is returned.
// A complex matrix is left untouched; an error is logged instead.
CudaMatrix& operator+(float aScalar, CudaMatrix &&aMatrix){
    if (!aMatrix.isComplex())
    {
        // Input and output are the same buffer, so no allocation is needed.
        unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
    }
    else
    {
        std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
    }
    return aMatrix;
}
// Scalar-on-the-right addition for a temporary matrix: aScalar is added to
// every element directly inside aMatrix's buffer, and aMatrix is returned by
// reference. Complex input is reported on std::cerr and returned unchanged.
CudaMatrix& operator+(CudaMatrix &&aMatrix,float aScalar){
    const bool isRealMatrix = !aMatrix.isComplex();
    if (isRealMatrix)
    {
        // Same buffer is used as source and destination (in-place update).
        unaryAdd(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
        return aMatrix;
    }
    std::cerr<<"Complex matrix not support operator+(float aScalar)"<<std::endl;
    return aMatrix;
}
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
if (this->getDataSize() != aMatrix.getDataSize()) return CudaMatrix();
if (this->getDataSize() != aMatrix.getDataSize()) {
std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
<<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
return CudaMatrix();
}
if (this->isComplex() != aMatrix.isComplex()) {
std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Comples":"Real")
<<" and the matrix1 type is "<<(aMatrix.isComplex()?"Comples":"Real")<<std::endl;
return CudaMatrix();
}
float* data = nullptr;
unsigned long long size = getDataSize() * getValueType();
cudaMalloc((void**)&data, sizeof(float) * size);
@@ -250,4 +308,128 @@ CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
unaryAdd(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
return out;
}
// Adds this matrix to a temporary matrix, reusing the temporary's buffer for
// the result, and returns aMatrix (copied into the return value).
//
// Both operands must have the same data size and the same real/complex kind;
// otherwise an error is logged and a default-constructed CudaMatrix is
// returned.
CudaMatrix CudaMatrix::operator+(CudaMatrix &&aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
        <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
        <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    // Sum every stored float, not just getDataSize() of them: the allocating
    // operators in this file size their buffers as getDataSize()*getValueType()
    // floats, so a complex matrix holds more floats than getDataSize().
    // NOTE(review): assumes getValueType() is the float count per element — confirm.
    unaryAdd(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize() * this->getValueType());
    return aMatrix;
}
// Adds aOther to a temporary left-hand matrix, writing the sum into the
// temporary's buffer, and returns aMatrix (copied into the return value).
//
// Both operands must have the same data size and the same real/complex kind;
// otherwise an error is logged and a default-constructed CudaMatrix is
// returned.
CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther){
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator+ must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
        <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (aOther.isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator+ must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
        <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    // Sum every stored float, not just getDataSize() of them: the allocating
    // operators in this file size their buffers as getDataSize()*getValueType()
    // floats, so a complex matrix holds more floats than getDataSize().
    // NOTE(review): assumes getValueType() is the float count per element — confirm.
    unaryAdd(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize() * aOther.getValueType());
    return aMatrix;
}
// mul
// Returns a new matrix whose elements are this matrix's elements multiplied
// by aScalar.
//
// Only real matrices are supported: for a complex matrix an error is logged
// and a default-constructed CudaMatrix is returned. The result buffer is
// allocated with cudaMalloc and wrapped via fromRawData using this matrix's
// three dimension sizes and value type. If the allocation fails, the CUDA
// error is logged and a default-constructed CudaMatrix is returned.
CudaMatrix CudaMatrix::operator*(float aScalar) const{
    if (isComplex())
    {
        // The message names operator* so the log points at the right operator.
        std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // cudaMalloc reports failure only through its return code; without this
    // check a null buffer would be wrapped and handed to unaryMul.
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator*(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // NOTE(review): fromRawData presumably takes ownership of `data` — confirm.
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryMul(getData(),aScalar,out.getData(),getDataSize());
    return out;
}
// Scalar-on-the-left multiplication: returns a new matrix holding
// aScalar * element for every element of aMatrix.
//
// Only real matrices are supported: for a complex matrix an error is logged
// and a default-constructed CudaMatrix is returned. The result buffer is
// allocated with cudaMalloc and wrapped via fromRawData with aMatrix's
// dimensions and value type. If the allocation fails, the CUDA error is
// logged and a default-constructed CudaMatrix is returned.
CudaMatrix operator*(float aScalar, const CudaMatrix &aMatrix){
    if (aMatrix.isComplex())
    {
        // The message names operator* so the log points at the right operator.
        std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = aMatrix.getDataSize() * aMatrix.getValueType();
    // cudaMalloc reports failure only through its return code; without this
    // check a null buffer would be wrapped and handed to unaryMul.
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator*(float aScalar) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // NOTE(review): fromRawData presumably takes ownership of `data` — confirm.
    auto out = CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
    unaryMul(aMatrix.getData(),aScalar,out.getData(),aMatrix.getDataSize());
    return out;
}
// Scalar-on-the-left multiplication for a temporary matrix: every element is
// scaled by aScalar directly inside aMatrix's buffer, and aMatrix is returned
// by reference. A complex matrix is left untouched and an error is logged.
CudaMatrix& operator*(float aScalar, CudaMatrix &&aMatrix){
    if (aMatrix.isComplex())
    {
        // The message names operator* so the log points at the right operator.
        std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
        return aMatrix;
    }
    // Source and destination are the same buffer (in-place update).
    unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
    return aMatrix;
}
// Scalar-on-the-right multiplication for a temporary matrix: every element is
// scaled by aScalar directly inside aMatrix's buffer, and aMatrix is returned
// by reference. A complex matrix is left untouched and an error is logged.
CudaMatrix& operator*(CudaMatrix &&aMatrix,float aScalar){
    if (aMatrix.isComplex())
    {
        // The message names operator* so the log points at the right operator.
        std::cerr<<"Complex matrix not support operator*(float aScalar)"<<std::endl;
        return aMatrix;
    }
    // Source and destination are the same buffer (in-place update).
    unaryMul(aMatrix.getData(),aScalar,aMatrix.getData(),aMatrix.getDataSize());
    return aMatrix;
}
// Element-wise product of this matrix and aMatrix, returned as a new matrix.
//
// Both operands must have the same data size and the same real/complex kind;
// otherwise an error is logged and a default-constructed CudaMatrix is
// returned. The result buffer is allocated with cudaMalloc and wrapped via
// fromRawData with this matrix's dimensions and value type. If the allocation
// fails, the CUDA error is logged and a default-constructed CudaMatrix is
// returned.
// NOTE(review): when both operands are complex, only getDataSize() raw floats
// are multiplied pairwise, which does not look like a complex product —
// confirm whether complex input should be rejected here.
CudaMatrix CudaMatrix::operator*(const CudaMatrix &aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
        <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
        <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    float* data = nullptr;
    unsigned long long size = getDataSize() * getValueType();
    // cudaMalloc reports failure only through its return code; without this
    // check a null buffer would be wrapped and handed to unaryMul.
    cudaError_t status = cudaMalloc((void**)&data, sizeof(float) * size);
    if (status != cudaSuccess)
    {
        std::cerr<<"operator*(const CudaMatrix &aMatrix) cudaMalloc failed: "<<cudaGetErrorString(status)<<std::endl;
        return CudaMatrix();
    }
    // NOTE(review): fromRawData presumably takes ownership of `data` — confirm.
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    unaryMul(this->getData(),aMatrix.getData(),out.getData(),this->getDataSize());
    return out;
}
// Element-wise product of this matrix and a temporary matrix. The product is
// written into the temporary's buffer and aMatrix is returned (copied into
// the return value).
//
// Both operands must have the same data size and the same real/complex kind;
// otherwise an error is logged and a default-constructed CudaMatrix is
// returned.
// NOTE(review): when both operands are complex, only getDataSize() raw floats
// are multiplied pairwise, which does not look like a complex product —
// confirm whether complex input should be rejected here.
CudaMatrix CudaMatrix::operator*(CudaMatrix &&aMatrix) const{
    if (this->getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<this->getDataSize()
        <<" and the matrix1 size is "<<aMatrix.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (this->isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(this->isComplex()?"Complex":"Real")
        <<" and the matrix1 type is "<<(aMatrix.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    // aMatrix's buffer is both the second input and the output (in-place).
    unaryMul(this->getData(),aMatrix.getData(),aMatrix.getData(),this->getDataSize());
    return aMatrix;
}
// Element-wise product of a temporary left-hand matrix and aOther. The
// product is written into the temporary's buffer and aMatrix is returned
// (copied into the return value).
//
// Both operands must have the same data size and the same real/complex kind;
// otherwise an error is logged and a default-constructed CudaMatrix is
// returned.
// NOTE(review): when both operands are complex, only getDataSize() raw floats
// are multiplied pairwise, which does not look like a complex product —
// confirm whether complex input should be rejected here.
CudaMatrix operator*(CudaMatrix &&aMatrix,CudaMatrix &aOther){
    if (aOther.getDataSize() != aMatrix.getDataSize()) {
        std::cerr<<"operator* must with Same DataSize, now the matrix0 size is "<<aMatrix.getDataSize()
        <<" and the matrix1 size is "<<aOther.getDataSize()<<std::endl;
        return CudaMatrix();
    }
    if (aOther.isComplex() != aMatrix.isComplex()) {
        std::cerr<<"operator* must with Data type, now the matrix0 type is "<<(aMatrix.isComplex()?"Complex":"Real")
        <<" and the matrix1 type is "<<(aOther.isComplex()?"Complex":"Real")<<std::endl;
        return CudaMatrix();
    }
    // aMatrix's buffer is both the second input and the output (in-place).
    unaryMul(aOther.getData(),aMatrix.getData(),aMatrix.getData(),aOther.getDataSize());
    return aMatrix;
}
}
#endif // USE_CUDA

View File

@@ -5,7 +5,7 @@
#include <thrust/execution_policy.h>
using namespace thrust::placeholders;
struct PowOperator{
struct PowOperator: public thrust::unary_function<float, float>{
float exponent;
PowOperator(float v):exponent(v) {}
void setExponent(float v){
@@ -25,7 +25,7 @@ void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
// Adds the scalar in2 to each of the `length` floats starting at in1:
// out[i] = in2 + in1[i]. Runs through thrust::transform with the
// thrust::device execution policy. in1 and out may be the same buffer
// (the in-place operators call it that way).
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
{
    // Only the addition is performed. The stale `in2*_1` transform that used
    // to precede it scaled the data instead of adding, and with in1 == out it
    // overwrote the input before the addition ran.
    thrust::transform(thrust::device,in1,in1+length,out,in2 + _1);
}
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
@@ -34,6 +34,11 @@ void unaryMul(float* in1, float* in2, float* out, unsigned long length)
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
}
// Scales `length` floats starting at in1 by the scalar in2:
// out[i] = in2 * in1[i], executed with the thrust::device policy.
void unaryMul(float* in1, const float& in2, float* out, unsigned long length)
{
    float* const rangeEnd = in1 + length;
    thrust::transform(thrust::device, in1, rangeEnd, out, in2 * _1);
}
void unaryNeg(float* in1, float* out, unsigned long length){
thrust::negate<float> op;
thrust::transform(thrust::device,in1,in1+length,out,op);
@@ -49,6 +54,23 @@ void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,in2,out,op);
}
// Scalar minus array: out[i] = in1 - in2[i] for the `length` floats starting
// at in2, executed with the thrust::device policy.
void unarySub(const float& in1, float* in2, float* out, unsigned long length){
    float* const rangeEnd = in2 + length;
    thrust::transform(thrust::device, in2, rangeEnd, out, in1 - _1);
}
// Scalar divided by array: out[i] = in1 / in2[i] for the `length` floats
// starting at in2, executed with the thrust::device policy.
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length){
    float* const rangeEnd = in2 + length;
    thrust::transform(thrust::device, in2, rangeEnd, out, in1 / _1);
}
// Array minus scalar: out[i] = in1[i] - in2 for the `length` floats starting
// at in1, executed with the thrust::device policy.
void unarySub(float* in1, const float& in2, float* out, unsigned long length){
    float* const rangeEnd = in1 + length;
    thrust::transform(thrust::device, in1, rangeEnd, out, _1 - in2);
}
// Array divided by scalar: out[i] = in1[i] / in2 for the `length` floats
// starting at in1, executed with the thrust::device policy.
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length){
    float* const rangeEnd = in1 + length;
    thrust::transform(thrust::device, in1, rangeEnd, out, _1 / in2);
}
void unaryPow(float* in1, float N,float* out, unsigned long length){
if (N == 0.0f)
{
@@ -65,7 +87,6 @@ void unaryPow(float* in1, float N,float* out, unsigned long length){
thrust::transform(thrust::device,in1,in1+length,out,op);
return;
}
thrust::transform(thrust::device,in1,in1+length,out,powf(_1,N));
thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
}

View File

@@ -5,11 +5,17 @@
// Element-wise helpers over raw float buffers. Each call processes `length`
// floats and writes the results to `out`.
// NOTE(review): the buffers are presumably device memory — confirm against the
// implementation file.

// Array + array — presumably out[i] = in1[i] + in2[i]; confirm in the implementation.
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
// Array + scalar: adds in2 to every element of in1.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
// Array * array — presumably out[i] = in1[i] * in2[i]; confirm in the implementation.
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
// Array * scalar: multiplies every element of in1 by in2.
void unaryMul(float* in1, const float& in2, float* out, unsigned long length);
// Negation — presumably out[i] = -in1[i]; confirm in the implementation.
void unaryNeg(float* in1, float* out, unsigned long length);
// Power — presumably raises every element of in1 to the exponent N; confirm in the implementation.
void unaryPow(float* in1, float N,float* out, unsigned long length);
// Array - array — presumably out[i] = in1[i] - in2[i]; confirm in the implementation.
void unarySub(float* in1, float* in2, float* out, unsigned long length);
// Array / array — presumably out[i] = in1[i] / in2[i]; confirm in the implementation.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
// Scalar - array: the scalar is the left operand (in1 - in2[i]).
void unarySub(const float& in1, float* in2, float* out, unsigned long length);
// Scalar / array: the scalar is the numerator (in1 / in2[i]).
void unaryDiv(const float& in1, float* in2, float* out, unsigned long length);
// Array - scalar: the scalar is the right operand (in1[i] - in2).
void unarySub(float* in1, const float& in2, float* out, unsigned long length);
// Array / scalar: the scalar is the denominator (in1[i] / in2).
void unaryDiv(float* in1, const float& in2, float* out, unsigned long length);
#endif // __CUDAMATRIX_H__

View File

@@ -3,6 +3,7 @@
//
#include "Function.h"
#include <cstddef>
// Must come after <complex> and before the mkl.h and Eigen headers
#define MKL_Complex8 std::complex<float>
@@ -29,6 +30,17 @@ namespace Aurora {
// Releases a buffer by forwarding the pointer to mkl_free.
void free(void* ptr)
{
    mkl_free(ptr);
}
float* random(size_t length){
VSLStreamStatePtr stream;
vslNewStream(&stream, VSL_BRNG_MT19937, 1);
float * ret = new float[length];
float low = -100.0;
float high = 10000.0;
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, length, ret, low, high);
// 释放资源
vslDeleteStream(&stream);
return ret;
}
#ifdef USE_CUDA
void gpuFree(void* ptr)
{

View File

@@ -6,11 +6,13 @@
#define MATLABFUNCTIONS_MATRIXFUNCTIONS_H
#include <complex>
#include <cstddef>
// Memory and test-data helpers shared by the Aurora matrix code.
namespace Aurora{
// Allocates a float buffer. NOTE(review): `size` is presumably an element
// count and `complex` presumably requests room for complex values — confirm
// against the implementation.
float* malloc(size_t size,bool complex = false);
// Releases a buffer. NOTE(review): presumably the counterpart of
// Aurora::malloc — confirm which allocator it pairs with.
void free(void* ptr);
// Releases a GPU buffer. NOTE(review): presumably only defined when USE_CUDA
// is set — confirm.
void gpuFree(void* ptr);
// Returns an array of `length` random floats. NOTE(review): ownership and the
// matching deallocator are not stated here — confirm in the implementation.
float* random(size_t length);
};

View File

@@ -23,17 +23,97 @@ int main()
A[i] = -1;
B[i] = i;
}
printf("Test CudaMatrix operator+(const CudaMatrix &aMatrix) const \r\n");
//CudaMatrix operator+(const CudaMatrix &aMatrix) const
auto C = A+B;
auto dA = A.toDeviceMatrix();
auto dB = B.toDeviceMatrix();
auto dC = (dA+dB).toHostMatrix();
auto dC = (dA+dB);
auto dhC = dC.toHostMatrix();
for (size_t i = 0; i < 1000; i++)
{
if (C[i]!=dC[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
if (C[i]!=dhC[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dhC[i]);
return 9;
}
}
printf("Test CudaMatrix operator+(float aScalar) const \r\n");
//CudaMatrix operator+(float aScalar) const
auto D = C+0.5;
auto dD = dC+0.5;
auto dhD = dD.toHostMatrix();
for (size_t i = 0; i < 1000; i++)
{
if (D[i]!=dhD[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
return 9;
}
}
printf("Test CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix) \r\n");
// CudaMatrix operator+(float aScalar, const CudaMatrix &aMatrix)
dD = 0.5 + dC;
dhD = dD.toHostMatrix();
for (size_t i = 0; i < 1000; i++)
{
if (D[i]!=dhD[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
return 9;
}
}
printf("Test CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix) \r\n");
// CudaMatrix &operator+(float aScalar, CudaMatrix &&aMatrix)
{
auto dD2 = 0.5 + (dA+dB);
dhD = dD2.toHostMatrix();
for (size_t i = 0; i < 1000; i++)
{
if (D[i]!=dhD[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
return 9;
}
}
}
printf("Test CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar) \r\n");
// CudaMatrix &operator+(CudaMatrix &&aMatrix, float aScalar)
{
auto dD2 = (dA+dB)+0.5;
dhD = dD2.toHostMatrix();
for (size_t i = 0; i < 1000; i++)
{
if (D[i]!=dhD[i]){
printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
return 9;
}
}
}
//CudaMatrix operator+(CudaMatrix &&aMatrix) const
printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix) const \r\n");
{
    // Host reference A + C versus device dA + (dA+dB); the right operand is a
    // temporary, which is what this case is meant to exercise.
    auto D = A+C;
    auto dD2 = dA+(dA+dB);
    dhD = dD2.toHostMatrix();
    for (size_t i = 0; i < 1000; i++)
    {
        // Compare this case's own reference (D) with its own device result
        // (dhD). Comparing C with dhC only re-checked the first test and
        // never looked at the overload under test.
        if (D[i]!=dhD[i]){
            printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
            return 9;
        }
    }
}
//CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther)
printf("Test CudaMatrix operator+(CudaMatrix &&aMatrix,CudaMatrix &aOther) \r\n");
{
    // Host reference A + C versus device (dA+dB) + dA; the left operand is a
    // temporary, which is what this case is meant to exercise.
    auto D = A+C;
    auto dD2 = (dA+dB)+dA;
    dhD = dD2.toHostMatrix();
    for (size_t i = 0; i < 1000; i++)
    {
        // Same fix as above: check D against dhD, not C against dhC.
        if (D[i]!=dhD[i]){
            printf("error value i:%zu, value1:%f, value2: %f",i,D[i],dhD[i]);
            return 9;
        }
    }
}
return 0;
}