CudaMatrix Operator logic patch1
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
#include <iostream>
|
||||
#include <cstddef>
|
||||
#include <cuda_runtime.h>
|
||||
#include "CudaMatrixPrivate.cuh"
|
||||
|
||||
using namespace Aurora;
|
||||
|
||||
@@ -239,4 +240,14 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif // USE_CUDA
|
||||
|
||||
/**
 * Element-wise sum of two device matrices.
 *
 * @param aMatrix right-hand operand; must have the same data size and value
 *                type as *this.
 * @return a new CudaMatrix with the dimensions and value type of *this, or a
 *         default-constructed CudaMatrix when the operands are incompatible
 *         or the device allocation fails.
 */
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
    // Both operands must expose the same number of floats, otherwise the
    // element-wise pass below would read past the end of the shorter one.
    if (this->getDataSize() != aMatrix.getDataSize() ||
        this->getValueType() != aMatrix.getValueType()) return CudaMatrix();

    // Total number of floats in the buffer.
    // NOTE(review): assumes getValueType() is the float count per element
    // (as the allocation below already relies on) - confirm.
    unsigned long long size = getDataSize() * getValueType();

    float* data = nullptr;
    // A failed allocation must not be wrapped into a matrix and written to.
    if (cudaMalloc((void**)&data, sizeof(float) * size) != cudaSuccess) return CudaMatrix();

    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
    // Add every float that was allocated, not just getDataSize() of them, so
    // the whole output buffer is initialized.
    unaryAdd(this->getData(),aMatrix.getData(),out.getData(),size);
    return out;
}
|
||||
#endif // USE_CUDA
|
||||
|
||||
71
src/CudaMatrixPrivate.cu
Normal file
71
src/CudaMatrixPrivate.cu
Normal file
@@ -0,0 +1,71 @@
|
||||
#include <CudaMatrixPrivate.cuh>
#include <math.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
|
||||
using namespace thrust::placeholders;
|
||||
|
||||
/**
 * Unary functor that raises its argument to a fixed exponent with powf.
 * Intended to be passed to element-wise algorithms such as thrust::transform.
 */
struct PowOperator{
    // Exponent applied to every value passed to operator().
    float exponent;

    // Builds a functor computing powf(x, v).
    PowOperator(float v):exponent(v) {}

    // Replaces the exponent used by later calls.
    void setExponent(float v){
        exponent = v;
    }

    // Returns x raised to `exponent`. Const-qualified because the call does
    // not modify the functor, so it can also be invoked through a const
    // reference or a const copy.
    __host__ __device__
    float operator()(const float& x) const {
        return powf(x, exponent);
    }
};
|
||||
|
||||
// Element-wise sum: out[i] = in1[i] + in2[i] for i in [0, length).
// Runs under the thrust::device execution policy, so all three pointers
// must be usable from device code.
void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::plus<float>());
}
|
||||
|
||||
// Scalar addition: out[i] = in1[i] + in2 for i in [0, length).
// Runs under the thrust::device execution policy, so in1 and out must be
// usable from device code; in2 is copied into the placeholder expression.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
{
    // The previous expression was `in2*_1`, which scaled the input instead of
    // offsetting it.
    thrust::transform(thrust::device,in1,in1+length,out,_1 + in2);
}
|
||||
|
||||
// Element-wise product: out[i] = in1[i] * in2[i] for i in [0, length).
// Runs under the thrust::device execution policy, so all three pointers
// must be usable from device code.
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
{
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::multiplies<float>());
}
|
||||
|
||||
// Element-wise negation: out[i] = -in1[i] for i in [0, length).
// Runs under the thrust::device execution policy, so both pointers must be
// usable from device code.
void unaryNeg(float* in1, float* out, unsigned long length){
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, out, thrust::negate<float>());
}
|
||||
|
||||
// Element-wise difference: out[i] = in1[i] - in2[i] for i in [0, length).
// Runs under the thrust::device execution policy, so all three pointers
// must be usable from device code.
void unarySub(float* in1, float* in2, float* out, unsigned long length){
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::minus<float>());
}
|
||||
|
||||
// Element-wise quotient: out[i] = in1[i] / in2[i] for i in [0, length).
// Runs under the thrust::device execution policy, so all three pointers
// must be usable from device code.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
    float* const in1End = in1 + length;
    thrust::transform(thrust::device, in1, in1End, in2, out, thrust::divides<float>());
}
|
||||
|
||||
// Element-wise power: out[i] = in1[i] ^ N for i in [0, length).
// Every algorithm runs under the thrust::device execution policy, so in1 and
// out must be usable from device code. The exponents 0, 1 and 2 take cheaper
// paths than the general powf call.
void unaryPow(float* in1, float N,float* out, unsigned long length){
    if (N == 0.0f)
    {
        // x^0 is 1 for every x. The explicit policy is required: without it
        // Thrust treats raw pointers as host memory.
        thrust::fill(thrust::device,out,out+length,1.0f);
        return;
    }
    if (N == 1.0f)
    {
        // x^1 is x, so a plain device-side copy is enough.
        thrust::copy(thrust::device,in1,in1+length,out);
        return;
    }
    if (N == 2.0f){
        thrust::square<float> op;
        thrust::transform(thrust::device,in1,in1+length,out,op);
        return;
    }
    // General case. powf cannot be applied to a placeholder expression such
    // as `_1`, so the PowOperator functor carries the exponent instead.
    thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
}
|
||||
|
||||
15
src/CudaMatrixPrivate.cuh
Normal file
15
src/CudaMatrixPrivate.cuh
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
#ifndef __CUDAMATRIX_CUH__
#define __CUDAMATRIX_CUH__

// Element-wise helpers over float buffers of `length` values.
// The definitions run Thrust algorithms with the thrust::device policy, so
// every pointer argument must be usable from device code.

// Intended contract: out[i] = in1[i] + in2[i].
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
// Intended contract: out[i] = in1[i] + in2 (scalar right-hand operand).
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
// Intended contract: out[i] = in1[i] * in2[i].
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
// Intended contract: out[i] = -in1[i].
void unaryNeg(float* in1, float* out, unsigned long length);
// Intended contract: out[i] = in1[i] raised to the power N.
void unaryPow(float* in1, float N,float* out, unsigned long length);

// Intended contract: out[i] = in1[i] - in2[i].
void unarySub(float* in1, float* in2, float* out, unsigned long length);
// Intended contract: out[i] = in1[i] / in2[i].
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);

#endif // __CUDAMATRIX_CUH__
|
||||
20
src/main.cxx
20
src/main.cxx
@@ -7,6 +7,7 @@
|
||||
#include <complex>
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "CudaMatrix.h"
|
||||
#include "Function.h"
|
||||
#include "Function1D.h"
|
||||
#include "Function2D.h"
|
||||
@@ -15,5 +16,24 @@
|
||||
|
||||
int main()
|
||||
{
|
||||
auto A = Aurora::zeros(1000,1,1);
|
||||
auto B = Aurora::zeros(1000,1,1);
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
A[i] = -1;
|
||||
B[i] = i;
|
||||
}
|
||||
auto C = A+B;
|
||||
auto dA = A.toDeviceMatrix();
|
||||
auto dB = B.toDeviceMatrix();
|
||||
auto dC = (dA+dB).toHostMatrix();
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
{
|
||||
if (C[i]!=dC[i]){
|
||||
printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
|
||||
return 9;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user