CudaMatrix Operator logic patch1

This commit is contained in:
kradchen
2023-10-31 14:35:29 +08:00
parent dd6a22f47d
commit fe0abf8ee6
5 changed files with 125 additions and 3 deletions

View File

@@ -24,12 +24,17 @@ endif()
# Select the LP64 MKL interface before locating the MKL CMake config package.
set(MKL_INTERFACE_FULL intel_lp64)
find_package(MKL CONFIG REQUIRED)
# Header search paths: project root, ./src, a system-wide Eigen3 install and
# the bundled third-party headers.
# NOTE(review): the single-line and the multi-line include_directories() calls
# below list the same paths; this looks like the pre- and post-change lines of
# the diff shown together - confirm only one exists in the real file.
include_directories(./ ./src /usr/local/include/eigen3 ./thirdparty/include)
include_directories(
./ ./src /usr/local/include/eigen3 ./thirdparty/include)
# Collect every C++ source under ./src.
file(GLOB_RECURSE cpp_files ./src/*.cpp)
file(GLOB_RECURSE cxx_files ./src/*.cxx)
# NOTE(review): add_executable(Aurora ...) appears twice in this view (here and
# after the CUDA glob); presumably this first one is the removed pre-change
# line - confirm, since CMake rejects a duplicate target name.
add_executable(Aurora ${cpp_files} ${cxx_files} )
# CUDA sources and headers are globbed only when Aurora_USE_CUDA is set;
# otherwise cu_files / cuh_files are never set by this fragment.
if (Aurora_USE_CUDA)
file(GLOB_RECURSE cu_files ./src/*.cu)
file(GLOB_RECURSE cuh_files ./src/*.cuh)
endif(Aurora_USE_CUDA)
add_executable(Aurora ${cpp_files} ${cxx_files} ${cu_files} ${cuh_files})
# Propagate MKL's compile options and include directories to the target, and
# link against MKL.
target_compile_options(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
target_include_directories(Aurora PUBLIC $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(Aurora PUBLIC $<LINK_ONLY:MKL::MKL>)

View File

@@ -7,6 +7,7 @@
#include <iostream>
#include <cstddef>
#include <cuda_runtime.h>
#include "CudaMatrixPrivate.cuh"
using namespace Aurora;
@@ -239,4 +240,14 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
}
return true;
}
#endif // USE_CUDA
/**
 * Element-wise sum of this matrix and aMatrix, computed on the device.
 *
 * @param aMatrix right-hand operand; must have the same data size and the
 *                same value type as this matrix.
 * @return a new CudaMatrix with the dimensions and value type of this matrix,
 *         or a default-constructed CudaMatrix when the operands are
 *         incompatible or the device allocation fails.
 */
CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
    // Both operands are read over the full float count below, so they must
    // agree in element count AND in value type; otherwise one buffer would be
    // read past its end.
    if (this->getDataSize() != aMatrix.getDataSize() ||
        this->getValueType() != aMatrix.getValueType()) return CudaMatrix();

    // Number of floats backing the result.
    // NOTE(review): assumes getValueType() is the number of floats per element
    // (the allocation size already relied on this) - confirm in CudaMatrix.h.
    const unsigned long long size = getDataSize() * getValueType();

    float* data = nullptr;
    // A failed allocation must not be handed to the transform below.
    if (cudaMalloc((void**)&data, sizeof(float) * size) != cudaSuccess) return CudaMatrix();

    // NOTE(review): fromRawData presumably takes ownership of `data` - confirm.
    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());

    // Sum every float that was allocated, not just getDataSize() of them;
    // otherwise the tail of the buffer stays uninitialised whenever
    // getValueType() is greater than 1.
    unaryAdd(this->getData(),aMatrix.getData(),out.getData(),size);
    return out;
}
#endif // USE_CUDA

71
src/CudaMatrixPrivate.cu Normal file
View File

@@ -0,0 +1,71 @@
#include <CudaMatrixPrivate.cuh>
#include <math.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/transform.h>
using namespace thrust::placeholders;
/**
 * Unary functor that raises its argument to a fixed power with powf().
 *
 * The exponent is stored by value, so the functor can be copied freely. The
 * call operator is const so the functor can also be invoked through a const
 * object or const reference.
 */
struct PowOperator{
    float exponent;  // power applied to every input value

    PowOperator(float v):exponent(v) {}

    // Replace the stored exponent (host side only).
    void setExponent(float v){
        exponent = v;
    }

    // Returns x raised to `exponent`; callable from host and device code.
    __host__ __device__
    float operator()(const float& x) const {
        return powf(x, exponent);
    }
};
// Element-wise sum of two buffers: out[i] = in1[i] + in2[i] for i in
// [0, length). Dispatched with the thrust::device execution policy.
void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
{
    float* const firstEnd = in1 + length;
    thrust::transform(thrust::device, in1, firstEnd, in2, out, thrust::plus<float>());
}
// Adds the scalar in2 to every element: out[i] = in1[i] + in2 for i in
// [0, length). Dispatched with the thrust::device execution policy.
//
// The previous body built the placeholder expression `in2*_1`, which scaled
// the input by in2 instead of adding it.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
{
    // Copy the scalar so the placeholder expression captures a value rather
    // than depending on the caller's reference.
    const float addend = in2;
    thrust::transform(thrust::device, in1, in1 + length, out,
                      thrust::placeholders::_1 + addend);
}
// Element-wise product of two buffers: out[i] = in1[i] * in2[i] for i in
// [0, length). Dispatched with the thrust::device execution policy.
void unaryMul(float* in1, float* in2, float* out, unsigned long length)
{
    float* const firstEnd = in1 + length;
    thrust::transform(thrust::device, in1, firstEnd, in2, out, thrust::multiplies<float>());
}
// Element-wise negation: out[i] = -in1[i] for i in [0, length).
// Dispatched with the thrust::device execution policy.
void unaryNeg(float* in1, float* out, unsigned long length)
{
    float* const inEnd = in1 + length;
    thrust::transform(thrust::device, in1, inEnd, out, thrust::negate<float>());
}
// Element-wise difference of two buffers: out[i] = in1[i] - in2[i] for i in
// [0, length). Dispatched with the thrust::device execution policy.
void unarySub(float* in1, float* in2, float* out, unsigned long length)
{
    float* const firstEnd = in1 + length;
    thrust::transform(thrust::device, in1, firstEnd, in2, out, thrust::minus<float>());
}
// Element-wise quotient of two buffers: out[i] = in1[i] / in2[i] for i in
// [0, length). Dispatched with the thrust::device execution policy.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length)
{
    float* const firstEnd = in1 + length;
    thrust::transform(thrust::device, in1, firstEnd, in2, out, thrust::divides<float>());
}
// Functor for the general-exponent path of unaryPow: returns powf(x, exponent).
// Defined at file scope (not inside the function) so it can be used as a
// template argument of the device transform.
struct UnaryPowFunctor {
    float exponent;
    UnaryPowFunctor(float e) : exponent(e) {}
    __host__ __device__
    float operator()(const float& x) const {
        return powf(x, exponent);
    }
};

// Raises every element to the power N: out[i] = in1[i]^N for i in [0, length).
//
// All paths run with the thrust::device execution policy. Without an explicit
// policy, thrust::fill / thrust::copy on raw pointers take the host path.
//
// Fast paths:
//   N == 0 -> every output is 1 (x^0 == 1)
//   N == 1 -> plain copy of the input
//   N == 2 -> x * x
// Any other exponent goes through powf().
void unaryPow(float* in1, float N,float* out, unsigned long length){
    float* const inEnd = in1 + length;
    if (N == 0.0f)
    {
        // x^0 is 1 for every x; the previous code filled with 0.
        thrust::fill(thrust::device, out, out + length, 1.0f);
        return;
    }
    if (N == 1.0f)
    {
        thrust::copy(thrust::device, in1, inEnd, out);
        return;
    }
    if (N == 2.0f){
        thrust::transform(thrust::device, in1, inEnd, out,
                          thrust::placeholders::_1 * thrust::placeholders::_1);
        return;
    }
    // powf() cannot be applied to a Thrust placeholder, so a functor carries
    // the exponent into the transform.
    thrust::transform(thrust::device, in1, inEnd, out, UnaryPowFunctor(N));
}

15
src/CudaMatrixPrivate.cuh Normal file
View File

@@ -0,0 +1,15 @@
#ifndef __CUDAMATRIX_CUH__
#define __CUDAMATRIX_CUH__
// Element-wise helpers on raw float buffers, used by CudaMatrix and
// implemented in CudaMatrixPrivate.cu with Thrust. Each function processes
// `length` floats starting at the given pointers and writes to `out`.
// Despite the "unary" prefix, most of them take two input operands.
// NOTE(review): the implementations dispatch with the thrust::device policy,
// so the pointers are presumably expected to be device-accessible - confirm.

// Buffer/buffer form: combines in1[i] with in2[i].
void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
// Scalar-operand overload: combines every in1[i] with the single value in2.
void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
// Element-wise product of in1 and in2.
void unaryMul(float* in1, float* in2, float* out, unsigned long length);
// Element-wise negation of in1.
void unaryNeg(float* in1, float* out, unsigned long length);
// Element-wise power of in1 with exponent N.
void unaryPow(float* in1, float N,float* out, unsigned long length);
// Element-wise difference in1 - in2.
void unarySub(float* in1, float* in2, float* out, unsigned long length);
// Element-wise quotient in1 / in2.
void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
#endif // __CUDAMATRIX_CUH__

View File

@@ -7,6 +7,7 @@
#include <complex>
#include "Matrix.h"
#include "CudaMatrix.h"
#include "Function.h"
#include "Function1D.h"
#include "Function2D.h"
@@ -15,5 +16,24 @@
// Smoke test: adds two 1000-element matrices on the host and on the device
// and checks that both paths give the same values. Returns 0 on success and
// 9 at the first mismatching element.
int main()
{
    auto hostLhs = Aurora::zeros(1000,1,1);
    auto hostRhs = Aurora::zeros(1000,1,1);
    // Fill the operands: left side is all -1, right side holds its own index.
    for (size_t idx = 0; idx != 1000; ++idx)
    {
        hostLhs[idx] = -1;
        hostRhs[idx] = idx;
    }

    // Host-side reference result.
    auto hostSum = hostLhs+hostRhs;

    // Same addition through the device matrices, copied back to the host.
    auto devLhs = hostLhs.toDeviceMatrix();
    auto devRhs = hostRhs.toDeviceMatrix();
    auto devSum = (devLhs+devRhs).toHostMatrix();

    for (size_t idx = 0; idx != 1000; ++idx)
    {
        if (hostSum[idx]==devSum[idx]){
            continue;
        }
        printf("error value i:%zu, value1:%f, value2: %f",idx,hostSum[idx],devSum[idx]);
        return 9;
    }
    return 0;
}