From fe0abf8ee62320c338c7c289b46f7f5f6cfe1773 Mon Sep 17 00:00:00 2001
From: kradchen
Date: Tue, 31 Oct 2023 14:35:29 +0800
Subject: [PATCH] CudaMatrix Operator logic patch1

---
NOTE(review): this patch text was recovered from a copy whose line breaks were
collapsed and whose <...> spans were dropped. Blank context lines, indentation,
the header names of the context `#include` lines and the CMake `$<...>`
generator expressions are best-effort and must be checked against the tree
before applying. The blob ids on the `index` lines predate the review fixes.

Review fixes folded into the added code:
 * unaryAdd(float*, const float&, ...) multiplied by the scalar instead of
   adding it.
 * unaryPow returned 0 for exponent 0, ran thrust::fill/thrust::copy without
   the device policy, and passed powf() a placeholder expression instead of
   using PowOperator.
 * CudaMatrix::operator+ ignored the cudaMalloc result, only summed
   getDataSize() floats of a getDataSize()*getValueType() float buffer, and
   did not compare the operands' value types.
 * CudaMatrixPrivate.cuh: closing guard comment named the wrong macro and the
   file had no trailing newline.

 CMakeLists.txt            |  9 +++--
 src/CudaMatrix.cpp        | 31 ++++++++++++-
 src/CudaMatrixPrivate.cu  | 88 +++++++++++++++++++++++++++++++++++++++
 src/CudaMatrixPrivate.cuh | 25 +++++++++++
 src/main.cxx              | 20 +++++++++
 5 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 src/CudaMatrixPrivate.cu
 create mode 100644 src/CudaMatrixPrivate.cuh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ccf00e..990d637 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,12 +24,17 @@ endif()
 set(MKL_INTERFACE_FULL intel_lp64)
 find_package(MKL CONFIG REQUIRED)
 
-include_directories(./ ./src /usr/local/include/eigen3 ./thirdparty/include)
+include_directories(
+./ ./src /usr/local/include/eigen3 ./thirdparty/include)
 
 file(GLOB_RECURSE cpp_files ./src/*.cpp)
 file(GLOB_RECURSE cxx_files ./src/*.cxx)
 
-add_executable(Aurora ${cpp_files} ${cxx_files} )
+if (Aurora_USE_CUDA)
+file(GLOB_RECURSE cu_files ./src/*.cu)
+file(GLOB_RECURSE cuh_files ./src/*.cuh)
+endif(Aurora_USE_CUDA)
+add_executable(Aurora ${cpp_files} ${cxx_files} ${cu_files} ${cuh_files})
 target_compile_options(Aurora PUBLIC $)
 target_include_directories(Aurora PUBLIC $)
 target_link_libraries(Aurora PUBLIC $)
diff --git a/src/CudaMatrix.cpp b/src/CudaMatrix.cpp
index 9562f57..6a6efec 100644
--- a/src/CudaMatrix.cpp
+++ b/src/CudaMatrix.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include "CudaMatrixPrivate.cuh"
 
 using namespace Aurora;
 
@@ -239,4 +240,32 @@ bool CudaMatrix::setBlockValue(int aDim,int aBeginIndx, int aEndIndex,float valu
     }
     return true;
 }
-#endif // USE_CUDA
\ No newline at end of file
+
+/**
+ * Element-wise sum of two device matrices.
+ *
+ * @param aMatrix right-hand operand; must have the same data size and value
+ *                type as this matrix.
+ * @return a new CudaMatrix holding the sums, or a default-constructed
+ *         CudaMatrix when the operands do not match or the device allocation
+ *         fails.
+ */
+CudaMatrix CudaMatrix::operator+(const CudaMatrix &aMatrix) const{
+    // Both buffers are walked over the same number of floats, so the operands
+    // have to agree in element count and in value type.
+    if (this->getDataSize() != aMatrix.getDataSize() ||
+        this->getValueType() != aMatrix.getValueType()) return CudaMatrix();
+    // Number of floats in the buffer: element count times floats per element.
+    // NOTE(review): assumes getValueType() is the per-element float count
+    // (as the allocation size below already implies) - confirm.
+    const unsigned long long size = getDataSize() * getValueType();
+    float* data = nullptr;
+    // Without this check a failed allocation would hand a null pointer to thrust.
+    if (cudaMalloc((void**)&data, sizeof(float) * size) != cudaSuccess) return CudaMatrix();
+    auto out = CudaMatrix::fromRawData(data, getDimSize(0), getDimSize(1), getDimSize(2), getValueType());
+    // Cover every float that was allocated so no part of the result is left
+    // uninitialised when an element spans more than one float.
+    unaryAdd(this->getData(),aMatrix.getData(),out.getData(),size);
+    return out;
+}
+#endif // USE_CUDA
diff --git a/src/CudaMatrixPrivate.cu b/src/CudaMatrixPrivate.cu
new file mode 100644
index 0000000..558b9a6
--- /dev/null
+++ b/src/CudaMatrixPrivate.cu
@@ -0,0 +1,88 @@
+#include <thrust/copy.h>
+#include <thrust/execution_policy.h>
+#include <thrust/fill.h>
+#include <thrust/functional.h>
+#include <thrust/transform.h>
+
+#include "CudaMatrixPrivate.cuh"
+
+using namespace thrust::placeholders;
+
+// Element-wise helpers behind the CudaMatrix operators. All pointers are raw
+// pointers that are processed with the thrust::device policy, and `length` is
+// the number of floats to process.
+
+// Functor returning powf(x, exponent) for a fixed exponent.
+struct PowOperator{
+    float exponent;
+    explicit PowOperator(float v):exponent(v) {}
+    void setExponent(float v){
+        exponent = v;
+    }
+    __host__ __device__
+    float operator()(const float& x) const {
+        return powf(x, exponent);
+    }
+};
+
+// out[i] = in1[i] + in2[i]
+void unaryAdd(float* in1, float* in2, float* out, unsigned long length)
+{
+    thrust::plus<float> op;
+    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
+}
+
+// out[i] = in1[i] + in2; the scalar is added, as in the element-wise overload.
+void unaryAdd(float* in1, const float& in2, float* out, unsigned long length)
+{
+    thrust::transform(thrust::device,in1,in1+length,out,_1+in2);
+}
+
+// out[i] = in1[i] * in2[i]
+void unaryMul(float* in1, float* in2, float* out, unsigned long length)
+{
+    thrust::multiplies<float> op;
+    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
+}
+
+// out[i] = -in1[i]
+void unaryNeg(float* in1, float* out, unsigned long length){
+    thrust::negate<float> op;
+    thrust::transform(thrust::device,in1,in1+length,out,op);
+}
+
+// out[i] = in1[i] - in2[i]
+void unarySub(float* in1, float* in2, float* out, unsigned long length){
+    thrust::minus<float> op;
+    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
+}
+
+// out[i] = in1[i] / in2[i]
+void unaryDiv(float* in1, float* in2, float* out, unsigned long length){
+    thrust::divides<float> op;
+    thrust::transform(thrust::device,in1,in1+length,in2,out,op);
+}
+
+// out[i] = in1[i] raised to the power N, with shortcuts for N = 0, 1 and 2.
+void unaryPow(float* in1, float N,float* out, unsigned long length){
+    // Raw pointers carry no system tag, so every call names thrust::device
+    // explicitly; otherwise thrust would dereference them on the host.
+    if (N == 0.0f)
+    {
+        // x^0 is 1, not 0.
+        thrust::fill(thrust::device,out,out+length,1.0f);
+        return;
+    }
+    if (N == 1.0f)
+    {
+        thrust::copy(thrust::device,in1,in1+length,out);
+        return;
+    }
+    if (N == 2.0f){
+        thrust::square<float> op;
+        thrust::transform(thrust::device,in1,in1+length,out,op);
+        return;
+    }
+    // powf cannot take a placeholder expression, so the functor is used.
+    thrust::transform(thrust::device,in1,in1+length,out,PowOperator(N));
+}
diff --git a/src/CudaMatrixPrivate.cuh b/src/CudaMatrixPrivate.cuh
new file mode 100644
index 0000000..09348b3
--- /dev/null
+++ b/src/CudaMatrixPrivate.cuh
@@ -0,0 +1,25 @@
+
+#ifndef __CUDAMATRIX_CUH__
+#define __CUDAMATRIX_CUH__
+
+// Element-wise float helpers implemented in CudaMatrixPrivate.cu. `length` is
+// the number of floats read from each input and written to `out`.
+
+// out[i] = in1[i] + in2[i]
+void unaryAdd(float* in1, float* in2, float* out, unsigned long length);
+// out[i] = in1[i] + in2
+void unaryAdd(float* in1, const float& in2, float* out, unsigned long length);
+// out[i] = in1[i] * in2[i]
+void unaryMul(float* in1, float* in2, float* out, unsigned long length);
+// out[i] = -in1[i]
+void unaryNeg(float* in1, float* out, unsigned long length);
+// out[i] = in1[i] raised to the power N
+void unaryPow(float* in1, float N,float* out, unsigned long length);
+
+// out[i] = in1[i] - in2[i]
+void unarySub(float* in1, float* in2, float* out, unsigned long length);
+// out[i] = in1[i] / in2[i]
+void unaryDiv(float* in1, float* in2, float* out, unsigned long length);
+
+
+#endif // __CUDAMATRIX_CUH__
diff --git a/src/main.cxx b/src/main.cxx
index 6d29767..2476b87 100644
--- a/src/main.cxx
+++ b/src/main.cxx
@@ -7,6 +7,7 @@
 #include
 
 #include "Matrix.h"
+#include "CudaMatrix.h"
 #include "Function.h"
 #include "Function1D.h"
 #include "Function2D.h"
@@ -15,5 +16,24 @@
 
 int main()
 {
+    auto A = Aurora::zeros(1000,1,1);
+    auto B = Aurora::zeros(1000,1,1);
+    for (size_t i = 0; i < 1000; i++)
+    {
+        A[i] = -1;
+        B[i] = i;
+    }
+    auto C = A+B;
+    auto dA = A.toDeviceMatrix();
+    auto dB = B.toDeviceMatrix();
+    auto dC = (dA+dB).toHostMatrix();
+    for (size_t i = 0; i < 1000; i++)
+    {
+        if (C[i]!=dC[i]){
+            printf("error value i:%zu, value1:%f, value2: %f",i,C[i],dC[i]);
+            return 9;
+        }
+    }
+
     return 0;
 }