From b170022b21677f3763a83050c7434775ee36e090 Mon Sep 17 00:00:00 2001 From: sunwen Date: Tue, 24 Dec 2024 10:43:26 +0800 Subject: [PATCH] feat: Move common function to Aurora library. --- src/Aurora.cu | 110 -------------------------------------------------- src/Aurora.h | 16 -------- 2 files changed, 126 deletions(-) delete mode 100644 src/Aurora.cu delete mode 100644 src/Aurora.h diff --git a/src/Aurora.cu b/src/Aurora.cu deleted file mode 100644 index 27818b3..0000000 --- a/src/Aurora.cu +++ /dev/null @@ -1,110 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "Aurora.h" -#include "CudaMatrix.h" - -#include "log/log.h" - -__global__ void doubleToComplexKernel(const double* input, cufftDoubleComplex* output, int size) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) { - output[idx].x = input[idx]; - output[idx].y = 0; - } -} - -void Aurora::doubleToComplex(const double* input, cufftDoubleComplex* output, int size) -{ - int threadsPerBlock = 1024; - int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock; - doubleToComplexKernel<<>>(input, output, size); - cudaDeviceSynchronize(); // 等待GPU完成操作 -} - -__global__ void maxKernel(const float* aInput, const float* aOutput, int aSize) -{ - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = gridDim.x*blockDim.x; - float maxResult = aInput[0]; - while (index < aSize) - { - if(maxResult < aInput[index]) - { - maxResult = aInput[index]; - } - index += stride; - } - -} - -void Aurora::max(const float* aInput, const float* aOutput, int aSize) -{ - int threadsPerBlock = 1024; - int blocksPerGrid = 68; - //max<<>>(aInput, aOutput, aSize); - cudaDeviceSynchronize(); -} - -__global__ void validKernel(const float* aData, const float* aValid, float* aOutput, int aOutputRowCount, int aOutputColumnCount) -{ - int threadIndex = blockIdx.x * blockDim.x + threadIdx.x; - int dataIndex = (int)aValid[threadIndex]; - if(threadIndex < aOutputColumnCount) - { - for(int i=0; i < aOutputRowCount; ++i) - { - aOutput[threadIndex * aOutputRowCount + i] = aData[dataIndex * aOutputRowCount + i]; - } - } -} - -Aurora::CudaMatrix Aurora::valid(const Aurora::CudaMatrix aData, const Aurora::CudaMatrix aValid) -{ - int validSize = aValid.getDataSize(); - int rowCount = aData.getDimSize(0); - float* hostValid = new float[validSize]; - float* validProcessed = new float[validSize]; - float* validProcessedDevice = nullptr; - cudaMemcpy(hostValid, aValid.getData(), sizeof(float) * validSize, cudaMemcpyDeviceToHost); - int validColumnCount = 0; - for(int i=0;i>>(aData.getData(), validProcessedDevice, result, rowCount, validColumnCount); - cudaDeviceSynchronize(); - - cudaFree(validProcessedDevice); - delete[] hostValid; - delete[] validProcessed; - return Aurora::CudaMatrix::fromRawData(result, rowCount, validColumnCount); -} - -void Aurora::sort(const Aurora::Matrix& aMatrix) -{ - RECON_INFO("cuda start"); - thrust::sort(thrust::device, aMatrix.getData(), aMatrix.getData()+aMatrix.getDataSize(), thrust::greater()); - RECON_INFO("cuda end"); -} - - diff --git a/src/Aurora.h b/src/Aurora.h deleted file mode 100644 index 1d0ce02..0000000 --- a/src/Aurora.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef SUM_MATRIX_CU_H -#define SUM_MATRIX_CU_H -#include -#include - -#include "Matrix.h" -namespace Aurora -{ - void doubleToComplex(const double* input, cufftDoubleComplex* output, int size); - void max(const float* aInput, const float* aOutput, int aSize); - Aurora::CudaMatrix valid(const Aurora::CudaMatrix aData, const Aurora::CudaMatrix aValid); - void sort(const Aurora::Matrix& aMatrix); -} - - -#endif \ No newline at end of file