diff --git a/src/Function2D.cu b/src/Function2D.cu
index d467406..49daf5e 100644
--- a/src/Function2D.cu
+++ b/src/Function2D.cu
@@ -1675,4 +1675,88 @@ CudaMatrix Aurora::hilbert(const CudaMatrix &aMatrix)
     x = x * h;
     auto result = ifft(x);
     return result;
-}
\ No newline at end of file
+}
+
+/**
+ * Copies selected columns of aData into aOutput, one thread per output column.
+ * Output column t receives source column (int)aValid[t]; every column is
+ * aOutputRowCount contiguous floats. Uses only the x launch dimension; a thread
+ * whose global index is at or beyond aOutputColumnCount does nothing.
+ *
+ * @param aData              device buffer holding the source columns
+ * @param aValid             device buffer of aOutputColumnCount source column indices stored as floats
+ * @param aOutput            device buffer receiving aOutputRowCount * aOutputColumnCount floats
+ * @param aOutputRowCount    number of floats per column
+ * @param aOutputColumnCount number of columns to copy
+ */
+__global__ void validKernel(const float* aData, const float* aValid, float* aOutput, int aOutputRowCount, int aOutputColumnCount)
+{
+    int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
+    // Check the bound before touching aValid: a launch may contain more
+    // threads than columns, and those threads must not read past the index buffer.
+    if(threadIndex >= aOutputColumnCount)
+    {
+        return;
+    }
+    int dataIndex = (int)aValid[threadIndex];
+    // Widen before multiplying so large matrices cannot overflow an int offset.
+    size_t sourceOffset = (size_t)dataIndex * aOutputRowCount;
+    size_t targetOffset = (size_t)threadIndex * aOutputRowCount;
+    for(int i = 0; i < aOutputRowCount; ++i)
+    {
+        aOutput[targetOffset + i] = aData[sourceOffset + i];
+    }
+}
+
+/**
+ * Builds a matrix from the columns of aData that are flagged in aValid.
+ * aValid is copied to the host, the indices of its flagged elements are
+ * collected, and validKernel gathers the matching columns on the device.
+ *
+ * NOTE(review): the statements between the flag loop header and the kernel
+ * launch arguments were unreadable in this copy of the patch. The flag test
+ * (== 1), the two device allocations, the index upload and the launch
+ * configuration are reconstructed from the surrounding code - confirm them
+ * against the original commit.
+ *
+ * @param aData  source matrix; getDimSize(0) is used as the row count
+ * @param aValid flags, presumably one per column of aData - verify with callers
+ * @return matrix created by fromRawData with rowCount rows and one column per flagged element
+ */
+Aurora::CudaMatrix Aurora::valid(const Aurora::CudaMatrix aData, const Aurora::CudaMatrix aValid)
+{
+    int validSize = aValid.getDataSize();
+    int rowCount = aData.getDimSize(0);
+    float* hostValid = new float[validSize];
+    float* validProcessed = new float[validSize];
+    float* validProcessedDevice = nullptr;
+    float* result = nullptr;
+    cudaMemcpy(hostValid, aValid.getData(), sizeof(float) * validSize, cudaMemcpyDeviceToHost);
+    int validColumnCount = 0;
+    for(int i=0;i<validSize;++i)
+    {
+        if(hostValid[i] == 1)
+        {
+            // Indices are stored as floats because the kernel reads them from a float buffer.
+            validProcessed[validColumnCount] = i;
+            ++validColumnCount;
+        }
+    }
+
+    // A zero-block launch is an invalid configuration, so skip the device work when nothing is flagged.
+    if(validColumnCount > 0)
+    {
+        cudaMalloc((void**)&validProcessedDevice, sizeof(float) * validColumnCount);
+        cudaMemcpy(validProcessedDevice, validProcessed, sizeof(float) * validColumnCount, cudaMemcpyHostToDevice);
+        cudaMalloc((void**)&result, sizeof(float) * rowCount * validColumnCount);
+        const int threadsPerBlock = 256;
+        int blocksPerGrid = (validColumnCount + threadsPerBlock - 1) / threadsPerBlock;
+        validKernel<<<blocksPerGrid, threadsPerBlock>>>(aData.getData(), validProcessedDevice, result, rowCount, validColumnCount);
+        cudaDeviceSynchronize();
+    }
+
+    cudaFree(validProcessedDevice);
+    delete[] hostValid;
+    delete[] validProcessed;
+    return Aurora::CudaMatrix::fromRawData(result, rowCount, validColumnCount);
+}
diff --git a/src/Function2D.cuh b/src/Function2D.cuh
index 4a0fd4c..afa92db 100644
--- a/src/Function2D.cuh
+++ b/src/Function2D.cuh
@@ -86,6 +86,8 @@ namespace Aurora
      */
     CudaMatrix ifft_symmetric(const CudaMatrix &aMatrix,long aLength);
 
+    CudaMatrix valid(const CudaMatrix aData, const CudaMatrix aValid);
+
 }
 
 #endif // __FUNCTION2D_CUDA_H__
\ No newline at end of file