Merge branch 'dtof' of http://192.168.1.9:3000/Bug/Aurora into dtof
This commit is contained in:
304
src/Function1D.cu
Normal file
304
src/Function1D.cu
Normal file
@@ -0,0 +1,304 @@
|
||||
#include "CudaMatrix.h"
|
||||
#include "Function1D.cuh"
|
||||
#include "Matrix.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <thrust/iterator/constant_iterator.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
using namespace Aurora;
|
||||
|
||||
namespace
|
||||
{
|
||||
const int THREADS_PER_BLOCK = 256;
|
||||
}
|
||||
|
||||
__global__ void complexKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[2*idx] = aInputData[idx];
|
||||
aOutput[2*idx + 1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::complex(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(aMatrix.isComplex())
|
||||
{
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * aMatrix.getDataSize() * Aurora::Complex);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
complexKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Complex);
|
||||
}
|
||||
|
||||
__global__ void realKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = aInputData[idx*2];
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::real(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(!aMatrix.isComplex())
|
||||
{
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * aMatrix.getDataSize());
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
realKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Normal);
|
||||
}
|
||||
|
||||
__global__ void imageKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = aInputData[idx*2 + 1];
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::imag(const CudaMatrix& aMatrix)
|
||||
{
|
||||
if(!aMatrix.isComplex())
|
||||
{
|
||||
return CudaMatrix();
|
||||
}
|
||||
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * aMatrix.getDataSize());
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
imageKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), Aurora::Normal);
|
||||
}
|
||||
|
||||
__global__ void ceilKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = std::ceil(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::ceil(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
ceilKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::ceil(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
ceilKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void roundKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = std::round(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::round(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
roundKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::round(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
roundKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void floorKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = std::floor(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::floor(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
floorKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::floor(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
floorKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void sqrtKernel(float* aInputData, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
aOutput[idx] = std::sqrt(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::sqrt(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
sqrtKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::sqrt(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
sqrtKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
__global__ void absKernel(float* aInputData, float* aOutput, unsigned int aSize, bool aIsComplex)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
if(aIsComplex)
|
||||
{
|
||||
aOutput[idx] = sqrt(aInputData[2*idx] * aInputData[2*idx] + aInputData[2*idx+1] * aInputData[2*idx+1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
aOutput[idx] = abs(aInputData[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::abs(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
absKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size, aMatrix.isComplex());
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::abs(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
absKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, size, aMatrix.isComplex());
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
}
|
||||
|
||||
__global__ void signKernel(float* aInputData, float* aOutput, unsigned int aSize, bool aIsComplex)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
if(aIsComplex)
|
||||
{
|
||||
float absValue = sqrt(aInputData[2*idx] * aInputData[2*idx] + aInputData[2*idx + 1] * aInputData[2*idx + 1]);
|
||||
aOutput[2*idx] = aInputData[2*idx] / absValue;
|
||||
aOutput[2*idx + 1] = aInputData[2*idx + 1] / absValue;
|
||||
return;
|
||||
}
|
||||
|
||||
if(aInputData[idx] < 0)
|
||||
{
|
||||
aOutput[idx] = -1;
|
||||
}
|
||||
else if(aInputData[idx] > 0)
|
||||
{
|
||||
aOutput[idx] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
aOutput[idx] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::sign(const CudaMatrix& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
signKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, aMatrix.getDataSize(), aMatrix.isComplex());
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
|
||||
CudaMatrix Aurora::sign(const CudaMatrix&& aMatrix)
|
||||
{
|
||||
size_t size = aMatrix.getDataSize() * aMatrix.getValueType();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
signKernel<<<THREADS_PER_BLOCK, blocksPerGrid>>>(aMatrix.getData(), data, aMatrix.getDataSize(), aMatrix.isComplex());
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aMatrix.getDimSize(0), aMatrix.getDimSize(1), aMatrix.getDimSize(2), aMatrix.getValueType());
|
||||
}
|
||||
44
src/Function1D.cuh
Normal file
44
src/Function1D.cuh
Normal file
@@ -0,0 +1,44 @@
|
||||
#ifndef AURORA_CUDA_FUNCTION1D_H
|
||||
#define AURORA_CUDA_FUNCTION1D_H
|
||||
|
||||
#include "CudaMatrix.h"
|
||||
|
||||
namespace Aurora
|
||||
{
|
||||
CudaMatrix complex(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix real(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix imag(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix ceil(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix ceil(const CudaMatrix&& aMatrix);
|
||||
|
||||
CudaMatrix round(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix round(const CudaMatrix&& aMatrix);
|
||||
|
||||
CudaMatrix floor(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix floor(const CudaMatrix&& aMatrix);
|
||||
|
||||
/**
|
||||
* 开根号,暂时只支持正整数
|
||||
* @param matrix
|
||||
* @return
|
||||
*/
|
||||
CudaMatrix sqrt(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix sqrt(const CudaMatrix&& aMatrix);
|
||||
|
||||
CudaMatrix abs(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix abs(const CudaMatrix&& aMatrix);
|
||||
|
||||
CudaMatrix sign(const CudaMatrix& aMatrix);
|
||||
|
||||
CudaMatrix sign(const CudaMatrix&& aMatrix);
|
||||
}
|
||||
|
||||
#endif //AURORA_CUDA_FUNCTION1D_H
|
||||
224
test/Function1D_Cuda_Test.cpp
Normal file
224
test/Function1D_Cuda_Test.cpp
Normal file
@@ -0,0 +1,224 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "CudaMatrix.h"
|
||||
#include "Matrix.h"
|
||||
#include "TestUtility.h"
|
||||
|
||||
#include "Function1D.h"
|
||||
#include "Function1D.cuh"
|
||||
|
||||
class Function1D_Cuda_Test:public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
static void SetUpFunction1DCudaTester(){
|
||||
|
||||
}
|
||||
static void TearDownTestCase(){
|
||||
}
|
||||
void SetUp(){
|
||||
}
|
||||
void TearDown(){
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, complex)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1,2,3,4,5,6,7,8}, 2,2,2);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::complex(hostMatrix);
|
||||
auto result2 = Aurora::complex(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * 2; ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, real)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1,2,3,4,5,6,7,8}, 2,2,1,Aurora::Complex);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::real(hostMatrix);
|
||||
auto result2 = Aurora::real(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, imag)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1,2,3,4,5,6,7,8}, 2,2,1,Aurora::Complex);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::imag(hostMatrix);
|
||||
auto result2 = Aurora::imag(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, ceil)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::ceil(hostMatrix);
|
||||
auto result2 = Aurora::ceil(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::ceil(hostMatrix);
|
||||
result2 = Aurora::ceil(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, round)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::round(hostMatrix);
|
||||
auto result2 = Aurora::round(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::round(hostMatrix);
|
||||
result2 = Aurora::round(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, floor)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::floor(hostMatrix);
|
||||
auto result2 = Aurora::floor(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::floor(hostMatrix);
|
||||
result2 = Aurora::floor(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, sqrt)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::sqrt(hostMatrix);
|
||||
auto result2 = Aurora::sqrt(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::sqrt(hostMatrix);
|
||||
result2 = Aurora::sqrt(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, abs)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::abs(hostMatrix);
|
||||
auto result2 = Aurora::abs(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::abs(hostMatrix);
|
||||
result2 = Aurora::abs(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(Function1D_Cuda_Test, sign)
|
||||
{
|
||||
Aurora::Matrix hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,4);
|
||||
Aurora::CudaMatrix deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
|
||||
auto result1 = Aurora::sign(hostMatrix);
|
||||
auto result2 = Aurora::sign(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 8);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Normal);
|
||||
for(size_t i=0; i<result1.getDataSize(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
|
||||
hostMatrix = Aurora::Matrix::fromRawData(new float[8]{1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8}, 2,2,1,Aurora::Complex);
|
||||
deviceMatrix = hostMatrix.toDeviceMatrix();
|
||||
result1 = Aurora::sign(hostMatrix);
|
||||
result2 = Aurora::sign(deviceMatrix).toHostMatrix();
|
||||
EXPECT_EQ(result2.getDataSize(), 4);
|
||||
EXPECT_EQ(result2.getValueType(), Aurora::Complex);
|
||||
for(size_t i=0; i<result1.getDataSize() * result1.getValueType(); ++i)
|
||||
{
|
||||
EXPECT_EQ(result1[i], result2[i]);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user