cuda2023
This commit is contained in:
201
src/Aurora.cu
Normal file
201
src/Aurora.cu
Normal file
@@ -0,0 +1,201 @@
|
||||
#include "/usr/local/cuda-10.1/targets/x86_64-linux/include/cufft.h"
|
||||
#include </usr/local/cuda-10.1/targets/x86_64-linux/include/cuda_runtime.h>
|
||||
#include <cstdio>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
#include "Aurora.h"
|
||||
#include "AuroraDefs.h"
|
||||
#include "CudaMatrix.h"
|
||||
#include <iostream>
|
||||
#include "log/log.h"
|
||||
#include <cuda_texture_types.h>
|
||||
|
||||
__global__ void doubleToComplexKernel(const double* input, cufftDoubleComplex* output, int size)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < size) {
|
||||
output[idx].x = input[idx];
|
||||
output[idx].y = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Aurora::doubleToComplex(const double* input, cufftDoubleComplex* output, int size)
|
||||
{
|
||||
int threadsPerBlock = 1024;
|
||||
int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
|
||||
doubleToComplexKernel<<<blocksPerGrid, threadsPerBlock>>>(input, output, size);
|
||||
cudaDeviceSynchronize(); // 等待GPU完成操作
|
||||
}
|
||||
|
||||
__global__ void maxKernel(const float* aInput, const float* aOutput, int aSize)
|
||||
{
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int stride = gridDim.x*blockDim.x;
|
||||
float maxResult = aInput[0];
|
||||
while (index < aSize)
|
||||
{
|
||||
if(maxResult < aInput[index])
|
||||
{
|
||||
maxResult = aInput[index];
|
||||
}
|
||||
index += stride;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Aurora::max(const float* aInput, const float* aOutput, int aSize)
|
||||
{
|
||||
int threadsPerBlock = 1024;
|
||||
int blocksPerGrid = 68;
|
||||
//max<<<blocksPerGrid, threadsPerBlock>>>(aInput, aOutput, aSize);
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void validKernel(const float* aData, const float* aValid, float* aOutput, int aOutputRowCount, int aOutputColumnCount)
|
||||
{
|
||||
int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int dataIndex = (int)aValid[threadIndex];
|
||||
if(threadIndex < aOutputColumnCount)
|
||||
{
|
||||
for(int i=0; i < aOutputRowCount; ++i)
|
||||
{
|
||||
aOutput[threadIndex * aOutputRowCount + i] = aData[dataIndex * aOutputRowCount + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// __global__ void validSubKernel(const double* aValid, double* aOutput, unsigned int* aCount, int aValidSize)
|
||||
// {
|
||||
// int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// if(index == 0)
|
||||
// {
|
||||
// for(int i=0;i<aValidSize;++i)
|
||||
// {
|
||||
// if(aValid[i] == 1)
|
||||
// {
|
||||
// aOutput[*aCount] = i;
|
||||
// ++(*aCount);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// __syncthreads();
|
||||
// }
|
||||
|
||||
Aurora::CudaMatrix Aurora::valid(const Aurora::CudaMatrix aData, const Aurora::CudaMatrix aValid)
|
||||
{
|
||||
int validSize = aValid.getDataSize();
|
||||
int rowCount = aData.getDimSize(0);
|
||||
float* hostValid = new float[validSize];
|
||||
float* validProcessed = new float[validSize];
|
||||
float* validProcessedDevice = nullptr;
|
||||
cudaMemcpy(hostValid, aValid.getData(), sizeof(float) * validSize, cudaMemcpyDeviceToHost);
|
||||
int validColumnCount = 0;
|
||||
for(int i=0;i<validSize;++i)
|
||||
{
|
||||
if(hostValid[i] == 1)
|
||||
{
|
||||
validProcessed[validColumnCount] = i;
|
||||
++validColumnCount;
|
||||
}
|
||||
}
|
||||
cudaMalloc((void**)&validProcessedDevice, sizeof(float) * validColumnCount );
|
||||
cudaMemcpy(validProcessedDevice, validProcessed, sizeof(float) * validColumnCount, cudaMemcpyHostToDevice);
|
||||
|
||||
int threadPerBlock = 1024;
|
||||
int blockPerGrid = validColumnCount / threadPerBlock + 1;
|
||||
float* result = nullptr;
|
||||
cudaMalloc((void**)&result, sizeof(float) * validColumnCount * rowCount);
|
||||
validKernel<<<blockPerGrid, threadPerBlock>>>(aData.getData(), validProcessedDevice, result, rowCount, validColumnCount);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
cudaFree(validProcessedDevice);
|
||||
delete[] hostValid;
|
||||
delete[] validProcessed;
|
||||
return Aurora::CudaMatrix::fromRawData(result, rowCount, validColumnCount);
|
||||
}
|
||||
|
||||
texture<float, cudaTextureType2D, cudaReadModeElementType> tex;
|
||||
cudaArray* array;
|
||||
|
||||
__global__ void testKernel(float* aData,cudaTextureObject_t aTexObj, cudaSurfaceObject_t aSurface)
|
||||
{
|
||||
float a = tex2D(tex,5.5,5.5);
|
||||
float b = tex2D<float>(aTexObj,5.5,5.5);
|
||||
float2 c = tex2D<float2>(aSurface,1,1);
|
||||
printf("%f\n",a);
|
||||
printf("%f\n",b);
|
||||
printf("%f\n",c.x);
|
||||
printf("%f\n",c.y);
|
||||
}
|
||||
|
||||
__global__ void writeSurfaceKernel( cudaSurfaceObject_t aSurface)
|
||||
{
|
||||
float2 value;
|
||||
value.x = 100;
|
||||
value.y = 99;
|
||||
surf2Dwrite(value, aSurface, 1, 1 );
|
||||
|
||||
}
|
||||
|
||||
void subTest(cudaTextureObject_t& aTexture)
|
||||
{
|
||||
cudaResourceDesc resourceDesc;
|
||||
cudaTextureDesc textureDesc;
|
||||
memset(&resourceDesc, 0, sizeof(resourceDesc));
|
||||
resourceDesc.resType = cudaResourceTypeArray;
|
||||
resourceDesc.res.array.array = array; // 指向设备端的 CUDA 数组
|
||||
|
||||
// 在 textureDesc 中设置纹理描述
|
||||
memset(&textureDesc, 0, sizeof(textureDesc));
|
||||
textureDesc.addressMode[0] = cudaAddressModeClamp;
|
||||
textureDesc.addressMode[1] = cudaAddressModeClamp;
|
||||
textureDesc.filterMode = cudaFilterModeLinear;
|
||||
textureDesc.readMode = cudaReadModeElementType;
|
||||
textureDesc.normalizedCoords = false;
|
||||
//textureDesc.channelDesc = texChannelDescSpeedOfSoundField;
|
||||
cudaCreateTextureObject(&aTexture, &resourceDesc, &textureDesc, nullptr);
|
||||
}
|
||||
|
||||
void Aurora::test(float* aData)
|
||||
{
|
||||
tex.addressMode[0] = cudaAddressModeClamp; // Texturreferenz beschreiben
|
||||
tex.addressMode[1] = cudaAddressModeClamp;
|
||||
tex.filterMode = cudaFilterModeLinear;
|
||||
tex.normalized = 0;
|
||||
cudaChannelFormatDesc texChannelDescSpeedOfSoundField = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
|
||||
cudaMallocArray(&array, &texChannelDescSpeedOfSoundField, 10, 9);
|
||||
cudaMemcpyToArray(array, 0, 0, aData,10 * 9 *sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaBindTextureToArray ( &tex, array, &texChannelDescSpeedOfSoundField );
|
||||
|
||||
cudaTextureObject_t textureObj;
|
||||
subTest(textureObj);
|
||||
|
||||
|
||||
|
||||
struct cudaResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
resDesc.resType = cudaResourceTypeArray;
|
||||
// Create the surface objects
|
||||
resDesc.res.array.array = array;
|
||||
cudaSurfaceObject_t inputSurfObj = 0;
|
||||
cudaCreateSurfaceObject(&inputSurfObj, &resDesc);
|
||||
|
||||
writeSurfaceKernel<<<1,1>>>(inputSurfObj);
|
||||
cudaDeviceSynchronize();
|
||||
testKernel<<<1, 1>>>(aData,textureObj, inputSurfObj);
|
||||
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
cudaUnbindTexture(&tex);
|
||||
}
|
||||
|
||||
void Aurora::sort(const Aurora::Matrix& aMatrix)
|
||||
{
|
||||
RECON_INFO("cuda start");
|
||||
thrust::sort(thrust::device, aMatrix.getData(), aMatrix.getData()+aMatrix.getDataSize(), thrust::greater<int>());
|
||||
RECON_INFO("cuda end");
|
||||
}
|
||||
|
||||
|
||||
24
src/Aurora.h
Normal file
24
src/Aurora.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef SUM_MATRIX_CU_H
|
||||
#define SUM_MATRIX_CU_H
|
||||
#include "/usr/local/cuda-10.1/targets/x86_64-linux/include/cufft.h"
|
||||
#include </usr/local/cuda-10.1/targets/x86_64-linux/include/cuda_runtime.h>
|
||||
|
||||
#include "Matrix.h"
|
||||
namespace Aurora
|
||||
{
|
||||
//__global__ void doubleToComplexKernel(const double* input, cufftDoubleComplex* output, int size);
|
||||
void doubleToComplex(const double* input, cufftDoubleComplex* output, int size);
|
||||
|
||||
//__global__ void maxKernel(const float* aInput, const float* aOutput, int aSize);
|
||||
void max(const float* aInput, const float* aOutput, int aSize);
|
||||
|
||||
Aurora::CudaMatrix valid(const Aurora::CudaMatrix aData, const Aurora::CudaMatrix aValid);
|
||||
void test(float* aData);
|
||||
|
||||
void sort(const Aurora::Matrix& aMatrix);
|
||||
|
||||
//Aurora::CudaMatrix getTransmissionDataSubFunction(const Aurora::CudaMatrix& aFxMatrix, const Aurora::CudaMatrix& aFhMatrix);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
@@ -5,98 +5,103 @@
|
||||
#include <immintrin.h>
|
||||
#include <sys/types.h>
|
||||
namespace {
|
||||
const ushort CONVERT_AND_VALUE = 15;
|
||||
// andblack
|
||||
const __m128i andBlock = _mm_set_epi16(15, 15, 15, 15, 15, 15, 15, 15);
|
||||
const __m128i andBlock2 =
|
||||
_mm_set_epi16(2047, 2047, 2047, 2047, 2047, 2047, 2047, 2047);
|
||||
const __m128i zeroBlock = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
|
||||
const __m128i oneBlock = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
|
||||
const __m128i twokBlock =
|
||||
_mm_set_epi16(2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048);
|
||||
const uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
|
||||
void convert(short * ptr, float* des,bool single = false){
|
||||
// 初始化值
|
||||
auto value = _mm_set_epi16(ptr[0], ptr[1], ptr[2], ptr[3], single?ptr[0]:ptr[4], single?ptr[0]:ptr[5],
|
||||
single?ptr[0]:ptr[6], single?ptr[0]:ptr[7]);
|
||||
auto uvalue = _mm_set_epi16(
|
||||
(ushort)ptr[0], (ushort)ptr[1], (ushort)ptr[2], (ushort)ptr[3],
|
||||
(ushort)(single?ptr[0]:ptr[4]), (ushort)(single?ptr[0]:ptr[5]),
|
||||
(ushort)(single?ptr[0]:ptr[6]), (ushort)(single?ptr[0]:ptr[7]));
|
||||
// 位移
|
||||
auto sign_bit = _mm_srli_epi16(value, 15); // 右移16位取符号位
|
||||
auto exponent = _mm_srli_epi16(uvalue, 11);
|
||||
// and
|
||||
exponent = _mm_and_si128(exponent, andBlock);
|
||||
// and ,then convert to int 32 bits
|
||||
auto fraction3 = _mm256_cvtepi16_epi32(_mm_and_si128(uvalue, andBlock2));
|
||||
auto hidden_bit_mask =
|
||||
(_mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ) &
|
||||
_mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_EQ)) |
|
||||
(_mm_cmp_epi16_mask(sign_bit, zeroBlock, _MM_CMPINT_EQ) &
|
||||
_mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_NE));
|
||||
auto hidden_bit16 = _mm_maskz_set1_epi16(hidden_bit_mask, 2048);
|
||||
auto hidden_bit32 = _mm256_cvtepi16_epi32(hidden_bit16);
|
||||
auto outputBlock = _mm256_add_epi32(fraction3, hidden_bit32);
|
||||
auto sign_bit_add_value = _mm256_maskz_set1_epi32(
|
||||
_mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ),
|
||||
CONVERT_ADD_VALUE);
|
||||
outputBlock = _mm256_add_epi32(outputBlock, sign_bit_add_value);
|
||||
auto exponent_mask =
|
||||
_mm_cmp_epi16_mask(oneBlock, exponent, _MM_CMPINT_LT);
|
||||
exponent = _mm_sub_epi16(exponent, oneBlock);
|
||||
auto exponent32 = _mm256_cvtepi16_epi32(exponent);
|
||||
auto zeroBlock32 = _mm256_cvtepi16_epi32(zeroBlock);
|
||||
auto offsetCount =
|
||||
_mm256_mask_blend_epi32(exponent_mask, zeroBlock32, exponent32);
|
||||
|
||||
outputBlock = _mm256_sllv_epi32(outputBlock, offsetCount);
|
||||
// const ushort CONVERT_AND_VALUE = 15;
|
||||
// // andblack
|
||||
// const __m128i andBlock = _mm_set_epi16(15, 15, 15, 15, 15, 15, 15, 15);
|
||||
// const __m128i andBlock2 =
|
||||
// _mm_set_epi16(2047, 2047, 2047, 2047, 2047, 2047, 2047, 2047);
|
||||
// const __m128i zeroBlock = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
|
||||
// const __m128i oneBlock = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
|
||||
// const __m128i twokBlock =
|
||||
// _mm_set_epi16(2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048);
|
||||
// const uint CONVERT_ADD_VALUE = UINT32_MAX - 4095;
|
||||
// void convert(short * ptr, double* des,bool single = false){
|
||||
// // 初始化值
|
||||
// auto value = _mm_set_epi16(ptr[0], ptr[1], ptr[2], ptr[3], single?ptr[0]:ptr[4], single?ptr[0]:ptr[5],
|
||||
// single?ptr[0]:ptr[6], single?ptr[0]:ptr[7]);
|
||||
// auto uvalue = _mm_set_epi16(
|
||||
// (ushort)ptr[0], (ushort)ptr[1], (ushort)ptr[2], (ushort)ptr[3],
|
||||
// (ushort)(single?ptr[0]:ptr[4]), (ushort)(single?ptr[0]:ptr[5]),
|
||||
// (ushort)(single?ptr[0]:ptr[6]), (ushort)(single?ptr[0]:ptr[7]));
|
||||
// // 位移
|
||||
// auto sign_bit = _mm_srli_epi16(value, 15); // 右移16位取符号位
|
||||
// auto exponent = _mm_srli_epi16(uvalue, 11);
|
||||
// // and
|
||||
// exponent = _mm_and_si128(exponent, andBlock);
|
||||
// // and ,then convert to int 32 bits
|
||||
// auto fraction3 = _mm256_cvtepi16_epi32(_mm_and_si128(uvalue, andBlock2));
|
||||
// auto hidden_bit_mask =
|
||||
// (_mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ) &
|
||||
// _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_EQ)) |
|
||||
// (_mm_cmp_epi16_mask(sign_bit, zeroBlock, _MM_CMPINT_EQ) &
|
||||
// _mm_cmp_epi16_mask(exponent, zeroBlock, _MM_CMPINT_NE));
|
||||
// auto hidden_bit16 = _mm_maskz_set1_epi16(hidden_bit_mask, 2048);
|
||||
// auto hidden_bit32 = _mm256_cvtepi16_epi32(hidden_bit16);
|
||||
// auto outputBlock = _mm256_add_epi32(fraction3, hidden_bit32);
|
||||
// auto sign_bit_add_value = _mm256_maskz_set1_epi32(
|
||||
// _mm_cmp_epi16_mask(sign_bit, oneBlock, _MM_CMPINT_EQ),
|
||||
// CONVERT_ADD_VALUE);
|
||||
// outputBlock = _mm256_add_epi32(outputBlock, sign_bit_add_value);
|
||||
// auto exponent_mask =
|
||||
// _mm_cmp_epi16_mask(oneBlock, exponent, _MM_CMPINT_LT);
|
||||
// exponent = _mm_sub_epi16(exponent, oneBlock);
|
||||
// auto exponent32 = _mm256_cvtepi16_epi32(exponent);
|
||||
// auto zeroBlock32 = _mm256_cvtepi16_epi32(zeroBlock);
|
||||
// auto offsetCount =
|
||||
// _mm256_mask_blend_epi32(exponent_mask, zeroBlock32, exponent32);
|
||||
|
||||
|
||||
// outputBlock = _mm256_sllv_epi32(outputBlock, offsetCount);
|
||||
|
||||
des[3] = _mm256_extract_epi32(outputBlock, 4);
|
||||
des[2] = _mm256_extract_epi32(outputBlock, 5);
|
||||
des[1] = _mm256_extract_epi32(outputBlock, 6);
|
||||
des[0] = _mm256_extract_epi32(outputBlock, 7);
|
||||
if(single) return;
|
||||
des[7] = _mm256_extract_epi32(outputBlock, 0);
|
||||
des[6] = _mm256_extract_epi32(outputBlock, 1);
|
||||
des[5] = _mm256_extract_epi32(outputBlock, 2);
|
||||
des[4] = _mm256_extract_epi32(outputBlock, 3);
|
||||
// des[3] = _mm256_extract_epi32(outputBlock, 4);
|
||||
// des[2] = _mm256_extract_epi32(outputBlock, 5);
|
||||
// des[1] = _mm256_extract_epi32(outputBlock, 6);
|
||||
// des[0] = _mm256_extract_epi32(outputBlock, 7);
|
||||
// if(single) return;
|
||||
// des[7] = _mm256_extract_epi32(outputBlock, 0);
|
||||
// des[6] = _mm256_extract_epi32(outputBlock, 1);
|
||||
// des[5] = _mm256_extract_epi32(outputBlock, 2);
|
||||
// des[4] = _mm256_extract_epi32(outputBlock, 3);
|
||||
|
||||
}
|
||||
// }
|
||||
}
|
||||
|
||||
Aurora::Matrix Recon::convertfp16tofloat(Aurora::Matrix aMatrix) {
|
||||
auto input = aMatrix.getData();
|
||||
// uint16变换为float(32位)输出大小翻倍
|
||||
auto output = Aurora::malloc(aMatrix.getDataSize() * 4);
|
||||
size_t rows = aMatrix.getDataSize() * sizeof(float) / sizeof(short);
|
||||
size_t total_count = aMatrix.getDataSize();
|
||||
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < total_count; i += 8) {
|
||||
// 循环展开以避免过度的线程调用
|
||||
if (i < total_count) {
|
||||
auto ptr = (short *)(input + i);
|
||||
float *des = output + i * 4;
|
||||
::convert(ptr, des,i+1>total_count);
|
||||
}
|
||||
if (i+2 < total_count) {
|
||||
auto ptr = (short *)(input + i + 2);
|
||||
float *des = output + (i+2) * 4;
|
||||
::convert(ptr, des,i+3>total_count);
|
||||
}
|
||||
if (i+4 < total_count) {
|
||||
auto ptr = (short *)(input + i + 4);
|
||||
float *des = output + (i+4) * 4;
|
||||
::convert(ptr, des,i+5>total_count);
|
||||
}
|
||||
if (i+6 < total_count) {
|
||||
auto ptr = (short *)(input + i + 6);
|
||||
float *des = output + (i+6) * 4;
|
||||
::convert(ptr, des,i+7>total_count);
|
||||
}
|
||||
}
|
||||
return Aurora::Matrix::New(output, aMatrix.getDimSize(0),
|
||||
aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
// auto input = aMatrix.getData();
|
||||
// // uint16变换为float(32位)输出大小翻倍
|
||||
// auto output = Aurora::malloc(aMatrix.getDataSize() * 4);
|
||||
// size_t rows = aMatrix.getDataSize() * sizeof(double) / sizeof(short);
|
||||
// size_t total_count = aMatrix.getDataSize();
|
||||
|
||||
|
||||
// #pragma omp parallel for
|
||||
// for (size_t i = 0; i < total_count; i += 8) {
|
||||
// // 循环展开以避免过度的线程调用
|
||||
// if (i < total_count) {
|
||||
// auto ptr = (short *)(input + i);
|
||||
// double *des = output + i * 4;
|
||||
// ::convert(ptr, des,i+1>total_count);
|
||||
// }
|
||||
// if (i+2 < total_count) {
|
||||
// auto ptr = (short *)(input + i + 2);
|
||||
// double *des = output + (i+2) * 4;
|
||||
// ::convert(ptr, des,i+3>total_count);
|
||||
// }
|
||||
// if (i+4 < total_count) {
|
||||
// auto ptr = (short *)(input + i + 4);
|
||||
// double *des = output + (i+4) * 4;
|
||||
// ::convert(ptr, des,i+5>total_count);
|
||||
// }
|
||||
// if (i+6 < total_count) {
|
||||
// auto ptr = (short *)(input + i + 6);
|
||||
// double *des = output + (i+6) * 4;
|
||||
// ::convert(ptr, des,i+7>total_count);
|
||||
// }
|
||||
// }
|
||||
// return Aurora::Matrix::New(output, aMatrix.getDimSize(0),
|
||||
// aMatrix.getDimSize(1), aMatrix.getDimSize(2));
|
||||
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "getAScanBlockPreprocessed.h"
|
||||
|
||||
#include "CudaMatrix.h"
|
||||
#include "Matrix.h"
|
||||
#include "blockingGeometryInfo.h"
|
||||
#include "removeDataFromArrays.h"
|
||||
@@ -10,15 +11,36 @@
|
||||
#include "src/transmissionReconstruction/dataFilter/dataFilter.h"
|
||||
#include "src/reflectionReconstruction/dataFilter.h"
|
||||
|
||||
#include "Aurora.h"
|
||||
|
||||
using namespace Aurora;
|
||||
using namespace Recon;
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <iostream>
|
||||
void printTime()
|
||||
{
|
||||
struct timeval tpend;
|
||||
gettimeofday(&tpend,NULL);
|
||||
int secofday = (tpend.tv_sec + 3600 * 8 ) % 86400;
|
||||
int hours = secofday / 3600;
|
||||
int minutes = (secofday - hours * 3600 ) / 60;
|
||||
int seconds = secofday % 60;
|
||||
int milliseconds = tpend.tv_usec/1000;
|
||||
std::cout<< hours << ":" <<minutes<<":"<<seconds<<"."<<milliseconds<<std::endl;
|
||||
}
|
||||
|
||||
AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn,
|
||||
const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
|
||||
bool aApplyFilter, bool aTransReco)
|
||||
{
|
||||
//std::cout<<"strart"<<std::endl;
|
||||
//printTime();
|
||||
//550ms
|
||||
AscanBlockPreprocessed result;
|
||||
AscanBlock ascanBlock = getAscanBlock(aParser, aMp, aSl, aSn, aRl, aRn);
|
||||
//printTime();
|
||||
//10ms
|
||||
result.gainBlock = ascanBlock.gainBlock;
|
||||
result.mpBlock = ascanBlock.mpBlock;
|
||||
result.rlBlock = ascanBlock.rlBlock;
|
||||
@@ -26,6 +48,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
|
||||
result.slBlock = ascanBlock.slBlock;
|
||||
result.snBlock = ascanBlock.snBlock;
|
||||
GeometryBlock geometryBlock = blockingGeometryInfos(aGeom, ascanBlock.rnBlock, ascanBlock.rlBlock, ascanBlock.snBlock, ascanBlock.slBlock, ascanBlock.mpBlock);
|
||||
//printTime();
|
||||
//3ms
|
||||
result.receiverPositionBlock = geometryBlock.receiverPositionBlock;
|
||||
result.senderPositionBlock = geometryBlock.senderPositionBlock;
|
||||
if(aApplyFilter)
|
||||
@@ -40,7 +64,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
|
||||
{
|
||||
usedData = filterReflectionData(geometryBlock.receiverPositionBlock, geometryBlock.senderPositionBlock, geometryBlock.senderNormalBlock, reflectParams::constrictReflectionAngles);
|
||||
}
|
||||
|
||||
//printTime();
|
||||
//150ms
|
||||
ascanBlock.ascanBlock = removeDataFromArrays(ascanBlock.ascanBlock, usedData);
|
||||
result.mpBlock = removeDataFromArrays(ascanBlock.mpBlock, usedData);
|
||||
result.slBlock = removeDataFromArrays(ascanBlock.slBlock, usedData);
|
||||
@@ -51,7 +76,8 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
|
||||
result.senderPositionBlock = removeDataFromArrays(geometryBlock.senderPositionBlock, usedData);
|
||||
result.receiverPositionBlock = removeDataFromArrays(geometryBlock.receiverPositionBlock, usedData);
|
||||
result.gainBlock = removeDataFromArrays(ascanBlock.gainBlock, usedData);
|
||||
|
||||
//printTime();
|
||||
//120ms
|
||||
}
|
||||
|
||||
if (ascanBlock.ascanBlock.getDataSize() > 0)
|
||||
@@ -62,6 +88,72 @@ AscanBlockPreprocessed Recon::getAscanBlockPreprocessed(Parser* aParser, const A
|
||||
{
|
||||
result.ascanBlockPreprocessed = ascanBlock.ascanBlock;
|
||||
}
|
||||
|
||||
//printTime();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AscanBlockPreprocessedCuda Recon::getAscanBlockPreprocessedCuda(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn,
|
||||
const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
|
||||
bool aApplyFilter, bool aTransReco)
|
||||
{
|
||||
//std::cout<<"strart"<<std::endl;
|
||||
//printTime();
|
||||
//550ms
|
||||
AscanBlockPreprocessedCuda result;
|
||||
AscanBlock ascanBlock = getAscanBlock(aParser, aMp, aSl, aSn, aRl, aRn);
|
||||
//printTime();
|
||||
//300ms
|
||||
result.ascanBlockPreprocessed = ascanBlock.ascanBlock.toDeviceMatrix();
|
||||
result.gainBlock = ascanBlock.gainBlock.toDeviceMatrix();
|
||||
result.mpBlock = ascanBlock.mpBlock;
|
||||
result.rlBlock = ascanBlock.rlBlock;
|
||||
result.rnBlock = ascanBlock.rnBlock;
|
||||
result.slBlock = ascanBlock.slBlock;
|
||||
result.snBlock = ascanBlock.snBlock;
|
||||
GeometryBlock geometryBlock = blockingGeometryInfos(aGeom, ascanBlock.rnBlock, ascanBlock.rlBlock, ascanBlock.snBlock, ascanBlock.slBlock, ascanBlock.mpBlock);
|
||||
//printTime();
|
||||
//3ms
|
||||
result.receiverPositionBlock = geometryBlock.receiverPositionBlock;
|
||||
result.senderPositionBlock = geometryBlock.senderPositionBlock;
|
||||
if(aApplyFilter)
|
||||
{
|
||||
Matrix usedData;
|
||||
if(aTransReco)
|
||||
{
|
||||
usedData = filterTransmissionData(ascanBlock.slBlock, ascanBlock.snBlock, ascanBlock.rlBlock, ascanBlock.rnBlock,
|
||||
aGeom.sensData, geometryBlock.senderNormalBlock, geometryBlock.receiverNormalBlock);
|
||||
}
|
||||
else
|
||||
{
|
||||
usedData = filterReflectionData(geometryBlock.receiverPositionBlock, geometryBlock.senderPositionBlock, geometryBlock.senderNormalBlock, reflectParams::constrictReflectionAngles);
|
||||
}
|
||||
//printTime();
|
||||
//40ms
|
||||
CudaMatrix usedDataDevice = usedData.toDeviceMatrix();
|
||||
result.ascanBlockPreprocessed = valid(result.ascanBlockPreprocessed, usedDataDevice);
|
||||
result.mpBlock = removeDataFromArrays(ascanBlock.mpBlock, usedData);
|
||||
result.slBlock = removeDataFromArrays(ascanBlock.slBlock, usedData);
|
||||
result.snBlock = removeDataFromArrays(ascanBlock.snBlock, usedData);
|
||||
result.rlBlock = removeDataFromArrays(ascanBlock.rlBlock, usedData);
|
||||
result.rnBlock = removeDataFromArrays(ascanBlock.rnBlock, usedData);
|
||||
|
||||
result.senderPositionBlock = removeDataFromArrays(geometryBlock.senderPositionBlock, usedData);
|
||||
result.receiverPositionBlock = removeDataFromArrays(geometryBlock.receiverPositionBlock, usedData);
|
||||
result.gainBlock = valid(result.gainBlock, usedDataDevice);
|
||||
//printTime();
|
||||
//10ms
|
||||
}
|
||||
|
||||
if (ascanBlock.ascanBlock.getDataSize() > 0)
|
||||
{
|
||||
result.ascanBlockPreprocessed = preprocessAscanBlockCuda(result.ascanBlockPreprocessed, aMeasInfo);
|
||||
}
|
||||
// else
|
||||
// {
|
||||
// result.ascanBlockPreprocessed = ascanBlock.ascanBlock;
|
||||
// }
|
||||
//printTime();
|
||||
//std::cout<<"end"<<std::endl;
|
||||
return result;
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
#define GETASCANBLOCK_PREPROCESSED_H
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "CudaMatrix.h"
|
||||
#include "src/common/getGeometryInfo.h"
|
||||
#include "src/common/getMeasurementMetaData.h"
|
||||
|
||||
@@ -25,6 +26,23 @@ namespace Recon
|
||||
AscanBlockPreprocessed getAscanBlockPreprocessed(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn,
|
||||
const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
|
||||
bool aApplyFilter, bool aTransReco);
|
||||
|
||||
struct AscanBlockPreprocessedCuda
|
||||
{
|
||||
Aurora::CudaMatrix ascanBlockPreprocessed;
|
||||
Aurora::Matrix mpBlock;
|
||||
Aurora::Matrix slBlock;
|
||||
Aurora::Matrix snBlock;
|
||||
Aurora::Matrix rlBlock;
|
||||
Aurora::Matrix rnBlock;
|
||||
Aurora::Matrix senderPositionBlock;
|
||||
Aurora::Matrix receiverPositionBlock;
|
||||
Aurora::CudaMatrix gainBlock;
|
||||
};
|
||||
|
||||
AscanBlockPreprocessedCuda getAscanBlockPreprocessedCuda(Parser* aParser, const Aurora::Matrix& aMp, const Aurora::Matrix& aSl, const Aurora::Matrix& aSn,
|
||||
const Aurora::Matrix& aRl, const Aurora::Matrix& aRn, GeometryInfo& aGeom, const MeasurementInfo& aMeasInfo,
|
||||
bool aApplyFilter, bool aTransReco);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -70,13 +70,13 @@ AscanBlock Recon::getAscanBlock(Parser* aParser, const Aurora::Matrix& aMp, cons
|
||||
for(int slIndex=0; slIndex<aSl.getDataSize();++slIndex)
|
||||
{
|
||||
OneTasAScanData oneTasData = aParser->getOneTasAscanDataOfMotorPosition(aSl[slIndex]);
|
||||
|
||||
#pragma omp parallel for
|
||||
for(int snIndex=0; snIndex<aSn.getDataSize();++snIndex)
|
||||
{
|
||||
//int mapperIndex = 0;
|
||||
#pragma omp parallel for
|
||||
//#pragma omp parallel for
|
||||
for(int rlIndex=0; rlIndex<aRl.getDataSize();++rlIndex)
|
||||
{
|
||||
{
|
||||
for(int rnIndex=0; rnIndex<aRn.getDataSize(); ++rnIndex)
|
||||
{
|
||||
size_t mapperIndex = rnIndex + rlIndex*aRn.getDataSize();
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
namespace Recon
|
||||
{
|
||||
const std::string DEFAULT_CONFIG_PATH = "/home/UR/ConfigFiles/";
|
||||
const std::string DEFAULT_OUTPUT_PATH = "/home/UR/ReconResult/USCT_Result.mat";
|
||||
const std::string DEFAULT_OUTPUT_FILENAME = "USCT_Result.mat";
|
||||
const std::string DEFAULT_OUTPUT_PATH = "/home/UR/ReconResult/";
|
||||
const std::string DEFAULT_OUTPUT_FILENAME = "sun.mat";
|
||||
|
||||
std::string getPath(const std::string &aFullPath);
|
||||
bool endsWithMat(const std::string &aStr);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "preprocessAscanBlock.h"
|
||||
#include "Function1D.h"
|
||||
#include "Function1D.cuh"
|
||||
#include <cstddef>
|
||||
|
||||
Aurora::Matrix Recon::preprocessAscanBlock(const Aurora::Matrix& aAscans, const MeasurementInfo& aMeasInfo)
|
||||
@@ -20,4 +21,14 @@ Aurora::Matrix Recon::preprocessAscanBlock(const Aurora::Matrix& aAscans, const
|
||||
// end
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Aurora::CudaMatrix Recon::preprocessAscanBlockCuda(const Aurora::CudaMatrix& aAscans, const MeasurementInfo& aMeasInfo)
|
||||
{
|
||||
if(aMeasInfo.ascanDataType == "float16")
|
||||
{
|
||||
return Aurora::convertfp16tofloatCuda(aAscans, aAscans.getDimSize(0), aAscans.getDimSize(1));
|
||||
}
|
||||
|
||||
return aAscans;
|
||||
}
|
||||
@@ -7,6 +7,8 @@
|
||||
namespace Recon
|
||||
{
|
||||
Aurora::Matrix preprocessAscanBlock(const Aurora::Matrix& aAscans, const MeasurementInfo& aMeasInfo);
|
||||
|
||||
Aurora::CudaMatrix preprocessAscanBlockCuda(const Aurora::CudaMatrix& aAscans, const MeasurementInfo& aMeasInfo);
|
||||
}
|
||||
|
||||
#endif
|
||||
44
src/main.cxx
44
src/main.cxx
@@ -3,11 +3,17 @@
|
||||
#include "config/config.h"
|
||||
#include "log/notify.h"
|
||||
#include "log/notify.h"
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include "MatlabReader.h"
|
||||
#define EIGEN_USE_MKL_ALL
|
||||
|
||||
#include "src/common/fileHelper.h"
|
||||
#include "startReconstructions.h"
|
||||
|
||||
#include "transmissionReconstruction/detection/detection.h"
|
||||
#include "transmissionReconstruction/detection/detection.cuh"
|
||||
#include "startReconstructions.h"
|
||||
#include "log/log.h"
|
||||
/* 0 is data path.
|
||||
1 is dataRef path.
|
||||
@@ -20,8 +26,8 @@ int main(int argc, char *argv[])
|
||||
int argNum = 5;
|
||||
std::vector<std::string> args(argNum);
|
||||
args[0] = "";
|
||||
args[1] = "";
|
||||
args[2] = "";
|
||||
args[1] = "/home/sun/20230418T145123/";
|
||||
args[2] = "/home/sun/20230418T141000/";
|
||||
args[3] = Recon::DEFAULT_OUTPUT_PATH;
|
||||
args[4] = Recon::DEFAULT_CONFIG_PATH;
|
||||
argc = argc <= argNum? argc-1 : argNum;
|
||||
@@ -29,30 +35,29 @@ int main(int argc, char *argv[])
|
||||
{
|
||||
args[i] = argv[i+1];
|
||||
}
|
||||
if(args[0].empty())
|
||||
{
|
||||
printf("No recon id.");
|
||||
return -1;
|
||||
}
|
||||
std::string outPutPath = args[3];
|
||||
std::string directoryPath = outPutPath;
|
||||
auto defaultLogger = getLogger("Main",outPutPath.data());
|
||||
spdlog::set_default_logger(defaultLogger);
|
||||
std::string ReconID = args[0];
|
||||
ReconID = ReconID=="none"?"":ReconID;
|
||||
RECON_INFO("Read UR Args =====================");
|
||||
RECON_INFO("ReconID:{0}",ReconID);
|
||||
if(args[1].empty())
|
||||
{
|
||||
printf("No reconstruction data.");
|
||||
RECON_INFO("No reconstruction data.");
|
||||
return -2;
|
||||
}
|
||||
std::string configPath = Recon::fixPathSlash(args[4]);
|
||||
Recon::initalizeConfig(configPath);
|
||||
if( args[2].empty() && Recon::transParams::runTransmissionReco)
|
||||
{
|
||||
printf("Running transmission reconstruction, but no refrence data.");
|
||||
RECON_INFO("Running transmission reconstruction, but no refrence data.");
|
||||
return -3;
|
||||
}
|
||||
RECON_INFO("configPath:{0}",configPath);
|
||||
|
||||
std::string outPutPath = args[3];
|
||||
std::string directoryPath = outPutPath;
|
||||
auto defaultLogger = getLogger("Main",outPutPath.data());
|
||||
spdlog::set_default_logger(defaultLogger);
|
||||
|
||||
|
||||
outPutPath = Recon::fixPathSlash(outPutPath);
|
||||
|
||||
if(!Recon::isDirectory(directoryPath))
|
||||
@@ -60,15 +65,18 @@ int main(int argc, char *argv[])
|
||||
RECON_INFO("Output directory is not valid.");
|
||||
return -4;
|
||||
}
|
||||
|
||||
RECON_INFO("outPutPath:{0}",directoryPath);
|
||||
std::string dataPath = Recon::fixPathSlash(args[1]);
|
||||
RECON_INFO("dataPath:{0}",dataPath);
|
||||
std::string dataRefPath = Recon::fixPathSlash(args[2]);
|
||||
RECON_INFO("start");
|
||||
RECON_INFO("dataRefPath:{0}",dataRefPath);
|
||||
RECON_INFO("UR Args End=======================");
|
||||
RECON_INFO("UR Start");
|
||||
Recon::notifyStart(ReconID);
|
||||
int exitcode = Recon::startReconstructions(dataPath, dataRefPath, outPutPath);
|
||||
int exitcode = Recon::startReconstructions(dataPath, dataRefPath, outPutPath+Recon::DEFAULT_OUTPUT_FILENAME);
|
||||
if (exitcode == 0)
|
||||
{
|
||||
RECON_INFO("finish");
|
||||
RECON_INFO("UR Finish");
|
||||
if (!Recon::notifyFinish()) {
|
||||
RECON_ERROR("Notify Finish failed!");
|
||||
return -100;
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
#include "startReflectionReconstruction.h"
|
||||
#include "Function.h"
|
||||
#include "Function2D.h"
|
||||
#include "Function3D.h"
|
||||
#include "Matrix.h"
|
||||
#include "MatlabWriter.h"
|
||||
|
||||
#include "common/getGeometryInfo.h"
|
||||
#include "common/precalculateChannelList.h"
|
||||
#include "common/dataBlockCreation/getAScanBlockPreprocessed.h"
|
||||
#include "common/dataBlockCreation/removeDataFromArrays.h"
|
||||
#include "log/notify.h"
|
||||
#include "reflectionReconstruction/preprocessData/determineOptimalPulse.h"
|
||||
#include "reflectionReconstruction/reconstructionSAFT/reconstructionSAFT.h"
|
||||
#include "src/reflectionReconstruction/preprocessData/preprocessAScanBlockForReflection.h"
|
||||
#include "config/config.h"
|
||||
#include "log/log.h"
|
||||
|
||||
#include "CudaEnvInit.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
using namespace Aurora;
|
||||
using namespace Recon;
|
||||
|
||||
namespace
|
||||
{
|
||||
std::queue<preprocessAScanRResult> PRODUCER_PROCESSDATAS;
|
||||
std::queue<AscanBlockPreprocessed> PRODUCER_BLOCKDATAS;
|
||||
std::mutex PRODUCER_MUTEX;
|
||||
std::condition_variable PRODUCER_CONDITION;
|
||||
std::mutex CUSTOMER_MUTEX;
|
||||
std::condition_variable CUSTOMER_CONDITION;
|
||||
}
|
||||
|
||||
void producerThread( Parser* aParser, const Aurora::Matrix& aMotorPos,
|
||||
const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
|
||||
const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
|
||||
GeometryInfo& aGeom, MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
|
||||
{
|
||||
if(reflectParams::useOptPulse==1 && reflectParams::runReflectionReco)
|
||||
{
|
||||
aPreComputes.sincPeak_ft = determineOptimalPulse(aPreComputes.timeInterval, aExpInfo.expectedAScanLength);
|
||||
}
|
||||
printf(" - channel list");
|
||||
auto channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
|
||||
size_t numScans = aMotorPos.getDataSize() * aSlList.getDataSize() *
|
||||
aSnList.getDataSize() * aRlList.getDataSize() *
|
||||
aRnList.getDataSize();
|
||||
int numTakenScans = 0,numProcessedScans = 0,numPossibleScans = 0;
|
||||
for(int i=0; i<aMotorPos.getDataSize(); ++i)
|
||||
{
|
||||
//#pragma omp parallel for num_threads(24)
|
||||
for(int j=0; j<aSlList.getDataSize() / transParams::senderTASSize; ++j)
|
||||
{
|
||||
for(int k=0; k<aSnList.getDataSize() / transParams::senderElementSize; ++k)
|
||||
{
|
||||
Matrix mp = aMotorPos(i).toMatrix();
|
||||
Matrix sl = aSlList.block(0, transParams::senderTASSize*j, transParams::senderTASSize*j+transParams::senderTASSize - 1);
|
||||
Matrix sn = aSnList.block(0, transParams::senderElementSize*k, transParams::senderElementSize*k+transParams::senderElementSize - 1);
|
||||
auto blockData = getAscanBlockPreprocessed(aParser, mp, sl, sn, aRlList, aRnList, aGeom, aExpInfo, true, false);
|
||||
|
||||
float* channelListSizeData = Aurora::malloc(2);
|
||||
channelListSizeData[0] = channelList.getDimSize(0);
|
||||
channelListSizeData[1] = channelList.getDimSize(1);
|
||||
Matrix channelListSize = Matrix::New(channelListSizeData, 2, 1);
|
||||
Matrix ind = sub2ind(channelListSize, {blockData.rlBlock, blockData.rnBlock});
|
||||
size_t channelBlockSize = ind.getDataSize();
|
||||
float* channelBlockData = Aurora::malloc(channelBlockSize);
|
||||
for(size_t i=0; i<channelBlockSize; ++i)
|
||||
{
|
||||
channelBlockData[i] = channelList[ind[i] - 1];
|
||||
}
|
||||
Matrix channelBlock = Matrix::New(channelBlockData, 1, channelBlockSize);
|
||||
RECON_INFO("start cpu---------preprocessAScanBlockForReflection");
|
||||
auto preprocessData = preprocessAScanBlockForReflection(blockData.ascanBlockPreprocessed, blockData.mpBlock, blockData.slBlock,
|
||||
blockData.snBlock, blockData.rlBlock, blockData.rnBlock, blockData.senderPositionBlock,
|
||||
blockData.receiverPositionBlock, blockData.gainBlock, channelBlock, aExpInfo, aPreComputes);
|
||||
PRODUCER_BLOCKDATAS.push(blockData);
|
||||
PRODUCER_PROCESSDATAS.push(preprocessData);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Aurora::Matrix Recon::startReflectionReconstruction( Parser* aParser, int aSAFT_mode, const Aurora::Matrix& aMotorPos,
|
||||
const Aurora::Matrix& aSlList, const Aurora::Matrix& aSnList,
|
||||
const Aurora::Matrix& aRlList, const Aurora::Matrix& aRnList,
|
||||
GeometryInfo& aGeom, TransRecos& aTransRecos,
|
||||
MeasurementInfo& aExpInfo, PreComputes& aPreComputes)
|
||||
{
|
||||
printf("Reflection reconstruction is carried out.");
|
||||
|
||||
printf("Preperations for reconstructions.");
|
||||
|
||||
printf(" - reset GPUs");
|
||||
for (size_t i = 0; i < reflectParams::gpuSelectionList.getDataSize(); i++)
|
||||
{
|
||||
std::string msg;
|
||||
if (!resetGPUDevice((int)reflectParams::gpuSelectionList[i],msg))
|
||||
{
|
||||
std::cerr<<msg<<std::endl;
|
||||
}
|
||||
}
|
||||
std::thread thread = std::thread(producerThread, aParser, aMotorPos, aSlList, aSnList, aRlList, aRnList, aGeom, aExpInfo, aPreComputes);
|
||||
|
||||
Matrix Env = Aurora::zeros((int)reflectParams::imageXYZ[0],(int)reflectParams::imageXYZ[1],(int)reflectParams::imageXYZ[2]);
|
||||
|
||||
|
||||
|
||||
|
||||
for(int i=0; i<aMotorPos.getDataSize(); ++i)
|
||||
{
|
||||
//#pragma omp parallel for num_threads(24)
|
||||
for(int j=0; j<aSlList.getDataSize() / transParams::senderTASSize; ++j)
|
||||
{
|
||||
for(int k=0; k<aSnList.getDataSize() / transParams::senderElementSize; ++k)
|
||||
{
|
||||
RECON_INFO("start gpu---------recontructSAFT");
|
||||
Env = recontructSAFT(removeDataFromArrays(preprocessData.AscanBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.senderPositionBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.receiverPositionBlock, preprocessData.usedData),
|
||||
removeDataFromArrays(blockData.mpBlock, preprocessData.usedData),
|
||||
aSAFT_mode, aTransRecos, Env);
|
||||
std::cout<<Env[0]<<"-" << Env[1] <<"-" << Env[2] <<"-" << Env[3]<<std::endl;
|
||||
RECON_INFO("Reflection Reconstructon: " + std::to_string(j));
|
||||
}
|
||||
//Recon::notifyProgress(25+73*((j*i)/(aMotorPos.getDataSize() * aSlList.getDataSize())));
|
||||
}
|
||||
}
|
||||
|
||||
return Env;
|
||||
}
|
||||
@@ -96,7 +96,7 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
TempInfo tempRef;
|
||||
CEInfo ceRef;
|
||||
Matrix transformationMatricesRef;
|
||||
Recon::notifyProgress(1);
|
||||
//Recon::notifyProgress(1);
|
||||
if(transParams::runTransmissionReco)
|
||||
{
|
||||
expInfoRef = loadMeasurementInfos(&refParser);
|
||||
@@ -129,7 +129,7 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
transformationMatricesRef = Matrix();
|
||||
motorPosAvailableRef = Matrix();
|
||||
}
|
||||
Recon::notifyProgress(2);
|
||||
//Recon::notifyProgress(2);
|
||||
if(!ce.ce.isNull() && !ceRef.ce.isNull())
|
||||
{
|
||||
Matrix isEqual = (ce.ce == ceRef.ce);
|
||||
@@ -156,12 +156,12 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
preComputes.matchedFilter = createMatchedFilter(ce.ceRef, ce.measuredCEUsed, reflectParams::findDefects, reconParams::removeOutliersFromCEMeasured, expInfo.Hardware);
|
||||
}
|
||||
}
|
||||
Recon::notifyProgress(3);
|
||||
//Recon::notifyProgress(3);
|
||||
if(expInfo.sampleRate != reflectParams::aScanReconstructionFrequency)
|
||||
{
|
||||
reflectParams::expectedAScanDataLength = ceil(expInfo.numberSamples * ((float)reflectParams::aScanReconstructionFrequency / expInfo.sampleRate));
|
||||
}
|
||||
Recon::notifyProgress(4);
|
||||
//Recon::notifyProgress(4);
|
||||
TransmissionReconstructionResult transmissionResult;
|
||||
bool sosAvailable = false;
|
||||
bool attAvailable = false;
|
||||
@@ -220,7 +220,7 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
|
||||
GeometryInfo geomRef = getGeometryInfo(motorPosAvailableRef, transformationMatricesRef, rlList, rnList, slList, snList);
|
||||
RECON_INFO("Start transmissionRecostruction.");
|
||||
Recon::notifyProgress(5);
|
||||
//Recon::notifyProgress(5);
|
||||
transmissionResult = startTransmissionReconstruction(mp_inter, mpRef_inter, slList_inter, snList_inter, rlList_inter, rnList_inter, temp, tempRef, geom, geomRef, expInfo, expInfoRef, preComputes, &dataParser, &refParser);
|
||||
attAvailable = true;
|
||||
sosAvailable = true;
|
||||
@@ -235,11 +235,11 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
Matrix recoSOS = transmissionResult.recoSOS;
|
||||
Matrix recoATT = transmissionResult.recoATT;
|
||||
precalcImageParameters(geom);
|
||||
Recon::notifyProgress(21);
|
||||
//Recon::notifyProgress(21);
|
||||
//检测可使用内存是否足够,输出警报用,todo
|
||||
//checkEnvAndMemory(reflectParams.imageInfos.IMAGE_XYZ);
|
||||
auto preProcessData = preprocessTransmissionReconstructionForReflection(recoSOS, recoATT, transmissionResult.ddmis, geom, temp);
|
||||
Recon::notifyProgress(22);
|
||||
//Recon::notifyProgress(22);
|
||||
Matrix mp_inter = intersect(motorPosAvailable, reflectParams::motorPos);
|
||||
Matrix slList_inter = intersect(slList, reflectParams::senderTasList);
|
||||
Matrix snList_inter = intersect(snList, reflectParams::senderElementList);
|
||||
@@ -252,12 +252,12 @@ int Recon::startReconstructions(const std::string& aDataPath, const std::string&
|
||||
preComputes.offset = estimateOffset(expInfo, ce, preComputes.matchedFilter);
|
||||
|
||||
reflectParams::gpuSelectionList = reconParams::gpuSelectionList;
|
||||
Recon::notifyProgress(25);
|
||||
//Recon::notifyProgress(25);
|
||||
RECON_INFO("Start reflectionRecostruction.");
|
||||
Matrix env = startReflectionReconstruction(&dataParser, preProcessData.saftMode, mp_inter, slList_inter, snList_inter, rlList_inter, rnList_inter, geom, preProcessData.transRecos, expInfo, preComputes);
|
||||
writer.setMatrix(env, "reflect");
|
||||
//exporter.exportDICOM(env, Recon::DICOMExporter::REFL);
|
||||
Recon::notifyProgress(99);
|
||||
//Recon::notifyProgress(99);
|
||||
}
|
||||
writer.write();
|
||||
return 0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
namespace Recon
|
||||
{
|
||||
Aurora::Matrix distanceBetweenTwoPoints(Aurora::Matrix aPtsA, Aurora::Matrix aPtsB)
|
||||
Aurora::Matrix distanceBetweenTwoPoints(const Aurora::Matrix& aPtsA, const Aurora::Matrix& aPtsB)
|
||||
{
|
||||
return Aurora::sqrt(Aurora::sum((aPtsA-aPtsB)^2));
|
||||
}
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
#include "Matrix.h"
|
||||
namespace Recon {
|
||||
|
||||
Aurora::Matrix distanceBetweenTwoPoints(Aurora::Matrix aMPtsA,
|
||||
Aurora::Matrix aMPtsB);
|
||||
Aurora::Matrix distanceBetweenTwoPoints(const Aurora::Matrix& aMPtsA,
|
||||
const Aurora::Matrix& aMPtsB);
|
||||
|
||||
Aurora::Matrix calculateWaterTemperature(Aurora::Matrix aMWaterTempS,
|
||||
Aurora::Matrix aMWaterTempR,
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "common/getMeasurementMetaData.h"
|
||||
#include "config/config.h"
|
||||
#include "calculateBankDetectAndHilbertTransformation.hpp"
|
||||
#include "transmissionReconstruction/detection/detection.cuh"
|
||||
|
||||
using namespace Aurora;
|
||||
namespace Recon {
|
||||
@@ -414,34 +415,39 @@ namespace Recon {
|
||||
return result;
|
||||
}
|
||||
|
||||
DetectResult transmissionDetection(const Aurora::Matrix &AscanBlock,
|
||||
const Aurora::Matrix &AscanRefBlock,
|
||||
const Aurora::Matrix &distBlock,
|
||||
const Aurora::Matrix &distRefBlock,
|
||||
DetectResult transmissionDetection(const Aurora::CudaMatrix &AscanBlock,
|
||||
const Aurora::CudaMatrix &AscanRefBlock,
|
||||
const Aurora::CudaMatrix &distBlock,
|
||||
const Aurora::CudaMatrix &distRefBlock,
|
||||
const Aurora::Matrix &sosWaterBlock,
|
||||
const Aurora::Matrix &sosWaterRefBlock,
|
||||
float expectedSOSWater) {
|
||||
auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak");
|
||||
auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak");
|
||||
auto _sosWaterBlock = temperatureToSoundSpeed(sosWaterBlock, "marczak").toDeviceMatrix();
|
||||
auto _sosWaterRefBlock = temperatureToSoundSpeed(sosWaterRefBlock, "marczak").toDeviceMatrix();
|
||||
switch (Recon::transParams::version) {
|
||||
case 1: {
|
||||
return detectTofAndAttMex(
|
||||
AscanBlock, AscanRefBlock, distBlock, distRefBlock,
|
||||
_sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads,
|
||||
expectedSOSWater, Recon::transParams::useTimeWindowing,
|
||||
Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT,
|
||||
Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound,
|
||||
Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow);
|
||||
}
|
||||
case 2:
|
||||
// case 1: {
|
||||
// return detectTofAndAttMex(
|
||||
// AscanBlock, AscanRefBlock, distBlock, distRefBlock,
|
||||
// _sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads,
|
||||
// expectedSOSWater, Recon::transParams::useTimeWindowing,
|
||||
// Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT,
|
||||
// Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound,
|
||||
// Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow);
|
||||
// }
|
||||
// case 2:
|
||||
default:
|
||||
return detectTofAndAtt(
|
||||
auto r = detectTofAndAtt(
|
||||
AscanBlock, AscanRefBlock, distBlock, distRefBlock,
|
||||
_sosWaterBlock, _sosWaterRefBlock, Recon::transParams::resampleFactor, Recon::transParams::nThreads,
|
||||
expectedSOSWater, Recon::transParams::useTimeWindowing,
|
||||
Recon::transParams::aScanReconstructionFrequency, Recon::transParams::detectionWindowATT,
|
||||
Recon::transParams::offsetElectronic, Recon::transParams::detectionWindowSOS, Recon::transParams::minSpeedOfSound,
|
||||
Recon::transParams::maxSpeedOfSound, Recon::transParams::gaussWindow);
|
||||
DetectResult ret;
|
||||
ret.att = r.att.toHostMatrix();
|
||||
ret.sosValue = r.sosValue.toHostMatrix();
|
||||
ret.tof = r.tof.toHostMatrix();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
323
src/transmissionReconstruction/detection/detection.cu
Normal file
323
src/transmissionReconstruction/detection/detection.cu
Normal file
@@ -0,0 +1,323 @@
|
||||
#include "CudaMatrix.h"
|
||||
#include "detection.cuh"
|
||||
#include <math.h>
|
||||
#include <thrust/transform.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include "Function1D.cuh"
|
||||
#include "Function2D.cuh"
|
||||
#include "Function3D.h"
|
||||
|
||||
__global__ void copyMatrixKernel(float* aStartPos1, float* aEndPos1,
|
||||
float* aStartPos2, float* aEndPos2,
|
||||
float* aIn1, float* aOut1,
|
||||
float* aIn2, float* aOut2,
|
||||
unsigned int aColElements){
|
||||
unsigned int i = blockIdx.x;
|
||||
unsigned int base_offset = i * aColElements;
|
||||
unsigned int j = aStartPos1[i]-1+threadIdx.x;
|
||||
for (int offset = 0; j+offset<aEndPos1[i]; offset+=blockDim.x) {
|
||||
aOut1[j + base_offset + offset] = aIn1[j + base_offset + offset];
|
||||
}
|
||||
j = aStartPos2[i]-1+threadIdx.x;
|
||||
for (int offset = 0; j+offset<aEndPos2[i]; offset+=blockDim.x) {
|
||||
aOut2[j + base_offset + offset] = aIn2[j + base_offset + offset];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
__global__ void setPosKernel(float* aStartPos1, float* aEndPos1,
|
||||
float* aStartPos2, float* aEndPos2,
|
||||
float* tof, float* tof2,
|
||||
float* sizeAscan,
|
||||
int sampleRate,
|
||||
float offsetElectronicSamples,
|
||||
int detectionWindowATT)
|
||||
{
|
||||
unsigned int i = blockIdx.x;
|
||||
aStartPos1[i] = floor(fmaxf(tof[i] * sampleRate + offsetElectronicSamples, (float)1.0));
|
||||
aEndPos1[i] = ceilf(fminf(sizeAscan[0], tof[i] * sampleRate + offsetElectronicSamples +
|
||||
detectionWindowATT));
|
||||
aStartPos2[i] = floorf(fmaxf(tof2[i], 1.0f));
|
||||
aEndPos2[i] = ceilf(fminf(sizeAscan[1], tof2[i] + detectionWindowATT));
|
||||
}
|
||||
|
||||
|
||||
CudaMatrix Recon::calculateAttenuationCuda(const CudaMatrix &ascans,
|
||||
const CudaMatrix &startPos,
|
||||
const CudaMatrix &endPos,
|
||||
const CudaMatrix &ascansRef,
|
||||
const CudaMatrix &startPosRef,
|
||||
const CudaMatrix &endPosRef){
|
||||
auto ascans2 = Aurora::zerosCuda(ascans.getDimSize(0), ascans.getDimSize(1));
|
||||
auto ascansRef2 = Aurora::zerosCuda(ascansRef.getDimSize(0), ascansRef.getDimSize(1));
|
||||
copyMatrixKernel<<<ascans2.getDimSize(1),256>>>(startPos.getData(),endPos.getData(),
|
||||
startPosRef.getData(), endPosRef.getData(),
|
||||
ascans.getData(), ascans2.getData(),
|
||||
ascansRef.getData(), ascansRef2.getData(),
|
||||
ascans2.getDimSize(0));
|
||||
|
||||
auto pulseEnergy = Aurora::sum(ascans2^2);
|
||||
auto pulseEnergyEmpty = Aurora::sum(ascansRef2^2);
|
||||
|
||||
return Aurora::log(pulseEnergyEmpty/pulseEnergy);
|
||||
}
|
||||
|
||||
CudaMatrix
|
||||
Recon::detectAttVectorizedCuda(const CudaMatrix &Ascan, const CudaMatrix &AscanRef,
|
||||
const CudaMatrix &distRef,
|
||||
const CudaMatrix &sosWaterRef,
|
||||
const CudaMatrix &tof, int aScanReconstructionFrequency,
|
||||
float offsetElectronic, int detectionWindowATT) {
|
||||
auto sizeAscan = size(Ascan);
|
||||
|
||||
auto sampleRate = aScanReconstructionFrequency;
|
||||
float offsetElectronicSamples = offsetElectronic * sampleRate;
|
||||
|
||||
auto envelopeOfAScan = Aurora::abs(Aurora::hilbert(Ascan));
|
||||
auto envelopeOfReferenceAScan = Aurora::abs(hilbert(AscanRef));
|
||||
auto tof2 = (distRef / sosWaterRef) * sampleRate + offsetElectronicSamples;
|
||||
|
||||
auto startPos = zerosCuda(Ascan.getDimSize(1), 1);
|
||||
auto endPos = zerosCuda(Ascan.getDimSize(1), 1);
|
||||
auto startPosRef = zerosCuda(Ascan.getDimSize(1), 1);
|
||||
auto endPosRef = zerosCuda(Ascan.getDimSize(1), 1);
|
||||
|
||||
setPosKernel<<<Ascan.getDimSize(1),1>>>(startPos.getData(),
|
||||
endPos.getData(), startPosRef.getData(), endPosRef.getData(),
|
||||
tof.getData(), tof2.getData(), sizeAscan.getData(), sampleRate,
|
||||
offsetElectronicSamples,detectionWindowATT);
|
||||
return calculateAttenuationCuda(envelopeOfAScan, startPos, endPos,
|
||||
envelopeOfReferenceAScan, startPosRef,
|
||||
endPosRef);
|
||||
}
|
||||
|
||||
CudaMatrix Recon::calculateSOSOffset(const CudaMatrix &sosBlock, float referenceSOS, const CudaMatrix &distBlock, float sampleRate){
|
||||
auto offset = (distBlock / sosBlock - distBlock / referenceSOS) * sampleRate;
|
||||
return offset;
|
||||
}
|
||||
|
||||
Recon::SearchPositionC Recon::calculateStarEndSearchPosition(const CudaMatrix &aVDistBlock,
|
||||
float minSpeedOfSound, float maxSpeedOfSound,
|
||||
float sampleRate, float maxSample,
|
||||
const CudaMatrix &aVSosOffsetBlock,
|
||||
float startOffset, float segmentLenOffset)
|
||||
{
|
||||
auto startSearch = floor((aVDistBlock / maxSpeedOfSound)*sampleRate+aVSosOffsetBlock+startOffset);
|
||||
auto lambda = [=] __device__ (const float& v){
|
||||
float a = (uint)(v);
|
||||
if(a<1) return 1.0f;
|
||||
else if(a>maxSample) return maxSample;
|
||||
return a;
|
||||
};
|
||||
thrust::transform(thrust::device, startSearch.getData(),startSearch.getData()+startSearch.getDataSize(),
|
||||
startSearch.getData(),lambda);
|
||||
|
||||
auto endSearch = ceil(aVDistBlock/minSpeedOfSound*sampleRate+aVSosOffsetBlock+startOffset+segmentLenOffset);
|
||||
|
||||
thrust::transform(thrust::device, endSearch.getData(),endSearch.getData()+endSearch.getDataSize(),
|
||||
endSearch.getData(),lambda);
|
||||
Recon::SearchPositionC result;
|
||||
result.startSearch = startSearch;
|
||||
result.endSearch = endSearch;
|
||||
return result;
|
||||
}
|
||||
|
||||
__global__ void copyMatrixKernel(float* aStartPos1, float* aEndPos1,
|
||||
float* aIn1, float* aOut1,
|
||||
unsigned int aColElements){
|
||||
unsigned int i = blockIdx.x;
|
||||
unsigned int base_offset = i * aColElements;
|
||||
unsigned int j = aStartPos1[i]-1+threadIdx.x;
|
||||
for (int offset = 0; j+offset<aEndPos1[i]; offset+=blockDim.x) {
|
||||
aOut1[j + base_offset + offset] = aIn1[j + base_offset + offset];
|
||||
}
|
||||
}
|
||||
|
||||
__device__ float getGuassValue(int aWidth,int aIndex){
|
||||
float v = (-5.0f+(10.0f/(float)(aWidth-1))*aIndex);
|
||||
return exp(-0.1f*(v*v));
|
||||
}
|
||||
|
||||
__global__ void guassWindowKernel(float* aStartSearch,float* aEndSearch,
|
||||
float* aIn1, float* aOut1, float* aExpectedPosWater,
|
||||
unsigned int aColElements){
|
||||
__shared__ unsigned int windowWidth;
|
||||
__shared__ unsigned int base_offset;
|
||||
__shared__ unsigned int j;
|
||||
__shared__ unsigned int begin;
|
||||
__shared__ unsigned int end ;
|
||||
|
||||
if(threadIdx.x == 0){
|
||||
windowWidth = aEndSearch[blockIdx.x]-aStartSearch[blockIdx.x];
|
||||
base_offset = blockIdx.x * aColElements;
|
||||
j = aStartSearch[blockIdx.x]-1+threadIdx.x;
|
||||
begin = roundf(aExpectedPosWater[blockIdx.x])- roundf(windowWidth/2)-1;
|
||||
end = roundf(aExpectedPosWater[blockIdx.x])-roundf(windowWidth/2)+windowWidth-1;
|
||||
}
|
||||
|
||||
for (int offset = 0; j+offset<aEndSearch[blockIdx.x]; offset+=blockDim.x) {
|
||||
unsigned int threadAccIdx = j + offset;
|
||||
if(threadAccIdx>=begin && threadAccIdx<end){
|
||||
aOut1[threadAccIdx + base_offset] = aIn1[threadAccIdx + base_offset]*getGuassValue(windowWidth, threadAccIdx - begin);
|
||||
}
|
||||
else{
|
||||
aOut1[threadAccIdx + base_offset] = aIn1[threadAccIdx + base_offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Recon::TimeWindowResultC Recon::applyTimeWindowing(const Aurora::CudaMatrix &AscanBlock, float sampleRate,
|
||||
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &sosBlock,
|
||||
float expectedSOSWater, float startOffset, float segmentLenOffset,
|
||||
float minSpeedOfSound, float maxSpeedOfSound, bool gaussWindow)
|
||||
{
|
||||
auto sosOffset = calculateSOSOffset(sosBlock, expectedSOSWater, distBlock, sampleRate);
|
||||
|
||||
auto calcResult = calculateStarEndSearchPosition(distBlock, minSpeedOfSound, maxSpeedOfSound, sampleRate,AscanBlock.getDimSize(0), sosOffset, startOffset, segmentLenOffset);
|
||||
|
||||
auto AscanBlockProcessed = zerosCuda(AscanBlock.getDimSize(0),AscanBlock.getDimSize(1));
|
||||
|
||||
if(gaussWindow)
|
||||
{
|
||||
auto expectedPosWater = (distBlock / expectedSOSWater) * sampleRate + startOffset;
|
||||
guassWindowKernel<<<AscanBlock.getDimSize(1),256>>>(calcResult.startSearch.getData(),
|
||||
calcResult.endSearch.getData(), AscanBlock.getData(), AscanBlockProcessed.getData(),
|
||||
expectedPosWater.getData(), AscanBlock.getDimSize(0));
|
||||
}
|
||||
else{
|
||||
copyMatrixKernel<<<AscanBlock.getDimSize(1),256>>>(calcResult.startSearch.getData(),
|
||||
calcResult.endSearch.getData(),AscanBlock.getData(),AscanBlockProcessed.getData(),
|
||||
AscanBlock.getDimSize(0));
|
||||
}
|
||||
Recon::TimeWindowResultC result;
|
||||
result.startSearch = calcResult.startSearch;
|
||||
result.AscanBlockProcessed = AscanBlockProcessed;
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ struct KVPair{
|
||||
__device__ KVPair(){}
|
||||
__device__ KVPair(int aIndex,float aValue):index(aIndex),value(aValue){};
|
||||
__device__ KVPair(const KVPair& other){
|
||||
index = other.index;
|
||||
value = other.value;
|
||||
}
|
||||
int index;
|
||||
float value;
|
||||
};
|
||||
|
||||
__device__ KVPair maxKV(const KVPair& x, const KVPair& y){
|
||||
return x.value>y.value ? x:y;
|
||||
};
|
||||
|
||||
__global__ void findMaxIndexKernel(float* aInputData, unsigned int aColSize,float* aOutput, int aMaxlag){
|
||||
//确定每个thread的index
|
||||
unsigned int idx = blockIdx.x * aColSize + threadIdx.x;
|
||||
__shared__ KVPair shared_data[256];
|
||||
// 每个线程加载一个元素到共享内存
|
||||
shared_data[threadIdx.x] = (threadIdx.x< aColSize) ? KVPair(threadIdx.x,aInputData[idx]) : KVPair(0,-FLT_MAX);
|
||||
__syncthreads();
|
||||
// 每个线程规约自己的分段,将每个blockDim.x的值规约到数组最前面一段
|
||||
for (int offset = blockDim.x; offset<aColSize; offset+=blockDim.x) {
|
||||
if(threadIdx.x + offset<aColSize){
|
||||
shared_data[threadIdx.x] = maxKV(shared_data[threadIdx.x], KVPair(threadIdx.x+offset, aInputData[idx + offset]));
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
// 规约最前面一段
|
||||
for (int i = blockDim.x/2; i >0; i>>=1) {
|
||||
|
||||
if (threadIdx.x < i) {
|
||||
shared_data[threadIdx.x] = maxKV(shared_data[threadIdx.x], shared_data[i + threadIdx.x]);
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// 第一个线程存储每个分段的最大值到全局内存
|
||||
if (threadIdx.x == 0) {
|
||||
aOutput[blockIdx.x] = shared_data[0].index - aMaxlag;
|
||||
}
|
||||
}
|
||||
int nextpow2(unsigned int value){
|
||||
unsigned int compareValue = 2;
|
||||
int result = 1;
|
||||
while(compareValue<=value){
|
||||
compareValue=compareValue<<1;
|
||||
result++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
Recon::DetectResultC Recon::detectTofAndAtt(
|
||||
const Aurora::CudaMatrix &AscanBlock, const Aurora::CudaMatrix &AscanRefBlock,
|
||||
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &distRefBlock,
|
||||
const Aurora::CudaMatrix &sosWaterBlock,
|
||||
const Aurora::CudaMatrix &sosWaterRefBlock,
|
||||
int resampleFactor,int nthreads, float expectedSOSWater,
|
||||
int useTimeWindowing, int aScanReconstructionFrequency,int detectionWindowATT,
|
||||
float offsetElectronic, int detectionWindowSOS, float minSpeedOfSound,
|
||||
float maxSpeedOfSound, bool gaussWindow)
|
||||
{
|
||||
auto sampleRate = aScanReconstructionFrequency;
|
||||
float offsetElectronicSamples = offsetElectronic * sampleRate;
|
||||
CudaMatrix diffStartSearch;
|
||||
Recon::TimeWindowResultC timeResult1;
|
||||
timeResult1.AscanBlockProcessed = AscanBlock;
|
||||
Recon::TimeWindowResultC timeResult2;
|
||||
timeResult2.AscanBlockProcessed = AscanRefBlock;
|
||||
if (useTimeWindowing == 1) {
|
||||
timeResult1 = applyTimeWindowing(
|
||||
AscanBlock, sampleRate, distBlock, sosWaterBlock,
|
||||
expectedSOSWater, offsetElectronicSamples, detectionWindowSOS,
|
||||
minSpeedOfSound, maxSpeedOfSound, gaussWindow);
|
||||
timeResult2 = applyTimeWindowing(
|
||||
AscanRefBlock, sampleRate, distRefBlock, sosWaterRefBlock,
|
||||
expectedSOSWater, offsetElectronicSamples, detectionWindowSOS,
|
||||
minSpeedOfSound, maxSpeedOfSound, gaussWindow);
|
||||
|
||||
diffStartSearch = timeResult1.startSearch - timeResult2.startSearch;
|
||||
}
|
||||
auto _AscanBlock = timeResult1.AscanBlockProcessed;
|
||||
auto _AscanRefBlock = timeResult2.AscanBlockProcessed;
|
||||
int maxlag = 0;
|
||||
CudaMatrix c, c1;
|
||||
{
|
||||
auto m = std::max(size(_AscanBlock, 1), size(_AscanRefBlock, 1));
|
||||
maxlag =
|
||||
std::max(size(_AscanBlock, 1), size(_AscanRefBlock, 1)) - 1;
|
||||
auto mxl = std::min(maxlag, m - 1);
|
||||
auto ceilLog2 = nextpow2(2 * m - 1);
|
||||
auto m2 = pow(2, ceilLog2);
|
||||
{
|
||||
CudaMatrix c_1_2;
|
||||
{
|
||||
CudaMatrix c_1_1;
|
||||
{
|
||||
auto x = fft(_AscanBlock, m2);
|
||||
auto y = fft(_AscanRefBlock, m2);
|
||||
c_1_1 = x * conj(y);
|
||||
}
|
||||
c_1_2 = ifft(c_1_1);
|
||||
}
|
||||
c1 = real(c_1_2);
|
||||
}
|
||||
c = zerosCuda(mxl + mxl + 1, c1.getDimSize(1));
|
||||
c.setBlock(0, 0, mxl-1, c1.block(0, m2-mxl,m2-1));
|
||||
c.setBlock(0, mxl, mxl*2, c1.block(0, 0, mxl));
|
||||
}
|
||||
auto shiftInSamples = zerosCuda(1, c1.getDimSize(1));
|
||||
findMaxIndexKernel<<<c.getDimSize(1),256>>>(c.getData(),c.getDimSize(0),shiftInSamples.getData(),maxlag);
|
||||
if (useTimeWindowing) {
|
||||
shiftInSamples = shiftInSamples - diffStartSearch;
|
||||
}
|
||||
auto tof = shiftInSamples / sampleRate + distBlock / sosWaterBlock;
|
||||
auto sosValue = distBlock / tof;
|
||||
Recon::DetectResultC result;
|
||||
result.sosValue = sosValue;
|
||||
auto tofRel = tof - distBlock / sosWaterBlock;
|
||||
result.att = detectAttVectorizedCuda(
|
||||
_AscanBlock, _AscanRefBlock, distRefBlock, sosWaterRefBlock,
|
||||
tof, aScanReconstructionFrequency, offsetElectronic,
|
||||
detectionWindowATT);
|
||||
result.tof = tofRel;
|
||||
return result;
|
||||
}
|
||||
53
src/transmissionReconstruction/detection/detection.cuh
Normal file
53
src/transmissionReconstruction/detection/detection.cuh
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef __DETECTION_CUH__
|
||||
#define __DETECTION_CUH__
|
||||
#include "CudaMatrix.h"
|
||||
using namespace Aurora;
|
||||
namespace Recon {
|
||||
struct SearchPositionC {
|
||||
Aurora::CudaMatrix startSearch;
|
||||
Aurora::CudaMatrix endSearch;
|
||||
};
|
||||
struct DetectResultC {
|
||||
Aurora::CudaMatrix tof;
|
||||
Aurora::CudaMatrix sosValue;
|
||||
Aurora::CudaMatrix att;
|
||||
};
|
||||
struct TimeWindowResultC {
|
||||
Aurora::CudaMatrix startSearch;
|
||||
Aurora::CudaMatrix AscanBlockProcessed;
|
||||
};
|
||||
CudaMatrix calculateAttenuationCuda(const CudaMatrix &ascans,
|
||||
const CudaMatrix &startPos,
|
||||
const CudaMatrix &endPos,
|
||||
const CudaMatrix &ascansRef,
|
||||
const CudaMatrix &startPosRef,
|
||||
const CudaMatrix &endPosRef);
|
||||
CudaMatrix
|
||||
detectAttVectorizedCuda(const CudaMatrix &Ascan, const CudaMatrix &AscanRef,
|
||||
const CudaMatrix &distRef,
|
||||
const CudaMatrix &sosWaterRef,
|
||||
const CudaMatrix &tof, int aScanReconstructionFrequency,
|
||||
float offsetElectronic, int detectionWindowATT);
|
||||
|
||||
TimeWindowResultC applyTimeWindowing(const Aurora::CudaMatrix &AscanBlock, float sampleRate,
|
||||
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &sosBlock,
|
||||
float expectedSOSWater, float startOffset, float segmentLenOffset,
|
||||
float minSpeedOfSound, float maxSpeedOfSound, bool gaussWindow);
|
||||
SearchPositionC calculateStarEndSearchPosition(const CudaMatrix &aVDistBlock,
|
||||
float minSpeedOfSound, float maxSpeedOfSound,
|
||||
float sampleRate, float maxSample,
|
||||
const CudaMatrix &aVSosOffsetBlock,
|
||||
float startOffset, float segmentLenOffset);
|
||||
CudaMatrix calculateSOSOffset(const CudaMatrix &sosBlock, float referenceSOS, const CudaMatrix &distBlock, float sampleRate);
|
||||
DetectResultC detectTofAndAtt(
|
||||
const Aurora::CudaMatrix &AscanBlock, const Aurora::CudaMatrix &AscanRefBlock,
|
||||
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &distRefBlock,
|
||||
const Aurora::CudaMatrix &sosWaterBlock,
|
||||
const Aurora::CudaMatrix &sosWaterRefBlock,
|
||||
int resampleFactor,int nthreads, float expectedSOSWater,
|
||||
int useTimeWindowing, int aScanReconstructionFrequency,int detectionWindowATT,
|
||||
float offsetElectronic, int detectionWindowSOS, float minSpeedOfSound,
|
||||
float maxSpeedOfSound, bool gaussWindow);
|
||||
};
|
||||
|
||||
#endif // __DETECTION_H__
|
||||
@@ -75,8 +75,8 @@ detectTofAndAttMex(
|
||||
|
||||
DetectResult
|
||||
transmissionDetection(
|
||||
const Aurora::Matrix &AscanBlock, const Aurora::Matrix &AscanRefBlock,
|
||||
const Aurora::Matrix &distBlock, const Aurora::Matrix &distRefBlock,
|
||||
const Aurora::CudaMatrix &AscanBlock, const Aurora::CudaMatrix &AscanRefBlock,
|
||||
const Aurora::CudaMatrix &distBlock, const Aurora::CudaMatrix &distRefBlock,
|
||||
const Aurora::Matrix &sosWaterBlock, const Aurora::Matrix &sosWaterRefBlock, float expectedSOSWater);
|
||||
|
||||
} // namespace Recon
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
#include "getTransmissionData.h"
|
||||
#include "getTransmissionData.cuh"
|
||||
#include "AuroraDefs.h"
|
||||
#include "CudaMatrix.h"
|
||||
#include "Function.h"
|
||||
#include "Function1D.h"
|
||||
#include "Function2D.h"
|
||||
@@ -28,6 +31,11 @@
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "Function1D.cuh"
|
||||
#include "Function2D.cuh"
|
||||
#include <sys/time.h>
|
||||
|
||||
using namespace Recon;
|
||||
using namespace Aurora;
|
||||
|
||||
@@ -42,8 +50,8 @@ namespace
|
||||
Matrix waterTempBlock;
|
||||
MetaInfos metaInfos;
|
||||
|
||||
Matrix ascanBlock;
|
||||
Matrix ascanBlockRef;
|
||||
CudaMatrix ascanBlock;
|
||||
CudaMatrix ascanBlockRef;
|
||||
Matrix dists;
|
||||
Matrix distRefBlock;
|
||||
Matrix waterTempRefBlock;
|
||||
@@ -56,28 +64,59 @@ namespace
|
||||
std::mutex PROCESS_BUFFER_MUTEX;
|
||||
std::condition_variable PROCESS_BUFFER_CONDITION;
|
||||
int BUFFER_COUNT = 0;
|
||||
int BUFFER_SIZE = 3;
|
||||
int BUFFER_SIZE = 4;//<=8
|
||||
|
||||
Matrix prepareAScansForTransmissionDetection(const Matrix& aAscanBlock, const Matrix& aGainBlock)
|
||||
void printTime()
|
||||
{
|
||||
struct timeval tpend;
|
||||
gettimeofday(&tpend,NULL);
|
||||
int secofday = (tpend.tv_sec + 3600 * 8 ) % 86400;
|
||||
int hours = secofday / 3600;
|
||||
int minutes = (secofday - hours * 3600 ) / 60;
|
||||
int seconds = secofday % 60;
|
||||
int milliseconds = tpend.tv_usec/1000;
|
||||
std::cout<< hours << ":" <<minutes<<":"<<seconds<<"."<<milliseconds<<std::endl;
|
||||
}
|
||||
|
||||
CudaMatrix prepareAScansForTransmissionDetection(const CudaMatrix& aAscanBlock, const CudaMatrix& aGainBlock)
|
||||
{
|
||||
Matrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1);
|
||||
CudaMatrix result = aAscanBlock / repmat(aGainBlock, aAscanBlock.getDimSize(0), 1);
|
||||
result = result - repmat(mean(result,FunctionDirection::Column), result.getDimSize(0), 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
Aurora::CudaMatrix calculateSnr(const Aurora::CudaMatrix &aMDataBlock,
|
||||
float aReferenceNoise) {
|
||||
auto maxSignal = max(abs(aMDataBlock));
|
||||
auto snrBlock = 10 * log(maxSignal / aReferenceNoise, 10);
|
||||
return snrBlock;
|
||||
}
|
||||
|
||||
BlockOfTransmissionData getBlockOfTransmissionData(const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
|
||||
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo& aGeomRef,
|
||||
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
|
||||
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef)
|
||||
{
|
||||
BlockOfTransmissionData result;
|
||||
MetaInfos metaInfos;
|
||||
auto blockData = getAscanBlockPreprocessed(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true);
|
||||
auto blockDataRef = getAscanBlockPreprocessed(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true);
|
||||
Matrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed, blockData.gainBlock);
|
||||
Matrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed, blockDataRef.gainBlock);
|
||||
blockData.ascanBlockPreprocessed = Matrix();
|
||||
blockDataRef.ascanBlockPreprocessed = Matrix();
|
||||
MetaInfos metaInfos;
|
||||
//printTime();
|
||||
//2500ms
|
||||
auto blockData = getAscanBlockPreprocessedCuda(aParser, aMp, aSl, aSn, aRlList, aRnList, aGeom, aExpInfo, true, true);
|
||||
auto blockDataRef = getAscanBlockPreprocessedCuda(aParserRef, aMpRef, aSl, aSn, aRlList, aRnList, aGeomRef, aExpInfoRef, true, true);
|
||||
//printTime();
|
||||
//180ms
|
||||
// auto t1 = blockData.ascanBlockPreprocessed.toDeviceMatrix();
|
||||
// auto t2 = blockDataRef.ascanBlockPreprocessed.toDeviceMatrix();
|
||||
// auto t3 = blockData.gainBlock.toDeviceMatrix();
|
||||
// auto t4 = blockDataRef.gainBlock.toDeviceMatrix();
|
||||
//printTime();
|
||||
//20ms
|
||||
CudaMatrix ascanBlock = prepareAScansForTransmissionDetection(blockData.ascanBlockPreprocessed,blockData.gainBlock);
|
||||
CudaMatrix ascanBlockRef = prepareAScansForTransmissionDetection(blockDataRef.ascanBlockPreprocessed,blockDataRef.gainBlock);
|
||||
//printTime();
|
||||
//20ms
|
||||
blockData.ascanBlockPreprocessed = CudaMatrix();
|
||||
blockDataRef.ascanBlockPreprocessed = CudaMatrix();
|
||||
if(aExpInfo.Hardware == "USCT3dv3")
|
||||
{
|
||||
Matrix channelList = precalculateChannelList(aRlList, aRnList, aExpInfo, aPreComputes);
|
||||
@@ -93,71 +132,48 @@ namespace
|
||||
channelListBlockData[i] = channelList[ind[i] - 1];
|
||||
}
|
||||
Matrix channelListBlock = Matrix::New(channelListBlockData, 1, channelListBlockSize);
|
||||
Matrix fx = fft(ascanBlock);
|
||||
float* fhData = Aurora::malloc(aExpInfo.matchedFilter.getDimSize(0) * channelListBlockSize, true);
|
||||
Matrix fh = Matrix::New(fhData, aExpInfo.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
|
||||
size_t matchedFilterRowDataSize = aExpInfo.matchedFilter.getDimSize(0)*2;
|
||||
for(size_t i=0; i<channelListBlockSize; ++i)
|
||||
{
|
||||
cblas_scopy(matchedFilterRowDataSize, aExpInfo.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
|
||||
fhData += matchedFilterRowDataSize;
|
||||
}
|
||||
// Matrix fxReal = Aurora::real(fx);
|
||||
// Matrix fhReal = Aurora::real(fh);
|
||||
// Matrix fxImag = Aurora::imag(fx);
|
||||
// Matrix fhImag = Aurora::imag(fh);
|
||||
// Matrix real = fxReal * fhReal + fxImag * fhImag;
|
||||
// Matrix image = fxImag * fhReal - fxReal * fhImag;
|
||||
float* value1 = Aurora::malloc(fx.getDataSize());
|
||||
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1);
|
||||
float* value2 = Aurora::malloc(fx.getDataSize());
|
||||
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1);
|
||||
float* realData = Aurora::malloc(fx.getDataSize());
|
||||
vsAdd(fx.getDataSize(), value1, value2, realData);
|
||||
Matrix real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1));
|
||||
|
||||
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1);
|
||||
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
|
||||
float* imagData = Aurora::malloc(fx.getDataSize());
|
||||
vsSub(fx.getDataSize(), value1, value2, imagData);
|
||||
Matrix image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
|
||||
|
||||
float* complexData = Aurora::malloc(real.getDataSize(), true);
|
||||
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
|
||||
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
|
||||
Matrix complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
|
||||
//printTime();
|
||||
//20ms
|
||||
CudaMatrix fx = fft(ascanBlock);
|
||||
//printTime();
|
||||
//50ms
|
||||
// float* fhData = nullptr;
|
||||
// cudaMalloc((void**)&fhData, sizeof(float) * aExpInfo.matchedFilter.getDimSize(0) * channelListBlockSize * Aurora::Complex);
|
||||
// CudaMatrix fh = CudaMatrix::fromRawData(fhData, aExpInfo.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
|
||||
// size_t matchedFilterRowDataSize = aExpInfo.matchedFilter.getDimSize(0)*2;
|
||||
// for(size_t i=0; i<channelListBlockSize; ++i)
|
||||
// {
|
||||
// cudaMemcpy(fhData, aExpInfo.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, sizeof(float) * matchedFilterRowDataSize, cudaMemcpyHostToDevice);
|
||||
// fhData += matchedFilterRowDataSize;
|
||||
// }
|
||||
Aurora::CudaMatrix matchedFilterDevice = aExpInfo.matchedFilter.toDeviceMatrix();
|
||||
Aurora::CudaMatrix channelListBlockDevice = channelListBlock.toDeviceMatrix();
|
||||
Aurora::CudaMatrix fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
|
||||
//printTime();
|
||||
//20ms
|
||||
CudaMatrix complex = getTransmissionDataSubFunction(fx, fh);
|
||||
ascanBlock = Aurora::real(ifft(complex));
|
||||
|
||||
//printTime();
|
||||
//20s
|
||||
fx = fft(ascanBlockRef);
|
||||
fhData = Aurora::malloc(aExpInfoRef.matchedFilter.getDimSize(0) * channelListBlockSize, true);
|
||||
fh = Matrix::New(fhData, aExpInfoRef.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
|
||||
matchedFilterRowDataSize = aExpInfoRef.matchedFilter.getDimSize(0)*2;
|
||||
for(size_t i=0; i<channelListBlockSize; ++i)
|
||||
{
|
||||
cblas_scopy(matchedFilterRowDataSize, aExpInfoRef.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, 1 , fhData ,1);
|
||||
fhData += matchedFilterRowDataSize;
|
||||
}
|
||||
// real = Aurora::real(fx) * Aurora::real(fh) + Aurora::imag(fx) * Aurora::imag(fh);
|
||||
// image = Aurora::imag(fx) * Aurora::real(fh) - Aurora::real(fx) * Aurora::imag(fh);
|
||||
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData(), 2, value1, 1);
|
||||
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData() + 1, 2, value2, 1);
|
||||
realData = Aurora::malloc(fx.getDataSize());
|
||||
vsAdd(fx.getDataSize(), value1, value2, realData);
|
||||
real = Matrix::New(realData, fx.getDimSize(0), fx.getDimSize(1));
|
||||
|
||||
vsMulI(fx.getDataSize(), fx.getData() + 1, 2, fh.getData(), 2, value1, 1);
|
||||
vsMulI(fx.getDataSize(), fx.getData(), 2, fh.getData() + 1, 2, value2, 1);
|
||||
imagData = Aurora::malloc(fx.getDataSize());
|
||||
vsSub(fx.getDataSize(), value1, value2, imagData);
|
||||
image = Matrix::New(imagData, fx.getDimSize(0), fx.getDimSize(1));
|
||||
Aurora::free(value1);
|
||||
Aurora::free(value2);
|
||||
|
||||
complexData = Aurora::malloc(real.getDataSize(), true);
|
||||
cblas_scopy(real.getDataSize(), real.getData(), 1 , complexData ,2);
|
||||
cblas_scopy(image.getDataSize(), image.getData(), 1 , complexData + 1 ,2);
|
||||
complex = Matrix::New(complexData, real.getDimSize(0), real.getDimSize(1), 1, Aurora::Complex);
|
||||
//printTime();
|
||||
//50ms
|
||||
// cudaMalloc((void**)&fhData, sizeof(float) * aExpInfo.matchedFilter.getDimSize(0) * channelListBlockSize * Aurora::Complex);
|
||||
// fh = CudaMatrix::fromRawData(fhData, aExpInfoRef.matchedFilter.getDimSize(0), channelListBlockSize, 1, Aurora::Complex);
|
||||
// matchedFilterRowDataSize = aExpInfoRef.matchedFilter.getDimSize(0)*2;
|
||||
// for(size_t i=0; i<channelListBlockSize; ++i)
|
||||
// {
|
||||
// cudaMemcpy(fhData, aExpInfoRef.matchedFilter.getData() + (size_t)(channelListBlock[i] - 1) * matchedFilterRowDataSize, sizeof(float) * matchedFilterRowDataSize, cudaMemcpyHostToDevice);
|
||||
// fhData += matchedFilterRowDataSize;
|
||||
// }
|
||||
matchedFilterDevice = aExpInfoRef.matchedFilter.toDeviceMatrix();
|
||||
fh = createFhMatrix(matchedFilterDevice, channelListBlockDevice);
|
||||
//printTime();
|
||||
//20ms
|
||||
complex = getTransmissionDataSubFunction(fx, fh);
|
||||
ascanBlockRef = Aurora::real(ifft(complex));
|
||||
//printTime();
|
||||
//20ms
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -167,24 +183,27 @@ namespace
|
||||
|
||||
if(transParams::applyCalib)
|
||||
{
|
||||
metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]);
|
||||
metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]);
|
||||
metaInfos.snrValues = calculateSnr(ascanBlock, aSnrRmsNoise[0]).toHostMatrix();
|
||||
metaInfos.snrValuesRef = calculateSnr(ascanBlockRef, aSnrRmsNoiseRef[0]).toHostMatrix();
|
||||
}
|
||||
|
||||
// printTime();
|
||||
//3ms
|
||||
Matrix dists = distanceBetweenTwoPoints(blockData.senderPositionBlock, blockData.receiverPositionBlock);
|
||||
Matrix distRefBlock = distanceBetweenTwoPoints(blockDataRef.senderPositionBlock, blockDataRef.receiverPositionBlock);
|
||||
|
||||
//printTime();
|
||||
//2ms
|
||||
Matrix waterTempBlock = calculateWaterTemperature(aTasTemps.waterTempPreCalc_sl, aTasTemps.waterTempPreCalc_rl, blockData.slBlock, blockData.rlBlock, blockData.mpBlock);
|
||||
Matrix waterTempRefBlock = calculateWaterTemperature(aTasTemps.waterTempRefPreCalc_sl, aTasTemps.waterTempRefPreCalc_rl, blockData.slBlock, blockData.rlBlock, blockDataRef.mpBlock);
|
||||
|
||||
if(transParams::saveDetection || transParams::outlierOnTasDetection || transParams::saveDebugInfomation)
|
||||
{
|
||||
metaInfos.mpBlock = blockData.mpBlock;
|
||||
metaInfos.slBlock = blockData.slBlock;
|
||||
metaInfos.snBlock = blockData.snBlock;
|
||||
metaInfos.rlBlock = blockData.rlBlock;
|
||||
metaInfos.rnBlock = blockData.rnBlock;
|
||||
}
|
||||
// printTime();
|
||||
// 1ms
|
||||
// if(transParams::saveDetection || transParams::outlierOnTasDetection || transParams::saveDebugInfomation)
|
||||
// {
|
||||
// metaInfos.mpBlock = blockData.mpBlock;
|
||||
// metaInfos.slBlock = blockData.slBlock;
|
||||
// metaInfos.snBlock = blockData.snBlock;
|
||||
// metaInfos.rlBlock = blockData.rlBlock;
|
||||
// metaInfos.rnBlock = blockData.rnBlock;
|
||||
// }
|
||||
result.metaInfos = metaInfos;
|
||||
result.senderBlock = blockData.senderPositionBlock;
|
||||
result.receiverBlock = blockData.receiverPositionBlock;
|
||||
@@ -199,7 +218,7 @@ namespace
|
||||
// DetectResult detect = transmissionDetection(ascanBlock, ascanBlockRef, dists, distRefBlock, waterTempBlock, waterTempRefBlock, aExpectedSOSWater[0]);
|
||||
// result.attData = detect.att;
|
||||
// result.tofData = detect.tof;
|
||||
|
||||
//printTime();
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -208,8 +227,9 @@ namespace
|
||||
void getBlockOfTransmissionDataInThread(size_t aIndex, const Matrix& aMp, const Matrix& aMpRef, const Matrix& aSl, const Matrix& aSn, const Matrix& aRlList, const Matrix& aRnList,
|
||||
const TasTemps& aTasTemps, const Matrix& aExpectedSOSWater, GeometryInfo aGeom, GeometryInfo aGeomRef,
|
||||
const Matrix& aSnrRmsNoise, const Matrix& aSnrRmsNoiseRef, const MeasurementInfo& aExpInfo, const MeasurementInfo& aExpInfoRef,
|
||||
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef)
|
||||
const PreComputes& aPreComputes, Parser* aParser, Parser* aParserRef, unsigned int aGPUId)
|
||||
{
|
||||
cudaSetDevice(aGPUId);
|
||||
auto buffer = getBlockOfTransmissionData(aMp, aMpRef, aSl, aSn, aRlList, aRnList, aTasTemps,
|
||||
aExpectedSOSWater, aGeom, aGeomRef, aSnrRmsNoise, aSnrRmsNoiseRef,
|
||||
aExpInfo, aExpInfoRef, aPreComputes, aParser, aParserRef);
|
||||
@@ -245,7 +265,7 @@ void createThreadForGetBlockOfTransmissionData(const Matrix& aMotorPos, const M
|
||||
CREATE_BUFFER_CONDITION.wait(lock, []{return BUFFER_COUNT<BUFFER_SIZE;});
|
||||
++BUFFER_COUNT;
|
||||
lock.unlock();
|
||||
speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef);
|
||||
speedUpThread[index] = std::thread(getBlockOfTransmissionDataInThread,index,mp,mpRef,sl,sn,aRlList,aRnList,aTasTemps,aExpectedSOSWater,aGeom,aGeomRef,aSnrRmsNoise,aSnrRmsNoiseRef,aExpInfo,aExpInfoRef,aPreComputes,aParser, aParserRef, index % BUFFER_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -334,8 +354,9 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
|
||||
lock.unlock();
|
||||
|
||||
auto blockData = BLOCK_OF_TRANSIMISSIONDARA_BUFFER[std::to_string(index)];
|
||||
cudaSetDevice(index % BUFFER_SIZE);
|
||||
DetectResult detect = transmissionDetection( blockData.ascanBlock, blockData.ascanBlockRef,
|
||||
blockData.dists, blockData.distRefBlock,
|
||||
blockData.dists.toDeviceMatrix(), blockData.distRefBlock.toDeviceMatrix(),
|
||||
blockData.waterTempBlock, blockData.waterTempRefBlock,
|
||||
aTemp.expectedSOSWater[0]);
|
||||
blockData.attData = detect.att;
|
||||
@@ -355,14 +376,14 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
|
||||
cblas_scopy(numUsedData, transmissionBlock.attData.getData(), 1, attDataTotal.getData() + numData, 1);
|
||||
cblas_scopy(numUsedData, transmissionBlock.waterTempBlock.getData(), 1, waterTempList.getData() + numData, 1);
|
||||
|
||||
if(transParams::saveDetection || transParams::outlierOnTasDetection || transParams::saveDebugInfomation)
|
||||
{
|
||||
cblas_scopy(numUsedData, transmissionBlock.metaInfos.mpBlock.getData(), 1, mpBlockTotal.getData() + numData, 1);
|
||||
cblas_scopy(numUsedData, transmissionBlock.metaInfos.slBlock.getData(), 1, slBlockTotal.getData() + numData, 1);
|
||||
cblas_scopy(numUsedData, transmissionBlock.metaInfos.snBlock.getData(), 1, snBlockTotal.getData() + numData, 1);
|
||||
cblas_scopy(numUsedData, transmissionBlock.metaInfos.rlBlock.getData(), 1, rlBlockTotal.getData() + numData, 1);
|
||||
cblas_scopy(numUsedData, transmissionBlock.metaInfos.rnBlock.getData(), 1, rnBlockTotal.getData() + numData, 1);
|
||||
}
|
||||
// if(transParams::saveDetection || transParams::outlierOnTasDetection || transParams::saveDebugInfomation)
|
||||
// {
|
||||
// cblas_scopy(numUsedData, transmissionBlock.metaInfos.mpBlock.getData(), 1, mpBlockTotal.getData() + numData, 1);
|
||||
// cblas_scopy(numUsedData, transmissionBlock.metaInfos.slBlock.getData(), 1, slBlockTotal.getData() + numData, 1);
|
||||
// cblas_scopy(numUsedData, transmissionBlock.metaInfos.snBlock.getData(), 1, snBlockTotal.getData() + numData, 1);
|
||||
// cblas_scopy(numUsedData, transmissionBlock.metaInfos.rlBlock.getData(), 1, rlBlockTotal.getData() + numData, 1);
|
||||
// cblas_scopy(numUsedData, transmissionBlock.metaInfos.rnBlock.getData(), 1, rnBlockTotal.getData() + numData, 1);
|
||||
// }
|
||||
numData += numUsedData;
|
||||
std::unique_lock<std::mutex> lockBufferCount(CREATE_BUFFER_MUTEX);
|
||||
BLOCK_OF_TRANSIMISSIONDARA_BUFFER.erase(std::to_string(index));
|
||||
@@ -371,7 +392,7 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
|
||||
std::cout<<"Remove: "<<index<<std::endl;
|
||||
CREATE_BUFFER_CONDITION.notify_one();
|
||||
}
|
||||
Recon::notifyProgress(6+10*((i+1)*(j+1)/(aMotorPos.getDataSize()*(aSlList.getDataSize()/ transParams::senderTASSize))));
|
||||
//Recon::notifyProgress(6+10*((i+1)*(j+1)/(aMotorPos.getDataSize()*(aSlList.getDataSize()/ transParams::senderTASSize))));
|
||||
}
|
||||
}
|
||||
speedUpThread.join();
|
||||
@@ -399,14 +420,14 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
|
||||
tofDataTotal = removeDataFromArrays(tofDataTotal, filter);
|
||||
attDataTotal = removeDataFromArrays(attDataTotal, filter);
|
||||
waterTempList = removeDataFromArrays(waterTempList, filter);
|
||||
if(transParams::saveDebugInfomation || transParams::outlierOnTasDetection || transParams::saveDetection)
|
||||
{
|
||||
mpBlockTotal = removeDataFromArrays(mpBlockTotal, filter);
|
||||
slBlockTotal = removeDataFromArrays(slBlockTotal, filter);
|
||||
snBlockTotal = removeDataFromArrays(snBlockTotal, filter);
|
||||
rlBlockTotal = removeDataFromArrays(rlBlockTotal, filter);
|
||||
rnBlockTotal = removeDataFromArrays(rnBlockTotal, filter);
|
||||
}
|
||||
// if(transParams::saveDebugInfomation || transParams::outlierOnTasDetection || transParams::saveDetection)
|
||||
// {
|
||||
// mpBlockTotal = removeDataFromArrays(mpBlockTotal, filter);
|
||||
// slBlockTotal = removeDataFromArrays(slBlockTotal, filter);
|
||||
// snBlockTotal = removeDataFromArrays(snBlockTotal, filter);
|
||||
// rlBlockTotal = removeDataFromArrays(rlBlockTotal, filter);
|
||||
// rnBlockTotal = removeDataFromArrays(rnBlockTotal, filter);
|
||||
// }
|
||||
|
||||
Matrix valid;
|
||||
if(transParams::applyCalib)
|
||||
@@ -432,14 +453,14 @@ TransmissionData Recon::getTransmissionData(const Aurora::Matrix& aMotorPos, con
|
||||
}
|
||||
dataInfno.findDefect = Matrix::New(findDefectData, 1, findDefectDataIndex);
|
||||
|
||||
if(transParams::saveDebugInfomation)
|
||||
{
|
||||
dataInfno.sn = snBlockTotal;
|
||||
dataInfno.sl = slBlockTotal;
|
||||
dataInfno.rn = rnBlockTotal;
|
||||
dataInfno.rl = rlBlockTotal;
|
||||
dataInfno.mp = mpBlockTotal;
|
||||
}
|
||||
// if(transParams::saveDebugInfomation)
|
||||
// {
|
||||
// dataInfno.sn = snBlockTotal;
|
||||
// dataInfno.sl = slBlockTotal;
|
||||
// dataInfno.rn = rnBlockTotal;
|
||||
// dataInfno.rl = rlBlockTotal;
|
||||
// dataInfno.mp = mpBlockTotal;
|
||||
// }
|
||||
|
||||
tofDataTotal = removeDataFromArrays(tofDataTotal, valid);
|
||||
attDataTotal = removeDataFromArrays(attDataTotal, valid);
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
#include "AuroraDefs.h"
|
||||
#include "getTransmissionData.cuh"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
const int THREADS_PER_BLOCK = 256;
|
||||
|
||||
__global__ void getTransmissionDataSubFunctionKernel(float* aFx, float* aFh, float* aOutput, unsigned int aSize)
|
||||
{
|
||||
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < aSize)
|
||||
{
|
||||
unsigned int realIndex = idx * 2;
|
||||
unsigned int imagIndex = realIndex + 1;
|
||||
float fxReal = aFx[realIndex];
|
||||
float fhReal = aFh[realIndex];
|
||||
float fxImag = aFx[imagIndex];
|
||||
float fhImag = aFh[imagIndex];
|
||||
aOutput[realIndex] = fxReal * fhReal + fxImag * fhImag;
|
||||
aOutput[2*idx + 1] = fxImag * fhReal - fxReal * fhImag;
|
||||
}
|
||||
}
|
||||
|
||||
Aurora::CudaMatrix getTransmissionDataSubFunction(const Aurora::CudaMatrix& aFxMatrix, const Aurora::CudaMatrix& aFhMatrix)
|
||||
{
|
||||
size_t size = aFxMatrix.getDataSize();
|
||||
float* data = nullptr;
|
||||
cudaMalloc((void**)&data, sizeof(float) * size * Aurora::Complex);
|
||||
int blocksPerGrid = (size + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
getTransmissionDataSubFunctionKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(aFxMatrix.getData(), aFhMatrix.getData(), data, size);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(data, aFxMatrix.getDimSize(0), aFxMatrix.getDimSize(1), aFxMatrix.getDimSize(2), Aurora::Complex);
|
||||
}
|
||||
|
||||
__global__ void createFhMatrixKernel(float* aMatchedFilter, float* aChannelListBlock, float* aOutput, unsigned int aRowSize)
|
||||
{
|
||||
for(unsigned int i=0; i< std::ceil((float)aRowSize / (float)THREADS_PER_BLOCK); ++i)
|
||||
{
|
||||
unsigned int index = i * THREADS_PER_BLOCK + threadIdx.x;
|
||||
if(index < aRowSize)
|
||||
{
|
||||
unsigned int outPutComplexIndex = 2 * (blockIdx.x * aRowSize + index);
|
||||
unsigned int inputComplexIndex = 2 * ((aChannelListBlock[blockIdx.x] - 1) * aRowSize + index);
|
||||
aOutput[outPutComplexIndex] = aMatchedFilter[inputComplexIndex];
|
||||
aOutput[outPutComplexIndex + 1] = aMatchedFilter[inputComplexIndex + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Aurora::CudaMatrix createFhMatrix(const Aurora::CudaMatrix& aMatchedFilter, const Aurora::CudaMatrix& aChannelListBlock)
|
||||
{
|
||||
size_t columnSize = aChannelListBlock.getDataSize();
|
||||
size_t rowSize = aMatchedFilter.getDimSize(0);
|
||||
float* fh = nullptr;
|
||||
cudaMalloc((void**)&fh, sizeof(float) * rowSize * columnSize * Aurora::Complex);
|
||||
createFhMatrixKernel<<<columnSize, THREADS_PER_BLOCK>>>(aMatchedFilter.getData(), aChannelListBlock.getData(), fh, rowSize);
|
||||
cudaDeviceSynchronize();
|
||||
return Aurora::CudaMatrix::fromRawData(fh, rowSize, columnSize, 1, Aurora::Complex);
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef GETTRANSMISSIONDATA_CU_H
|
||||
#define GETTRANSMISSIONDATA_CU_H
|
||||
|
||||
#include "CudaMatrix.h"
|
||||
|
||||
Aurora::CudaMatrix getTransmissionDataSubFunction(const Aurora::CudaMatrix& aFxMatrix, const Aurora::CudaMatrix& aFhMatrix);
|
||||
|
||||
Aurora::CudaMatrix createFhMatrix(const Aurora::CudaMatrix& aMatchedFilter, const Aurora::CudaMatrix& aChannelListBlock);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,246 @@
|
||||
#include "Function2D.cuh"
|
||||
|
||||
|
||||
#include <cstdio>
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "CudaMatrix.h"
|
||||
#include "buildMatrix.cuh"
|
||||
#include "Sparse.h"
|
||||
|
||||
|
||||
namespace {
|
||||
const int THREADS_PER_BLOCK = 256;
|
||||
}
|
||||
|
||||
|
||||
|
||||
__device__ float getPixelLengthKernel(float* aStartPt, float*aEndPt, float* aRes, int aPathLenDisc)
|
||||
{
|
||||
float temp1;
|
||||
float temp2;
|
||||
float temp3;
|
||||
|
||||
temp1 = aEndPt[0]- aStartPt[0];
|
||||
temp2 = aEndPt[1]- aStartPt[1];
|
||||
temp3 = aEndPt[2]- aStartPt[2];
|
||||
temp1 *= aRes[0];
|
||||
temp2 *= aRes[1];
|
||||
temp3 *= aRes[2];
|
||||
temp1 = temp1*temp1;
|
||||
temp2 = temp2*temp2;
|
||||
temp3 = temp3*temp3;
|
||||
return (sqrtf(temp3+temp2+temp1))/(float)aPathLenDisc;
|
||||
}
|
||||
|
||||
__global__ struct b3dStruct{
|
||||
int max, f, pathlen;
|
||||
float x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x, y, z, weight;
|
||||
};
|
||||
__device__ void getRayBLen(float *startPoint, float *endPoint, b3dStruct& aOut) {
|
||||
aOut.x = startPoint[0];
|
||||
aOut.y = startPoint[1];
|
||||
aOut.z = startPoint[2];
|
||||
|
||||
float dx = endPoint[0] - aOut.x;
|
||||
float dy = endPoint[1] - aOut.y;
|
||||
float dz = endPoint[2] - aOut.z;
|
||||
|
||||
aOut.x_inc = (dx < 0) ? -1 : 1;
|
||||
float l = abs(dx);
|
||||
aOut.y_inc = (dy < 0) ? -1 : 1;
|
||||
float m = abs(dy);
|
||||
aOut.z_inc = (dz < 0) ? -1 : 1;
|
||||
float n = abs(dz);
|
||||
aOut.dx2 = l*2;// << 1;
|
||||
aOut.dy2 = m*2;// << 1;
|
||||
aOut.dz2 = n*2;// << 1;
|
||||
float vmax ;
|
||||
if ((l >= m) && (l >= n)){
|
||||
aOut.max =ceilf(l);
|
||||
vmax = l;
|
||||
aOut.d = aOut.dx2;
|
||||
aOut.dx2 = 0;
|
||||
aOut.f = 0;
|
||||
}
|
||||
else if((m >= l) && (m >= n)){
|
||||
aOut.max = ceilf(m);
|
||||
vmax = m;
|
||||
aOut.d = aOut.dy2;
|
||||
aOut.dy2 = 0;
|
||||
aOut.f = 1;
|
||||
}
|
||||
else{
|
||||
aOut.max = ceilf(n);
|
||||
vmax = n;
|
||||
aOut.d = aOut.dz2;
|
||||
aOut.dz2 = 0;
|
||||
aOut.f = 2;
|
||||
}
|
||||
aOut.err_0 = aOut.f==0?0:(aOut.dx2 - vmax);
|
||||
aOut.err_1 = aOut.f==1?0:(aOut.dy2 - vmax);
|
||||
aOut.err_2 = aOut.f==2?0:(aOut.dz2 - vmax);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief 计算traceStraightRayBresenham的pathlen和weight的核函数
|
||||
*
|
||||
* @param aStratPt
|
||||
* @param aEndPt
|
||||
* @param aSize
|
||||
* @param aRes
|
||||
* @param aOuts
|
||||
* @return _
|
||||
*/
|
||||
__global__ void calcLenWeightKernel(float* aStratPt, float* aEndPt, float* aRes,int aSize, b3dStruct* aOuts ){
|
||||
unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx > aSize)return;
|
||||
getRayBLen(aStratPt + idx*3, aEndPt + idx*3, aOuts[idx]);
|
||||
|
||||
aOuts[idx].weight = getPixelLengthKernel(aStratPt + idx*3, aEndPt + idx*3, aRes,aOuts[idx].max);
|
||||
if (idx == 196){
|
||||
int v=0;
|
||||
for (int i =0;i<196; i++) {
|
||||
v+=aOuts[i].max;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
__global__ void calcPathOffsetKernel(b3dStruct* aStructs ,unsigned int* aPath, int aSize){
|
||||
aPath[0] = aStructs[0].max;
|
||||
for (int i=1; i<aSize; i++) {
|
||||
aPath[i] =aStructs[i].max+aPath[i-1];
|
||||
}
|
||||
}
|
||||
|
||||
__device__ float convertToLinearIndices(const float3& aDims, const float3& aPath){
|
||||
float matrixSize_1 = aDims.x;
|
||||
float matrixSize_2 = aDims.y;
|
||||
float coord_col1 = aPath.x;
|
||||
float coord_col2Sub1 = aPath.y - 1;
|
||||
float coord_col3Sub1 = aPath.z - 1;
|
||||
return coord_col1 + coord_col2Sub1 * matrixSize_1 + coord_col3Sub1 * matrixSize_1 * matrixSize_2 - 1 ;
|
||||
}
|
||||
|
||||
__global__ void calcRayBLPathKernel(struct b3dStruct* aInstruct,
|
||||
float* dims, unsigned int * aOffset,
|
||||
float* aJOut, unsigned int aSize) {
|
||||
unsigned int idx = blockIdx.x;
|
||||
if (idx > aSize)return;
|
||||
b3dStruct* in = aInstruct+idx;
|
||||
int f;
|
||||
float max, x_inc, y_inc, z_inc,err_0, err_1, err_2, dx2, dy2, dz2, d, x,y,z;
|
||||
x = in->x;
|
||||
y = in->y;
|
||||
z = in->z;
|
||||
x_inc = in->x_inc;
|
||||
y_inc = in->y_inc;
|
||||
z_inc = in->z_inc;
|
||||
|
||||
dx2 = in->dx2;
|
||||
dy2 = in->dy2;
|
||||
dz2 = in->dz2;
|
||||
d = in->d;
|
||||
|
||||
err_0 = in->err_0;
|
||||
err_1 = in->err_1;
|
||||
err_2 = in->err_2;
|
||||
max = in->max;
|
||||
f = in->f;
|
||||
|
||||
float3 dims3{dims[0],dims[1],dims[2]};
|
||||
float* output_ptr = aJOut + (idx ==0?0:aOffset[idx-1]);
|
||||
for (float i = 0; i < max; i++) {
|
||||
output_ptr[0]=convertToLinearIndices(dims3, {roundf(x),roundf(y),roundf(z)});
|
||||
// output_ptr[0]=i;
|
||||
|
||||
output_ptr ++;
|
||||
if (err_0 > 0) {
|
||||
x += x_inc;
|
||||
err_0 -= d;
|
||||
}
|
||||
if (err_1 > 0) {
|
||||
y += y_inc;
|
||||
err_1 -= d;
|
||||
}
|
||||
if (err_2 > 0) {
|
||||
z += z_inc;
|
||||
err_2 -= d;
|
||||
}
|
||||
err_0 += dx2;
|
||||
err_1 += dy2;
|
||||
err_2 += dz2;
|
||||
if (f == 0) x += x_inc;
|
||||
if (f == 1) y += y_inc;
|
||||
if (f == 2) z += z_inc;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void fillIAndSKernel(struct b3dStruct* aInstructs,unsigned int * aOffset,float* aIOut,float* aSOut){
|
||||
b3dStruct& s = aInstructs[blockIdx.x];
|
||||
__shared__ unsigned int beginIdx;
|
||||
__shared__ unsigned int length;
|
||||
__shared__ float weight;
|
||||
|
||||
if (threadIdx.x == 0){
|
||||
beginIdx = (blockIdx.x == 0) ? 0 : aOffset[blockIdx.x - 1];
|
||||
length = s.max;
|
||||
weight = s.weight;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
for (int i = 0; threadIdx.x + i < length; i+=blockDim.x) {
|
||||
aIOut[beginIdx + threadIdx.x + i] = blockIdx.x;
|
||||
aSOut[beginIdx + threadIdx.x + i] = weight;
|
||||
}
|
||||
}
|
||||
|
||||
Recon::BuildMatrixResult Recon::buildMatrix(const Aurora::CudaMatrix &senderList,
|
||||
const Aurora::CudaMatrix &receiverList,
|
||||
Aurora::CudaMatrix& res,
|
||||
const Aurora::CudaMatrix &dims, bool BENT,
|
||||
const Aurora::CudaMatrix &potentialMap){
|
||||
Recon::BuildMatrixResult result;
|
||||
unsigned int numDims = senderList.getDimSize(0);
|
||||
unsigned int nTotalRays = senderList.getDimSize(1);
|
||||
b3dStruct* structList= nullptr;
|
||||
cudaMalloc((void**)&structList, sizeof(b3dStruct)*nTotalRays);
|
||||
int blocksPerGrid = (nTotalRays + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
|
||||
calcLenWeightKernel<<<blocksPerGrid, THREADS_PER_BLOCK>>>(
|
||||
senderList.getData(), receiverList.getData(), res.getData(),nTotalRays,structList);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
unsigned int* offset= nullptr;
|
||||
cudaMalloc((void**)&offset, sizeof(unsigned int)*nTotalRays);
|
||||
calcPathOffsetKernel<<<1,1>>>(structList,offset,nTotalRays);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
unsigned int size;
|
||||
cudaMemcpy(&size, offset+(nTotalRays-1), sizeof(unsigned int), cudaMemcpyDeviceToHost);
|
||||
|
||||
float* iData = nullptr;
|
||||
float* jData = nullptr;
|
||||
float* sData = nullptr;
|
||||
|
||||
cudaMalloc((void**)&iData, sizeof(float)*size);
|
||||
cudaMalloc((void**)&jData, sizeof(float)*size);
|
||||
cudaMalloc((void**)&sData, sizeof(float)*size);
|
||||
|
||||
calcRayBLPathKernel<<<nTotalRays,1>>>(structList, dims.getData(), offset, jData, nTotalRays);
|
||||
fillIAndSKernel<<<nTotalRays,THREADS_PER_BLOCK>>>(structList, offset, iData, sData);
|
||||
cudaDeviceSynchronize();
|
||||
cudaFree(structList);
|
||||
|
||||
auto mI = Aurora::CudaMatrix::fromRawData(iData, 1, size ,1);
|
||||
auto mJ = Aurora::CudaMatrix::fromRawData(jData, 1, size,1);
|
||||
auto mS = Aurora::CudaMatrix::fromRawData(sData, 1, size,1);
|
||||
result.M = Aurora::Sparse(mI.toHostMatrix(), mJ.toHostMatrix(),mS.toHostMatrix(),nTotalRays, Aurora::prod(dims).getValue(0));
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef __BUILDMATRIX_CUH__
|
||||
#define __BUILDMATRIX_CUH__
|
||||
#include "buildMatrix.h"
|
||||
namespace Recon {
|
||||
|
||||
BuildMatrixResult buildMatrix(const Aurora::CudaMatrix &senderList,
|
||||
const Aurora::CudaMatrix &receiverList,
|
||||
Aurora::CudaMatrix& res,
|
||||
const Aurora::CudaMatrix &dims, bool BENT,
|
||||
const Aurora::CudaMatrix &potentialMap);
|
||||
}
|
||||
#endif // __BUILDMATRIX_H__
|
||||
@@ -9,13 +9,19 @@
|
||||
#include "CudaEnvInit.h"
|
||||
#include "log/notify.h"
|
||||
#include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.h"
|
||||
#include "transmissionReconstruction/reconstruction/buildMatrix/buildMatrix.cuh"
|
||||
#include "src/transmissionReconstruction/reconstruction/solvingEquationSystem/solve.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <future>
|
||||
using namespace Aurora;
|
||||
using solveParameterIteratorFunctionType = std::vector<std::vector<Aurora::Matrix>> (*)(Aurora::Sparse M, Aurora::Matrix &b,
|
||||
const Aurora::Matrix &dims, bool oneIter, bool nonNeg, int aDevice);
|
||||
using slownessToSOSFunctionType = Matrix (*)(Aurora::Matrix & aVF1, float aSOS_IN_WATER);
|
||||
namespace Recon {
|
||||
Aurora::Matrix calculateMinimalMaximalTransducerPositions(
|
||||
const Aurora::Matrix &aMSenderList, const Aurora::Matrix &aMReceiverList) {
|
||||
@@ -189,7 +195,9 @@ namespace Recon {
|
||||
BuildMatrixResult buildMatrixR;
|
||||
for(int iter=1; iter<=numIter; ++iter)
|
||||
{
|
||||
buildMatrixR = buildMatrix(senderList, receiverList, res, dims, bentRecon && (iter!=1), potentialMap);
|
||||
auto resDevice = res.toDeviceMatrix();
|
||||
//1200ms
|
||||
buildMatrixR = buildMatrix(senderList.toDeviceMatrix(), receiverList.toDeviceMatrix(), resDevice, dims.toDeviceMatrix(), bentRecon && (iter!=1), potentialMap.toDeviceMatrix());
|
||||
if(!data.isNull() && bentRecon && iter != numIter)
|
||||
{
|
||||
//与默认配置bentRecon不符,暂不实现todo
|
||||
@@ -222,25 +230,23 @@ namespace Recon {
|
||||
{
|
||||
allHitMaps.push_back(buildMatrixR.hitmap);
|
||||
}
|
||||
#pragma omp parallel for num_threads(2)
|
||||
for (int i =0; i<2; i++){
|
||||
if (i ==0){
|
||||
if(!data.isNull())
|
||||
{
|
||||
Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0];
|
||||
result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ;
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(!dataAtt.isNull())
|
||||
{
|
||||
Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg,1)[0][0];
|
||||
result.outATT = attValue/100 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!data.isNull())
|
||||
{
|
||||
//1500ms
|
||||
Matrix sosValue = solveParameterIterator(buildMatrixR.M, b, dims, false, transParams::nonNeg)[0][0];
|
||||
//1ms
|
||||
result.outSOS = slownessToSOS(sosValue, SOS_IN_WATER) ;
|
||||
}
|
||||
|
||||
if(!dataAtt.isNull())
|
||||
{
|
||||
//1500ms
|
||||
Matrix attValue = solveParameterIterator(buildMatrixR.M, bAtt, dims, false, transParams::nonNeg)[0][0];
|
||||
//1ms
|
||||
result.outATT = attValue/100 ;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,28 +16,33 @@
|
||||
#include <Spectra/MatOp/SparseGenMatProd.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/SparseCore>
|
||||
#include <Eigen/Eigenvalues>
|
||||
|
||||
#include <sys/time.h>
|
||||
namespace Recon
|
||||
{
|
||||
bool isEigsFinished = false;
|
||||
bool isEigsStatus = false;
|
||||
|
||||
double eigs(Aurora::Sparse& aM)
|
||||
float eigs(Aurora::Sparse& aM)
|
||||
{
|
||||
double result = NAN;
|
||||
size_t size = aM.getM();
|
||||
if(size < aM.getN())
|
||||
{
|
||||
size = aM.getN();
|
||||
}
|
||||
Eigen::SparseMatrix<double> M(size, size);
|
||||
std::vector<Eigen::Triplet<double>> triplets;
|
||||
Eigen::SparseMatrix<double> M(size,size);
|
||||
Aurora::Matrix rows = aM.getRowVector();
|
||||
Aurora::Matrix columns = aM.getColVector();
|
||||
Aurora::Matrix values = aM.getValVector();
|
||||
std::vector<Eigen::Triplet<double>> triplets(rows.getDataSize());
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < rows.getDataSize(); ++i)
|
||||
{
|
||||
triplets.push_back(Eigen::Triplet<double>(rows[i], columns[i], values[i]));
|
||||
triplets[i] = Eigen::Triplet<double>(rows[i], columns[i], values[i]);
|
||||
}
|
||||
|
||||
M.setFromTriplets(triplets.begin(), triplets.end());
|
||||
float result;
|
||||
Spectra::SparseGenMatProd<double> op(M);
|
||||
Spectra::GenEigsSolver<Spectra::SparseGenMatProd<double>> eigs(op, 1, 6);
|
||||
eigs.init();
|
||||
@@ -49,14 +54,24 @@ namespace Recon
|
||||
std::complex<double> complex = evalues[0];
|
||||
result = complex.real();
|
||||
}
|
||||
|
||||
isEigsFinished = true;
|
||||
if (result> 1 + 1e-10)
|
||||
{
|
||||
isEigsStatus = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n){
|
||||
bool isreal = M.getValVector().getValueType() == Aurora::Normal;
|
||||
auto s2 = eigs(M);
|
||||
if (s2> 1 + 1e-10)
|
||||
void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n)
|
||||
{
|
||||
float s2 = 0.0;
|
||||
if(!isEigsFinished)
|
||||
{
|
||||
s2 = eigs(M);
|
||||
return;;
|
||||
}
|
||||
|
||||
if (isEigsStatus)
|
||||
{
|
||||
b = b / sqrt(s2);
|
||||
M.getValVector() = M.getValVector() / sqrt( s2);
|
||||
@@ -65,23 +80,23 @@ namespace Recon
|
||||
|
||||
Aurora::Matrix callTval3(Aurora::Sparse& M, Aurora::Matrix& b,const Aurora::Matrix& dims,int device, TVALOptions& opt)
|
||||
{
|
||||
checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar());
|
||||
checkAndScale(M,b,(size_t)Aurora::prod(dims).getScalar());
|
||||
int * yIdxs = new int[M.getColVector().getDataSize()];
|
||||
std::copy(M.getColVector().getData(),M.getColVector().getData()+M.getColVector().getDataSize(),yIdxs);
|
||||
int * xIdxs = new int[M.getRowVector().getDataSize()];
|
||||
std::copy(M.getRowVector().getData(),M.getRowVector().getData()+M.getRowVector().getDataSize(),xIdxs);
|
||||
Aurora::Matrix values = M.getValVector();
|
||||
size_t cols = M.getM(), rows = M.getN();
|
||||
int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize()));
|
||||
size_t cols = M.getM(), rows = M.getN();
|
||||
int nz = std::max(M.getColVector().getDataSize(),std::max(M.getRowVector().getDataSize(),M.getValVector().getDataSize()));
|
||||
sparse_matrix_t A;
|
||||
sparse_matrix_t csrA;
|
||||
mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO, rows, cols, nz, yIdxs, xIdxs,values.getData());
|
||||
mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA);
|
||||
mkl_sparse_s_create_coo(&A, sparse_index_base_t::SPARSE_INDEX_BASE_ZERO, rows, cols, nz, yIdxs, xIdxs,values.getData());
|
||||
mkl_sparse_convert_csr(A, sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, &csrA);
|
||||
int n_rows,n_cols;
|
||||
int *rows_start,*rows_end,*col_indx;
|
||||
float * csrValues;
|
||||
sparse_index_base_t index;
|
||||
mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues);
|
||||
mkl_sparse_s_export_csr(csrA, &index, &n_rows, &n_cols, &rows_start, &rows_end, &col_indx, &csrValues);
|
||||
mkl_sparse_destroy(A);
|
||||
delete [] xIdxs;
|
||||
delete [] yIdxs;
|
||||
@@ -101,4 +116,4 @@ namespace Recon
|
||||
return Aurora::Matrix::fromRawData(result.data, result.dims[0],result.dims[1],result.dims[2]);
|
||||
// return Aurora::Matrix();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ namespace Recon {
|
||||
|
||||
void checkAndScale(Aurora::Sparse& M, Aurora::Matrix& b,size_t n);
|
||||
|
||||
double eigs(Aurora::Sparse& aM);
|
||||
float eigs(Aurora::Sparse& aM);
|
||||
|
||||
Aurora::Matrix callTval3(Aurora::Sparse &M, Aurora::Matrix &b,
|
||||
const Aurora::Matrix &dims, int device,
|
||||
|
||||
@@ -22,7 +22,7 @@ namespace Recon
|
||||
int deviceIndex ;
|
||||
};
|
||||
|
||||
Aurora::Matrix solve( Aurora::Sparse& M, Aurora::Matrix& b, const Aurora::Matrix& dims, int niter, TVAL3SolverOptions solverOptions){
|
||||
Aurora::Matrix solve( Aurora::Sparse& M, Aurora::Matrix& b, const Aurora::Matrix& dims, int niter, TVAL3SolverOptions solverOptions, int aDevice){
|
||||
if (Recon::transParams::name.empty()){
|
||||
Recon::transParams::name = "TVAL3";
|
||||
}
|
||||
@@ -40,7 +40,7 @@ namespace Recon
|
||||
opt.mu = solverOptions.TVAL3MU;
|
||||
opt.beta = solverOptions.TVAL3Beta;
|
||||
opt.beta0 = solverOptions.TVAL3Beta0;
|
||||
int device = (int)solverOptions.gpuSelectionList[solverOptions.deviceIndex];
|
||||
int device = aDevice;
|
||||
return callTval3(M, b, dims, device, opt);
|
||||
}
|
||||
//SART
|
||||
@@ -51,7 +51,7 @@ namespace Recon
|
||||
}
|
||||
|
||||
std::vector<std::vector<Aurora::Matrix>> solveParameterIterator(Aurora::Sparse M, Aurora::Matrix &b,
|
||||
const Aurora::Matrix &dims, bool oneIter, bool nonNeg, int index)
|
||||
const Aurora::Matrix &dims, bool oneIter, bool nonNeg, int aDevice)
|
||||
{
|
||||
if (Recon::transParams::name == "TVAL3"){
|
||||
std::vector<std::vector<Aurora::Matrix>> result(Recon::transParams::muValues.getDataSize());
|
||||
@@ -83,7 +83,7 @@ namespace Recon
|
||||
options.TVAL3Beta = Recon::transParams::betaValues[i];
|
||||
options.TVAL3Beta0 = Recon::transParams::betaValues[i];
|
||||
options.nonNeg = nonNeg;
|
||||
solveResult[j] = solve(M, b, dims, transParams::maxIter, options);
|
||||
solveResult[j] = solve(M, b, dims, transParams::maxIter, options, aDevice);
|
||||
solveResult[j].forceReshape(dims[0], dims[1], dims.getDataSize()<3?1:dims[2]);
|
||||
}
|
||||
result[i] = solveResult;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
namespace Recon {
|
||||
std::vector<std::vector<Aurora::Matrix>>
|
||||
solveParameterIterator(Aurora::Sparse M, Aurora::Matrix &b,
|
||||
const Aurora::Matrix &dims, bool oneIter = true, bool nonNeg = false,int index=0);
|
||||
const Aurora::Matrix &dims, bool oneIter = true, bool nonNeg = false, int aDevice = 0);
|
||||
} // namespace Recon
|
||||
|
||||
#endif // __SOLVE_H__
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "startTransmissionReconstruction.h"
|
||||
#include "./detection/getTransmissionData.h"
|
||||
#include "Matrix.h"
|
||||
#include "CudaMatrix.h"
|
||||
#include "log/log.h"
|
||||
#include "common/dataBlockCreation/removeDataFromArrays.h"
|
||||
#include "log/notify.h"
|
||||
@@ -29,22 +30,22 @@ TransmissionReconstructionResult Recon::startTransmissionReconstruction(const Au
|
||||
aGeom, aGeomRef, aExpInfo, aExpInfoRef, aPreComputes, aParser, aParserRef);
|
||||
Matrix dists = Recon::distanceBetweenTwoPoints(transmissionData.senderList, transmissionData.receiverList);
|
||||
Matrix sosRef = Recon::temperatureToSoundSpeed(transmissionData.waterTempList, "marczak");
|
||||
Recon::notifyProgress(17);
|
||||
//Recon::notifyProgress(17);
|
||||
|
||||
Matrix valid = Recon::checkTofDetections(transmissionData.tofDataTotal, dists, sosRef,
|
||||
Recon::transParams::minSpeedOfSound,Recon::transParams::maxSpeedOfSound).valid;
|
||||
Recon::notifyProgress(18);
|
||||
//Recon::notifyProgress(18);
|
||||
if(transParams::qualityCheck)
|
||||
{
|
||||
qualityReview(sum(valid,Aurora::All)[0], transmissionData.dataInfo.numPossibleScans);
|
||||
}
|
||||
Recon::notifyProgress(19);
|
||||
//Recon::notifyProgress(19);
|
||||
DiscretizePositionValues positionValues = Recon::discretizePositions(transmissionData.senderList, transmissionData.receiverList, Recon::transParams::numPixelXY);
|
||||
Matrix tofData = removeDataFromArrays(transmissionData.tofDataTotal, valid);
|
||||
Matrix attData = removeDataFromArrays(transmissionData.attDataTotal, valid);
|
||||
Matrix senderList = removeDataFromArrays(positionValues.senderCoordList, valid);
|
||||
Matrix reveiverList = removeDataFromArrays(positionValues.receiverCoordList, valid);
|
||||
Recon::notifyProgress(20);
|
||||
//Recon::notifyProgress(20);
|
||||
RECON_INFO("Start reconstructArt.");
|
||||
auto transmissionReon = reconstructArt(tofData, attData, positionValues.dims, senderList, reveiverList, positionValues.res, aTemp.expectedSOSWater[0]);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user