Commit source
This commit is contained in:
424
eikonalGPU/src/StructuredEikonal.cu
Normal file
424
eikonalGPU/src/StructuredEikonal.cu
Normal file
@@ -0,0 +1,424 @@
|
||||
#include "StructuredEikonal.h"
|
||||
#include "omp.h"
|
||||
|
||||
StructuredEikonal::StructuredEikonal(bool verbose)
|
||||
:verbose_(verbose), isCudaMemCreated_(false),
|
||||
width_(256), height_(256), depth_(256),
|
||||
itersPerBlock_(10), solverType_(0) {
|
||||
cudaGetDevice(&gpuDevice_);
|
||||
}
|
||||
|
||||
StructuredEikonal::~StructuredEikonal() {}
|
||||
|
||||
void StructuredEikonal::writeNRRD(std::string filename) {
|
||||
std::fstream out(filename.c_str(), std::ios::out | std::ios::binary);
|
||||
out << "NRRD0001\n";
|
||||
out << "# Complete NRRD file format specification at:\n";
|
||||
out << "# http://teem.sourceforge.net/nrrd/format.html\n";
|
||||
out << "type: double\n";
|
||||
out << "dimension: 3\n";
|
||||
out << "sizes: " << this->width_ << " " << this->height_ << " " << this->depth_ << "\n";
|
||||
out << "endian: little\n";
|
||||
out << "encoding: raw\n\n";
|
||||
for(size_t k = 0; k < this->depth_; k++) {
|
||||
for(size_t j = 0; j < this->height_; j++) {
|
||||
for(size_t i = 0; i < this->width_; i++) {
|
||||
double d = this->answer_[i][j][k];
|
||||
out.write(reinterpret_cast<const char*>(&d),sizeof(double));
|
||||
}
|
||||
}
|
||||
}
|
||||
out.close();
|
||||
}
|
||||
|
||||
void StructuredEikonal::setDims(size_t x, size_t y, size_t z) {
|
||||
this->width_ = x;
|
||||
this->height_ = y;
|
||||
this->depth_ = z;
|
||||
}
|
||||
|
||||
void StructuredEikonal::error(char* msg) {
|
||||
printf("%s\n",msg);
|
||||
assert(false);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void StructuredEikonal::CheckCUDAMemory() {
|
||||
size_t freeMem, totalMem;
|
||||
cudaMemGetInfo(&freeMem, &totalMem);
|
||||
if (this->verbose_) {
|
||||
std::cout << "Total Memory : " << totalMem / (1024 * 1024) << "MB" << std::endl;
|
||||
std::cout << "Free Memory : " << freeMem / (1024 * 1024) << "MB" << std::endl;
|
||||
std::cout << "--" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void StructuredEikonal::init_cuda_mem() {
|
||||
assert(this->width_ > 0 && this->height_ > 0 && this->depth_ > 0);
|
||||
if(this->width_ <= 0 || this->height_ <= 0 || this->depth_ <= 0){
|
||||
printf("Volume dimension cannot be zero");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
// 1. Create /initialize GPU memory
|
||||
size_t nx, ny, nz;
|
||||
|
||||
nx = this->width_ + (BLOCK_LENGTH-this->width_%BLOCK_LENGTH)%BLOCK_LENGTH;
|
||||
ny = this->height_ + (BLOCK_LENGTH-this->height_%BLOCK_LENGTH)%BLOCK_LENGTH;
|
||||
nz = this->depth_ + (BLOCK_LENGTH-this->depth_%BLOCK_LENGTH)%BLOCK_LENGTH;
|
||||
if (this->verbose_) {
|
||||
printf("%d %d %d \n",nx,ny,nz);
|
||||
printf("BLOCK_LENGTH = %d\n",BLOCK_LENGTH);
|
||||
printf("width/height/depth = %d %d %d \n",this->width_,this->height_,this->depth_);
|
||||
}
|
||||
|
||||
auto volSize = nx*ny*nz;
|
||||
auto blkSize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
|
||||
auto nBlkX = nx / BLOCK_LENGTH;
|
||||
auto nBlkY = ny / BLOCK_LENGTH;
|
||||
auto nBlkZ = nz / BLOCK_LENGTH;
|
||||
auto blockNum = nBlkX*nBlkY*nBlkZ;
|
||||
|
||||
this->memoryStruct_.xdim = static_cast<int>(nx);
|
||||
this->memoryStruct_.ydim = static_cast<int>(ny);
|
||||
this->memoryStruct_.zdim = static_cast<int>(nz);
|
||||
this->memoryStruct_.volsize = static_cast<uint>(volSize);
|
||||
this->memoryStruct_.blksize = static_cast<uint>(blkSize);
|
||||
this->memoryStruct_.blklength = BLOCK_LENGTH;
|
||||
this->memoryStruct_.blknum = static_cast<uint>(blockNum);
|
||||
this->memoryStruct_.nIter = static_cast<int>(this->itersPerBlock_); // iter per block
|
||||
|
||||
if(this->isCudaMemCreated_) // delete previous memory
|
||||
{
|
||||
free((DOUBLE*)this->memoryStruct_.h_sol);
|
||||
free((uint*)this->memoryStruct_.h_list);
|
||||
free((bool*)this->memoryStruct_.h_listed);
|
||||
free((bool*)this->memoryStruct_.h_listVol);
|
||||
free((int*)this->memoryStruct_.blockOrder);
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_spd) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_sol) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.t_sol) ); // temp solution for ping-pong
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_con) ); // convergence volume
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_list) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_listVol) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_mask) );
|
||||
}
|
||||
this->isCudaMemCreated_ = true;
|
||||
|
||||
this->memoryStruct_.h_sol = (DOUBLE*) malloc(volSize*sizeof(DOUBLE)); // initial solution
|
||||
this->memoryStruct_.h_list = (uint*) malloc(blockNum*sizeof(uint)); // linear list contains active block indices
|
||||
this->memoryStruct_.h_listed = (bool*) malloc(blockNum*sizeof(bool)); // whether block is added to the list
|
||||
this->memoryStruct_.h_listVol = (bool*) malloc(blockNum*sizeof(bool)); // volume list shows active/nonactive of corresponding block
|
||||
this->memoryStruct_.blockOrder = (int*) malloc(blockNum*sizeof(int));
|
||||
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
//
|
||||
// create host/device memory using CUDA mem functions
|
||||
//
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_spd), volSize*sizeof(double)) );
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_sol), volSize*sizeof(DOUBLE)) );
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.t_sol), volSize*sizeof(DOUBLE)) ); // temp solution for ping-pong
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_con), volSize*sizeof(bool)) ); // convergence volume
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_list), blockNum*sizeof(uint)) );
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_listVol), blockNum*sizeof(bool)) );
|
||||
this->CheckCUDAMemory();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_mask), volSize*sizeof(bool)) );
|
||||
this->CheckCUDAMemory();
|
||||
}
|
||||
|
||||
|
||||
void StructuredEikonal::freeMemory() {
|
||||
free((DOUBLE*)this->memoryStruct_.h_sol);
|
||||
free((uint*)this->memoryStruct_.h_list);
|
||||
free((bool*)this->memoryStruct_.h_listed);
|
||||
free((bool*)this->memoryStruct_.h_listVol);
|
||||
free((int*)this->memoryStruct_.blockOrder);
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_spd) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_sol) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.t_sol) ); // temp solution for ping-pong
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_con) ); // convergence volume
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_list) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_listVol) );
|
||||
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_mask) );
|
||||
}
|
||||
|
||||
void StructuredEikonal::set_attribute_mask() {
|
||||
uint volSize = this->memoryStruct_.volsize;
|
||||
|
||||
int nx, ny, nz, blklength;
|
||||
|
||||
nx = memoryStruct_.xdim;
|
||||
ny = memoryStruct_.ydim;
|
||||
nz = memoryStruct_.zdim;
|
||||
blklength = memoryStruct_.blklength;
|
||||
|
||||
// create host memory
|
||||
double *h_spd = new double[volSize]; // byte speed, host
|
||||
bool *h_mask = new bool[volSize];
|
||||
|
||||
// copy input volume to host memory
|
||||
// make each block to be stored contiguously in 1D memory space
|
||||
uint idx = 0;
|
||||
for(int zStr = 0; zStr < nz; zStr += blklength) {
|
||||
for(int yStr = 0; yStr < ny; yStr += blklength) {
|
||||
for(int xStr = 0; xStr < nx; xStr += blklength) {
|
||||
// for each block
|
||||
for(int z=zStr; z<zStr+blklength; z++) {
|
||||
for(int y=yStr; y<yStr+blklength; y++) {
|
||||
for(int x=xStr; x<xStr+blklength; x++) {
|
||||
if(x<width_ && y<height_ && z<depth_) {
|
||||
h_spd[idx] = this->speeds_[x][y][z];
|
||||
h_mask[idx] = true;
|
||||
} else {
|
||||
h_spd[idx] = speeds_[0][0][0];
|
||||
h_mask[idx] = false;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// initialize GPU memory with host memory
|
||||
CUDA_SAFE_CALL( cudaMemcpy(memoryStruct_.d_spd, h_spd, volSize*sizeof(double), cudaMemcpyHostToDevice) );
|
||||
CUDA_SAFE_CALL( cudaMemcpy(memoryStruct_.d_mask, h_mask, volSize*sizeof(bool), cudaMemcpyHostToDevice) );
|
||||
|
||||
delete[] h_spd;
|
||||
delete[] h_mask;
|
||||
}
|
||||
|
||||
void StructuredEikonal::initialization() {
|
||||
// get / set CUDA device
|
||||
int deviceCount;
|
||||
cudaGetDeviceCount(&deviceCount);
|
||||
int device;
|
||||
for(device = 0; device < deviceCount; ++device) {
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, device);
|
||||
}
|
||||
|
||||
device = this->gpuDevice_;
|
||||
cudaSetDevice(device);
|
||||
|
||||
this->CheckCUDAMemory();
|
||||
this->init_cuda_mem();
|
||||
this->set_attribute_mask();
|
||||
this->CheckCUDAMemory();
|
||||
}
|
||||
|
||||
void StructuredEikonal::map_generator() {
|
||||
double pi = 3.141592653589793238462643383;
|
||||
this->speeds_ = std::vector<std::vector<std::vector<double> > >(
|
||||
this->width_, std::vector<std::vector<double> >(
|
||||
this->height_, std::vector<double>(this->depth_,1.)));
|
||||
switch(this->solverType_){
|
||||
case 0 :
|
||||
//Constant Speed Map
|
||||
break;
|
||||
case 1 :
|
||||
//Sinusoid Speed Map
|
||||
for (int k = 0 ; k < this->depth_ ; ++k) {
|
||||
for (int j = 0 ; j < this->height_; ++j) {
|
||||
for ( int i = 0 ; i < this->width_ ; ++i) {
|
||||
this->speeds_[i][j][k] =
|
||||
(6 + 5*(sin((i*pi)/this->width_ *2))*
|
||||
sin((j*pi)/this->height_*2)*
|
||||
sin((k*pi)/this->depth_*2));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void StructuredEikonal::setGpuDevice(int device){
|
||||
this->gpuDevice_=device;
|
||||
}
|
||||
|
||||
|
||||
void StructuredEikonal::setSeeds(std::vector<std::array<size_t, 3> > seeds) {
|
||||
this->seeds_ = seeds;
|
||||
}
|
||||
|
||||
void StructuredEikonal::setSpeeds(std::vector<std::vector<std::vector<double> > > speeds) {
|
||||
this->speeds_ = speeds;
|
||||
}
|
||||
|
||||
|
||||
void StructuredEikonal::useSeeds() {
|
||||
if (this->verbose_) {
|
||||
std::cout << "Loading seed volume..." << std::endl;
|
||||
}
|
||||
uint volSize, blockNum;
|
||||
int nx, ny, nz, blklength;
|
||||
|
||||
nx = this->memoryStruct_.xdim;
|
||||
ny = this->memoryStruct_.ydim;
|
||||
nz = this->memoryStruct_.zdim;
|
||||
volSize = this->memoryStruct_.volsize;
|
||||
blklength = this->memoryStruct_.blklength;
|
||||
blockNum = this->memoryStruct_.blknum;
|
||||
|
||||
// copy input volume to host memory
|
||||
// make each block to be stored contiguously in 1D memory space
|
||||
uint idx = 0;
|
||||
uint blk_idx = 0;
|
||||
uint list_idx = 0;
|
||||
uint nActiveBlock = 0;
|
||||
//#pragma omp parallel num_threads(8)
|
||||
//{
|
||||
//#pragma omp for
|
||||
|
||||
|
||||
//printf("vol size = %i\n",volSize);
|
||||
|
||||
for(int zStr = 0; zStr < nz; zStr += blklength) {
|
||||
for(int yStr = 0; yStr < ny; yStr += blklength) {
|
||||
for(int xStr = 0; xStr < nx; xStr += blklength) {
|
||||
// for each block
|
||||
bool isSeedBlock = false;
|
||||
|
||||
for(int z=zStr; z<zStr+blklength; z++) {
|
||||
for(int y=yStr; y<yStr+blklength; y++) {
|
||||
for(int x=xStr; x<xStr+blklength; x++) {
|
||||
//printf("idx = %i\n",idx);
|
||||
|
||||
this->memoryStruct_.h_sol[idx] = INF;
|
||||
|
||||
if (this->seeds_.empty()) {
|
||||
if (x == nx/2 && y == ny/2 && z == nz/2) {
|
||||
this->memoryStruct_.h_sol[idx] = 0;
|
||||
isSeedBlock = true;
|
||||
if (this->verbose_) {
|
||||
printf("%d is Selected bt source (if) \n",idx);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(size_t i = 0; i < this->seeds_.size(); i++) {
|
||||
if (this->seeds_[i][0] == x &&
|
||||
this->seeds_[i][1] == y &&
|
||||
this->seeds_[i][2] == z) {
|
||||
this->memoryStruct_.h_sol[idx] = 0;
|
||||
isSeedBlock = true;
|
||||
if (this->verbose_) {
|
||||
printf("%d is Selected bt source (else) \n",idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////////
|
||||
if(isSeedBlock) {
|
||||
if (this->verbose_) {
|
||||
printf("%d,%d,%d is Seed Block \n",zStr,yStr,xStr);
|
||||
}
|
||||
this->memoryStruct_.h_listVol[blk_idx] = true;
|
||||
this->memoryStruct_.h_listed[blk_idx] = true;
|
||||
this->memoryStruct_.h_list[list_idx] = blk_idx;
|
||||
list_idx++;
|
||||
nActiveBlock++;
|
||||
} else {
|
||||
this->memoryStruct_.h_listVol[blk_idx] = false;
|
||||
this->memoryStruct_.h_listed[blk_idx] = false;
|
||||
}
|
||||
blk_idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
//}
|
||||
this->memoryStruct_.nActiveBlock = nActiveBlock;
|
||||
// initialize GPU memory with host memory
|
||||
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_sol, this->memoryStruct_.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
|
||||
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.t_sol, this->memoryStruct_.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
|
||||
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_list, this->memoryStruct_.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
|
||||
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_listVol, this->memoryStruct_.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
|
||||
// initialize GPU memory with constant value
|
||||
CUDA_SAFE_CALL( cudaMemset(this->memoryStruct_.d_con, 1, volSize*sizeof(bool)) );
|
||||
}
|
||||
|
||||
void StructuredEikonal::setMapType(size_t t) {
|
||||
this->solverType_ = t;
|
||||
}
|
||||
|
||||
void StructuredEikonal::solveEikonal() {
|
||||
if (this->speeds_.empty()) {
|
||||
this->map_generator();
|
||||
}
|
||||
this->isCudaMemCreated_ = false;
|
||||
this->initialization();
|
||||
this->useSeeds();
|
||||
runEikonalSolverSimple(this->memoryStruct_, this->verbose_);
|
||||
this->get_solution();
|
||||
this->freeMemory();
|
||||
}
|
||||
|
||||
std::vector< std::vector< std::vector<double> > >
|
||||
StructuredEikonal::getFinalResult() {
|
||||
return this->answer_;
|
||||
}
|
||||
|
||||
void StructuredEikonal::get_solution() {
|
||||
// copy solution from GPU
|
||||
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.h_sol,
|
||||
this->memoryStruct_.d_sol, this->memoryStruct_.volsize*sizeof(DOUBLE),
|
||||
cudaMemcpyDeviceToHost) );
|
||||
//put the data where it belongs in the grand scheme of data!
|
||||
this->answer_ = std::vector<std::vector<std::vector<double> > >(
|
||||
this->width_, std::vector<std::vector<double> >(
|
||||
this->height_, std::vector<double>(this->depth_,0)));
|
||||
|
||||
//#pragma omp parallel num_threads(8)
|
||||
//{
|
||||
//#pragma omp for
|
||||
for(size_t blockID = 0; blockID < this->memoryStruct_.blknum; blockID++) {
|
||||
size_t baseAddr = blockID * this->memoryStruct_.blksize;
|
||||
size_t xgridlength = this->memoryStruct_.xdim/BLOCK_LENGTH;
|
||||
size_t ygridlength = this->memoryStruct_.ydim/BLOCK_LENGTH;
|
||||
// compute block index
|
||||
size_t bx = blockID%xgridlength;
|
||||
size_t tmpIdx = (blockID - bx)/xgridlength;
|
||||
size_t by = tmpIdx%ygridlength;
|
||||
size_t bz = (tmpIdx-by)/ygridlength;
|
||||
//translate back to real space
|
||||
for(int k = 0; k < BLOCK_LENGTH; k++) {
|
||||
for(int j = 0; j < BLOCK_LENGTH; j++) {
|
||||
for(int i = 0; i < BLOCK_LENGTH; i++) {
|
||||
double d = this->memoryStruct_.h_sol[baseAddr +
|
||||
k * BLOCK_LENGTH * BLOCK_LENGTH +
|
||||
j * BLOCK_LENGTH + i];
|
||||
if ((i + bx * BLOCK_LENGTH) < this->width_ &&
|
||||
(j + by * BLOCK_LENGTH) < this->height_ &&
|
||||
(k + bz * BLOCK_LENGTH) < this->depth_) {
|
||||
this->answer_[(i + bx * BLOCK_LENGTH)][(j +
|
||||
by * BLOCK_LENGTH)][k + bz * BLOCK_LENGTH] = d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//}
|
||||
}
|
||||
|
||||
void StructuredEikonal::setItersPerBlock(size_t t) {
|
||||
this->itersPerBlock_ = t;
|
||||
}
|
||||
55
eikonalGPU/src/StructuredEikonal.h
Normal file
55
eikonalGPU/src/StructuredEikonal.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef __STRUCTUREDEIKONAL_H__
|
||||
#define __STRUCTUREDEIKONAL_H__
|
||||
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include "common_def.h"
|
||||
#include "cuda_fim.h"
|
||||
|
||||
/** The class that represents all of the available options for StructuredEikonal */
|
||||
class StructuredEikonal {
|
||||
public:
|
||||
StructuredEikonal(bool verbose = false);
|
||||
virtual ~StructuredEikonal();
|
||||
void setDims(size_t w, size_t h, size_t d);
|
||||
void setMapType(size_t t);
|
||||
void setItersPerBlock(size_t t);
|
||||
void setSpeeds(std::vector<std::vector<std::vector<double> > > speed);
|
||||
void setSeeds(std::vector<std::array<size_t, 3> > seeds);
|
||||
void setGpuDevice(int device);
|
||||
void writeNRRD(std::string filename);
|
||||
void freeMemory();
|
||||
std::vector< std::vector< std::vector<double> > > getFinalResult();
|
||||
/**
|
||||
* Runs the algorithm.
|
||||
*
|
||||
* @data The set of options for the Eikonal algorithm.
|
||||
* The defaults are used if nothing is provided.
|
||||
*/
|
||||
void solveEikonal();
|
||||
//public member for answer
|
||||
std::vector<std::vector<std::vector<double> > > answer_;
|
||||
|
||||
private:
|
||||
void error(char* msg);
|
||||
void CheckCUDAMemory();
|
||||
void init_cuda_mem();
|
||||
void set_attribute_mask();
|
||||
void initialization();
|
||||
void map_generator();
|
||||
void get_solution();
|
||||
void useSeeds();
|
||||
//data
|
||||
bool verbose_;
|
||||
bool isCudaMemCreated_;
|
||||
size_t width_, height_, depth_;
|
||||
size_t itersPerBlock_, solverType_;
|
||||
int gpuDevice_;
|
||||
std::vector<std::array<size_t, 3> > seeds_;
|
||||
CUDAMEMSTRUCT memoryStruct_;
|
||||
std::vector<std::vector<std::vector<double> > > speeds_;
|
||||
};
|
||||
|
||||
#endif
|
||||
78
eikonalGPU/src/common_def.h
Normal file
78
eikonalGPU/src/common_def.h
Normal file
@@ -0,0 +1,78 @@
|
||||
//
|
||||
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
|
||||
//
|
||||
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
|
||||
//
|
||||
// 2016. 2. 4
|
||||
//
|
||||
|
||||
//
|
||||
// Common to entire project
|
||||
//
|
||||
|
||||
#ifndef __COMMON_DEF_H__
|
||||
#define __COMMON_DEF_H__
|
||||
|
||||
#include "float.h"
|
||||
#include "math.h"
|
||||
#include "helper_timer.h"
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <assert.h>
|
||||
//
|
||||
// common definition for Eikonal solvers
|
||||
//
|
||||
#ifndef INF
|
||||
#define INF 1e20//FLT_MAX //
|
||||
#endif
|
||||
|
||||
#define BLOCK_LENGTH 4
|
||||
|
||||
#ifndef FLOAT
|
||||
|
||||
#define DOUBLE double
|
||||
#define EPS (DOUBLE)1e-16
|
||||
|
||||
#else
|
||||
|
||||
#define DOUBLE float
|
||||
#define EPS (DOUBLE)1e-6
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// itk image volume definition for 3D anisotropic eikonal solvers
|
||||
//
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
struct CUDA_MEM_STRUCTURE {
|
||||
// volsize/blksize : # of pixel in volume/block
|
||||
// blknum : # of block
|
||||
// blklength : # of pixel in one dimemsion of block
|
||||
uint nActiveBlock, blknum, volsize, blksize;
|
||||
int xdim, ydim, zdim, nIter, blklength; // new new x,y,z dim to aligh power of 4
|
||||
|
||||
// host memory
|
||||
uint *h_list;
|
||||
bool *h_listVol, *h_listed;
|
||||
|
||||
// device memory
|
||||
uint *d_list;
|
||||
double *d_spd;
|
||||
bool *d_mask, *d_listVol, *d_con;
|
||||
|
||||
DOUBLE *h_sol;//h_speedtable[256];
|
||||
DOUBLE *d_sol, *t_sol;
|
||||
|
||||
// GroupOrder
|
||||
int* blockOrder;
|
||||
int K;
|
||||
};
|
||||
|
||||
typedef struct CUDA_MEM_STRUCTURE CUDAMEMSTRUCT;
|
||||
|
||||
void CUT_SAFE_CALL(cudaError_t error);
|
||||
void CUDA_SAFE_CALL(cudaError_t error);
|
||||
|
||||
#endif
|
||||
52
eikonalGPU/src/cuda_compute_capability.c
Normal file
52
eikonalGPU/src/cuda_compute_capability.c
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber@gmail.com
|
||||
*
|
||||
* This code is licensed under the MIT License. See the FindCUDA.cmake script
|
||||
* for the text of the license.
|
||||
*
|
||||
* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
|
||||
* http://stackoverflow.com/questions/2285185
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
int main() {
|
||||
int deviceCount, device, major = 9999, minor = 9999;
|
||||
int gpuDeviceCount = 0;
|
||||
struct cudaDeviceProp properties;
|
||||
|
||||
if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
|
||||
{
|
||||
printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
|
||||
return 1;
|
||||
}
|
||||
/* machines with no GPUs can still report one emulation device */
|
||||
for (device = 0; device < deviceCount; ++device) {
|
||||
cudaGetDeviceProperties(&properties, device);
|
||||
if (properties.major != 9999) {/* 9999 means emulation only */
|
||||
++gpuDeviceCount;
|
||||
/* get minimum compute capability of all devices */
|
||||
if (major > properties.major) {
|
||||
major = properties.major;
|
||||
minor = properties.minor;
|
||||
} else if (minor > properties.minor) {
|
||||
minor = properties.minor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* don't just return the number of gpus, because other runtime cuda
|
||||
errors can also yield non-zero return values */
|
||||
if (gpuDeviceCount > 0) {
|
||||
if (major == 2 && minor == 1)
|
||||
{
|
||||
// There is no --arch compute_21 flag for nvcc, so force minor to 0
|
||||
minor = 0;
|
||||
}
|
||||
/* this output will be parsed by FindCUDA.cmake */
|
||||
printf("%d%d", major, minor);
|
||||
return 0; /* success */
|
||||
}
|
||||
return 1; /* failure */
|
||||
}
|
||||
326
eikonalGPU/src/cuda_fim.cu
Normal file
326
eikonalGPU/src/cuda_fim.cu
Normal file
@@ -0,0 +1,326 @@
|
||||
//
|
||||
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
|
||||
//
|
||||
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
|
||||
//
|
||||
// 2016. 2. 4
|
||||
//
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <float.h>
|
||||
#include <assert.h>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "cuda_fim_kernel.h"
|
||||
#include "cuda_fim.h"
|
||||
|
||||
void CUT_SAFE_CALL(cudaError_t error) {
|
||||
if(error != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error! %d \n",error);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void CUDA_SAFE_CALL(cudaError_t error) {
|
||||
CUT_SAFE_CALL(error);
|
||||
}
|
||||
|
||||
void runEikonalSolverSimple(CUDAMEMSTRUCT &cmem, bool verbose)
|
||||
{
|
||||
int deviceID;
|
||||
cudaGetDevice(&deviceID);
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, deviceID);
|
||||
if (verbose) {
|
||||
printf("Current device id : %d, name : %s\n", deviceID, deviceProp.name);
|
||||
}
|
||||
|
||||
int xdim, ydim, zdim;
|
||||
xdim = cmem.xdim;
|
||||
ydim = cmem.ydim;
|
||||
zdim = cmem.zdim;
|
||||
|
||||
#ifdef __DEVICE_EMULATION__
|
||||
assert(xdim%BLOCK_LENGTH == 0);
|
||||
assert(ydim%BLOCK_LENGTH == 0);
|
||||
assert(zdim%BLOCK_LENGTH == 0);
|
||||
#endif
|
||||
|
||||
// create volumes
|
||||
uint volSize = cmem.volsize;
|
||||
uint blockNum = cmem.blknum;
|
||||
|
||||
//int nBlkZ = zdim/BLOCK_LENGTH;
|
||||
|
||||
if (verbose) {
|
||||
printf("# of total voxels : %d\n", volSize);
|
||||
printf("# of total blocks : %d\n", blockNum);
|
||||
}
|
||||
|
||||
// h_ : host memory, d_ : device memory
|
||||
|
||||
// copy speed table to constant variable
|
||||
//cudaMemcpyToSymbol((const char*)_speed, cmem.h_speedtable, sizeof(float)*256);
|
||||
|
||||
int nIter = cmem.nIter;
|
||||
uint nActiveBlock = cmem.nActiveBlock; // active list
|
||||
|
||||
double *d_spd = cmem.d_spd;
|
||||
DOUBLE *d_sol = cmem.d_sol;
|
||||
DOUBLE *t_sol = cmem.t_sol;
|
||||
|
||||
uint *d_list = cmem.d_list;
|
||||
bool *d_listVol = cmem.d_listVol;
|
||||
|
||||
bool *d_con = cmem.d_con;
|
||||
bool *d_mask = cmem.d_mask;
|
||||
|
||||
// copy so that original value should not be modified
|
||||
uint *h_list = (uint*) malloc(blockNum*sizeof(uint));
|
||||
bool *h_listed = (bool*) malloc(blockNum*sizeof(bool));
|
||||
bool *h_listVol = (bool*) malloc(blockNum*sizeof(bool));
|
||||
|
||||
// initialization
|
||||
memcpy(h_list, cmem.h_list, blockNum*sizeof(uint));
|
||||
memcpy(h_listed, cmem.h_listed, blockNum*sizeof(bool));
|
||||
memcpy(h_listVol, cmem.h_listVol, blockNum*sizeof(bool));
|
||||
|
||||
CUT_SAFE_CALL( cudaMemcpy(cmem.d_list, cmem.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
|
||||
CUT_SAFE_CALL( cudaMemcpy(cmem.d_listVol, cmem.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
|
||||
CUT_SAFE_CALL( cudaMemcpy(cmem.d_sol, cmem.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
|
||||
CUT_SAFE_CALL( cudaMemcpy(cmem.t_sol, cmem.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
|
||||
CUT_SAFE_CALL( cudaMemset(cmem.d_con, 1, volSize*sizeof(bool)) );
|
||||
|
||||
// set dimension of block and entire grid size
|
||||
dim3 dimBlock(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH);
|
||||
dim3 dimEntireGrid(blockNum);
|
||||
dim3 dimGrid(nActiveBlock);
|
||||
|
||||
int nTotalIter = 0;
|
||||
//uint sharedmemsize = sizeof(float)*BLOCK_LENGTH*BLOCK_LENGTH*(3*BLOCK_LENGTH + 2);
|
||||
|
||||
std::vector<int> sourceList;
|
||||
sourceList.push_back((zdim/2)*ydim*xdim + (ydim/2)*xdim + (xdim/2));
|
||||
|
||||
// initialize & start timer
|
||||
StopWatchInterface *timer_total, *timer_solver, *timer_reduction, *timer_list, *timer_list2, *timer_coarse;
|
||||
timer_total = timer_solver = timer_reduction = timer_list = timer_list2 = timer_coarse = NULL;
|
||||
|
||||
sdkCreateTimer(&timer_total);
|
||||
sdkCreateTimer(&timer_solver);
|
||||
sdkCreateTimer(&timer_reduction);
|
||||
sdkCreateTimer(&timer_list);
|
||||
sdkCreateTimer(&timer_list2);
|
||||
sdkCreateTimer(&timer_coarse);
|
||||
|
||||
sdkStartTimer(&timer_total);
|
||||
|
||||
uint nTotalBlockProcessed = 0;
|
||||
|
||||
int maxIter = 1000;
|
||||
|
||||
// start solver
|
||||
while(nActiveBlock > 0 && nTotalIter < maxIter)
|
||||
{
|
||||
//CUT_SAFE_CALL( cudaMemcpy(cmem.d_sol, cmem.h_sol, 64*nActiveBlock*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
|
||||
assert(nActiveBlock < 4294967295);
|
||||
|
||||
nTotalBlockProcessed += nActiveBlock;
|
||||
|
||||
nTotalIter++;
|
||||
|
||||
//
|
||||
// solve current blocks in the active lists
|
||||
//
|
||||
|
||||
// printf("# of active tiles : %u\n", nActiveBlock);
|
||||
if (verbose) {
|
||||
printf("# of active tiles : %u\n", nActiveBlock);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 1. run solver on current active tiles
|
||||
|
||||
sdkStartTimer(&timer_solver);
|
||||
|
||||
dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
|
||||
dimGrid.x = (unsigned int)ceil ((double)nActiveBlock/(double)dimGrid.y);
|
||||
|
||||
if (verbose) {
|
||||
printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
|
||||
}
|
||||
|
||||
CUT_SAFE_CALL( cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
|
||||
|
||||
run_solver<<< dimGrid, dimBlock >>>(d_spd, d_mask, d_sol, t_sol, d_con, d_list, xdim, ydim, zdim, nIter, nActiveBlock);
|
||||
|
||||
CUT_SAFE_CALL(cudaGetLastError());
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
sdkStopTimer(&timer_solver);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 2. reduction (only active tiles)
|
||||
|
||||
sdkStartTimer(&timer_reduction);
|
||||
|
||||
run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
|
||||
|
||||
CUT_SAFE_CALL(cudaGetLastError());
|
||||
//CUT_CHECK_ERROR("Kernel execution failed");
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
sdkStopTimer(&timer_reduction);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 3. check neighbor tiles of converged tile
|
||||
// Add any active block of neighbor of converged block is inserted
|
||||
// to the list
|
||||
|
||||
// read-back active list volume
|
||||
#ifdef TIMER
|
||||
sdkStartTimer(&timer_list);
|
||||
#endif
|
||||
CUT_SAFE_CALL( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
|
||||
|
||||
uint nOldActiveBlock = nActiveBlock;
|
||||
uint nBlkX = xdim/BLOCK_LENGTH;
|
||||
uint nBlkY = ydim/BLOCK_LENGTH;
|
||||
|
||||
for(uint i=0; i<nOldActiveBlock; i++)
|
||||
{
|
||||
// check 6-neighbor of current active tile
|
||||
uint currBlkIdx = h_list[i];
|
||||
|
||||
if(!h_listVol[currBlkIdx]) // not active : converged
|
||||
{
|
||||
uint nb[6];
|
||||
nb[0] = (currBlkIdx < nBlkX*nBlkY) ? currBlkIdx : (currBlkIdx - nBlkX*nBlkY); //tp
|
||||
nb[1] = ((currBlkIdx + nBlkX*nBlkY) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX*nBlkY); //bt
|
||||
nb[2] = (currBlkIdx < nBlkX) ? currBlkIdx : (currBlkIdx - nBlkX); //up
|
||||
nb[3] = ((currBlkIdx + nBlkX) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX); //dn
|
||||
nb[4] = (currBlkIdx%nBlkX == 0) ? currBlkIdx : currBlkIdx-1; //lf
|
||||
nb[5] = ((currBlkIdx+1)%nBlkX == 0) ? currBlkIdx : currBlkIdx+1; //rt
|
||||
|
||||
for(int nbIdx = 0; nbIdx < 6; nbIdx++)
|
||||
{
|
||||
uint currIdx = nb[nbIdx];
|
||||
|
||||
// assert(currIdx < volSize);
|
||||
|
||||
if(!h_listed[currIdx])
|
||||
{
|
||||
h_listed[currIdx] = true;
|
||||
h_list[nActiveBlock++] = currIdx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cudaDeviceSynchronize();
|
||||
#ifdef TIMER
|
||||
sdkStopTimer(&timer_list);
|
||||
#endif
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 4. run solver only once for neighbor blocks of converged block
|
||||
// current active list contains active blocks and neighbor blocks of
|
||||
// any converged blocks.
|
||||
//
|
||||
|
||||
#ifdef TIMER
|
||||
sdkStartTimer(&timer_solver);
|
||||
#endif
|
||||
|
||||
// update grid dimension because nActiveBlock is changed
|
||||
dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
|
||||
dimGrid.x = (unsigned int)ceil((double)nActiveBlock/(double)dimGrid.y);
|
||||
|
||||
if (verbose) {
|
||||
printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
|
||||
}
|
||||
|
||||
CUT_SAFE_CALL(cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
|
||||
run_check_neighbor<<< dimGrid, dimBlock >>>(d_spd, d_mask, t_sol, d_sol, d_con, d_list, xdim, ydim, zdim, nOldActiveBlock, nActiveBlock);
|
||||
CUT_SAFE_CALL(cudaGetLastError());
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
#ifdef TIMER
|
||||
sdkStopTimer(&timer_solver);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 5. reduction
|
||||
|
||||
sdkStartTimer(&timer_reduction);
|
||||
|
||||
run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
|
||||
CUT_SAFE_CALL(cudaGetLastError());
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
sdkStopTimer(&timer_reduction);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// 6. update active list
|
||||
// read back active volume from the device and add
|
||||
// active block to active list on the host memory
|
||||
|
||||
|
||||
#ifdef TIMER
|
||||
sdkStartTimer(&timer_list2);
|
||||
#endif
|
||||
nActiveBlock = 0;
|
||||
CUT_SAFE_CALL( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
|
||||
|
||||
for(uint i=0; i<blockNum; i++)
|
||||
{
|
||||
if(h_listVol[i]) // true : active block (not converged)
|
||||
{
|
||||
h_listed[i] = true;
|
||||
h_list[nActiveBlock++] = i;
|
||||
//printf("Block %d added\n", i);
|
||||
}
|
||||
else h_listed[i] = false;
|
||||
}
|
||||
cudaDeviceSynchronize();
|
||||
#ifdef TIMER
|
||||
sdkStopTimer(&timer_list2);
|
||||
#endif
|
||||
|
||||
|
||||
if (verbose) {
|
||||
printf("Iteration : %d\n", nTotalIter);
|
||||
}
|
||||
}
|
||||
sdkStopTimer(&timer_total);
|
||||
|
||||
if (verbose) {
|
||||
printf("Eikonal solver converged after %d iterations\n", nTotalIter);
|
||||
printf("Total Running Time: %f (sec)\n", sdkGetTimerValue(&timer_total) / 1000);
|
||||
printf("Time for solver : %f (sec)\n", sdkGetTimerValue(&timer_solver) / 1000);
|
||||
printf("Time for reduction : %f (sec)\n", sdkGetTimerValue(&timer_reduction) / 1000);
|
||||
#ifdef TIMER
|
||||
printf("Time for list update-1 (CPU) : %f (sec)\n", sdkGetTimerValue(&timer_list) / 1000);
|
||||
printf("Time for list update-2 (CPU) : %f (sec)\n", sdkGetTimerValue(&timer_list2) / 1000);
|
||||
|
||||
#endif
|
||||
printf("Total # of blocks processed : %d\n", nTotalBlockProcessed);
|
||||
}
|
||||
|
||||
sdkDeleteTimer(&timer_total);
|
||||
sdkDeleteTimer(&timer_solver);
|
||||
sdkDeleteTimer(&timer_reduction);
|
||||
sdkDeleteTimer(&timer_list);
|
||||
|
||||
// delete dynamically allocated host memory
|
||||
free(h_list);
|
||||
free(h_listed);
|
||||
free(h_listVol);
|
||||
}
|
||||
20
eikonalGPU/src/cuda_fim.h
Normal file
20
eikonalGPU/src/cuda_fim.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
|
||||
//
|
||||
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
|
||||
//
|
||||
// 2016. 2. 4
|
||||
//
|
||||
#ifndef __CUDA_FIM_H__
|
||||
#define __CUDA_FIM_H__
|
||||
|
||||
#include <cstdlib>
|
||||
#include "common_def.h"
|
||||
|
||||
#define TIMER
|
||||
|
||||
void CUT_SAFE_CALL(cudaError_t error);
|
||||
void CUDA_SAFE_CALL(cudaError_t error);
|
||||
void runEikonalSolverSimple(CUDAMEMSTRUCT &cmem, bool verbose);
|
||||
|
||||
#endif
|
||||
431
eikonalGPU/src/cuda_fim_kernel.cu
Normal file
431
eikonalGPU/src/cuda_fim_kernel.cu
Normal file
@@ -0,0 +1,431 @@
|
||||
//
|
||||
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
|
||||
//
|
||||
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
|
||||
//
|
||||
// 2016. 2. 4
|
||||
//
|
||||
|
||||
#include "cuda_fim_kernel.h"
|
||||
|
||||
__device__ DOUBLE get_time_eikonal(DOUBLE a, DOUBLE b, DOUBLE c, DOUBLE s)
|
||||
{
|
||||
DOUBLE ret, tmp;
|
||||
|
||||
// a > b > c
|
||||
if(a < b) { tmp = a; a = b; b = tmp; }
|
||||
if(b < c) { tmp = b; b = c; c = tmp; }
|
||||
if(a < b) { tmp = a; a = b; b = tmp; }
|
||||
|
||||
ret = INF;
|
||||
|
||||
if(c < INF)
|
||||
{
|
||||
ret = c + s;
|
||||
|
||||
if(ret > b)
|
||||
{
|
||||
tmp = ((b+c) + sqrtf(2.0f*s*s-(b-c)*(b-c)))*0.5f;
|
||||
|
||||
if(tmp > b) ret = tmp;
|
||||
|
||||
if(ret > a) {
|
||||
tmp = (a+b+c)/3.0f + sqrtf(2.0f*(a*(b-a)+b*(c-b)+c*(a-c))+3.0f*s*s)/3.0f;
|
||||
|
||||
if(tmp > a) ret = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__global__ void run_solver(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim, int nIter, uint nActiveBlock)
|
||||
{
|
||||
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
|
||||
|
||||
if(list_idx < nActiveBlock)
|
||||
{
|
||||
// retrieve actual block index from the active list
|
||||
uint block_idx = list[list_idx];
|
||||
|
||||
double F;
|
||||
bool isValid;
|
||||
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
uint base_addr = block_idx*blocksize;
|
||||
|
||||
uint xgridlength = xdim/BLOCK_LENGTH;
|
||||
uint ygridlength = ydim/BLOCK_LENGTH;
|
||||
uint zgridlength = zdim/BLOCK_LENGTH;
|
||||
|
||||
// compute block index
|
||||
uint bx = block_idx%xgridlength;
|
||||
uint tmpIdx = (block_idx - bx)/xgridlength;
|
||||
uint by = tmpIdx%ygridlength;
|
||||
uint bz = (tmpIdx-by)/ygridlength;
|
||||
|
||||
uint tx = threadIdx.x;
|
||||
uint ty = threadIdx.y;
|
||||
uint tz = threadIdx.z;
|
||||
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
|
||||
|
||||
__shared__ DOUBLE _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
|
||||
|
||||
// copy global to shared memory
|
||||
dim3 idx(tx+1,ty+1,tz+1);
|
||||
|
||||
SOL(idx.x,idx.y,idx.z) = sol_in[base_addr + tIdx];
|
||||
//F = _speed[(int)spd[base_addr + tIdx]];
|
||||
F = spd[base_addr + tIdx];
|
||||
if(F > 0) F = 1.0/F; // F = 1/f
|
||||
isValid = mask[base_addr + tIdx];
|
||||
|
||||
uint new_base_addr, new_tIdx;
|
||||
|
||||
// 1-neighborhood values
|
||||
if(tx == 0)
|
||||
{
|
||||
if(bx == 0) // end of the grid
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + BLOCK_LENGTH-1;
|
||||
new_base_addr = (block_idx - 1)*blocksize;
|
||||
}
|
||||
|
||||
SOL(tx,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tx == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(bx == xgridlength-1) // end of the grid
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1);
|
||||
new_base_addr = (block_idx + 1)*blocksize;
|
||||
}
|
||||
SOL(tx+2,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(ty == 0)
|
||||
{
|
||||
if(by == 0)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx - xgridlength)*blocksize;
|
||||
}
|
||||
|
||||
SOL(idx.x,ty,idx.z) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(ty == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(by == ygridlength-1)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx + xgridlength)*blocksize;
|
||||
}
|
||||
|
||||
SOL(idx.x,ty+2,idx.z) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tz == 0)
|
||||
{
|
||||
if(bz == 0)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
|
||||
}
|
||||
|
||||
SOL(idx.x,idx.y,tz) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tz == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(bz == zgridlength-1)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
|
||||
}
|
||||
|
||||
SOL(idx.x,idx.y,tz+2) = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
DOUBLE a,b,c,oldT,newT;
|
||||
|
||||
for(int iter=0; iter<nIter; iter++)
|
||||
{
|
||||
//
|
||||
// compute new value
|
||||
//
|
||||
oldT = newT = SOL(idx.x,idx.y,idx.z);
|
||||
|
||||
if(isValid)
|
||||
{
|
||||
a = min(SOL(tx,idx.y,idx.z),SOL(tx+2,idx.y,idx.z));
|
||||
b = min(SOL(idx.x,ty,idx.z),SOL(idx.x,ty+2,idx.z));
|
||||
c = min(SOL(idx.x,idx.y,tz),SOL(idx.x,idx.y,tz+2));
|
||||
|
||||
DOUBLE tmp = (DOUBLE) get_time_eikonal(a, b, c, F);
|
||||
|
||||
newT = min(tmp,oldT);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if(isValid) SOL(idx.x,idx.y,idx.z) = newT;
|
||||
|
||||
__syncthreads(); // this may not required
|
||||
}
|
||||
|
||||
|
||||
DOUBLE residue = oldT - newT;
|
||||
|
||||
// write back to global memory
|
||||
con[base_addr + tIdx] = (residue < EPS) ? true : false;
|
||||
sol_out[base_addr + tIdx] = newT;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock)
|
||||
{
|
||||
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
|
||||
|
||||
if(list_idx < nActiveBlock)
|
||||
{
|
||||
uint block_idx = list[list_idx];
|
||||
|
||||
__shared__ bool conv[BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH];
|
||||
|
||||
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH/2;
|
||||
uint base_addr = block_idx*blocksize*2;
|
||||
uint tx = threadIdx.x;
|
||||
uint ty = threadIdx.y;
|
||||
uint tz = threadIdx.z;
|
||||
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
|
||||
|
||||
conv[tIdx] = con[base_addr + tIdx];
|
||||
conv[tIdx + blocksize] = con[base_addr + tIdx + blocksize];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for(uint i=blocksize; i>0; i/=2)
|
||||
{
|
||||
if(tIdx < i)
|
||||
{
|
||||
bool b1, b2;
|
||||
b1 = conv[tIdx];
|
||||
b2 = conv[tIdx+i];
|
||||
conv[tIdx] = (b1 && b2) ? true : false ;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if(tIdx == 0)
|
||||
{
|
||||
listVol[block_idx] = !conv[0]; // active list is negation of tile convergence (active = not converged)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void run_check_neighbor(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out,
|
||||
bool *con, uint* list, int xdim, int ydim, int zdim,
|
||||
uint nActiveBlock, uint nTotalBlock)
|
||||
{
|
||||
|
||||
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
|
||||
|
||||
if(list_idx < nTotalBlock)
|
||||
{
|
||||
double F;
|
||||
bool isValid;
|
||||
__shared__ DOUBLE _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
|
||||
|
||||
uint block_idx = list[list_idx];
|
||||
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
uint base_addr = block_idx*blocksize;
|
||||
|
||||
uint tx = threadIdx.x;
|
||||
uint ty = threadIdx.y;
|
||||
uint tz = threadIdx.z;
|
||||
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
|
||||
|
||||
if(list_idx < nActiveBlock) // copy value
|
||||
{
|
||||
sol_out[base_addr + tIdx] = sol_in[base_addr + tIdx];
|
||||
}
|
||||
else
|
||||
{
|
||||
uint xgridlength = xdim/BLOCK_LENGTH;
|
||||
uint ygridlength = ydim/BLOCK_LENGTH;
|
||||
uint zgridlength = zdim/BLOCK_LENGTH;
|
||||
|
||||
// compute block index
|
||||
uint bx = block_idx%xgridlength;
|
||||
uint tmpIdx = (block_idx - bx)/xgridlength;
|
||||
uint by = tmpIdx%ygridlength;
|
||||
uint bz = (tmpIdx-by)/ygridlength;
|
||||
|
||||
#ifdef __DEVICE_EMULATION__
|
||||
assert(block_idx == bz*xgridlength*ygridlength + by*xgridlength + bx);
|
||||
printf("Block %d's index : %d, %d, %d\n", block_idx, bx, by, bz);
|
||||
printf("Thread %d's index : %d, %d, %d\n", tIdx, tx, ty, tz);
|
||||
#endif
|
||||
|
||||
// copy global to shared memory
|
||||
dim3 idx(tx+1,ty+1,tz+1);
|
||||
_sol[idx.x][idx.y][idx.z] = sol_in[base_addr + tIdx];
|
||||
F = spd[base_addr + tIdx];
|
||||
if(F > 0) F = 1.0/F;
|
||||
isValid = mask[base_addr + tIdx];
|
||||
|
||||
uint new_base_addr, new_tIdx;
|
||||
|
||||
// 1-neighborhood values
|
||||
if(tx == 0)
|
||||
{
|
||||
if(bx == 0) // end of the grid
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + BLOCK_LENGTH-1;
|
||||
new_base_addr = (block_idx - 1)*blocksize;
|
||||
}
|
||||
_sol[tx][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tx == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(bx == xgridlength-1) // end of the grid
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1);
|
||||
new_base_addr = (block_idx + 1)*blocksize;
|
||||
}
|
||||
_sol[tx+2][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(ty == 0)
|
||||
{
|
||||
if(by == 0)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx - xgridlength)*blocksize;
|
||||
}
|
||||
_sol[idx.x][ty][idx.z] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(ty == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(by == ygridlength-1)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx + xgridlength)*blocksize;
|
||||
}
|
||||
_sol[idx.x][ty+2][idx.z] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tz == 0)
|
||||
{
|
||||
if(bz == 0)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
|
||||
}
|
||||
_sol[idx.x][idx.y][tz] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
if(tz == BLOCK_LENGTH-1)
|
||||
{
|
||||
if(bz == zgridlength-1)
|
||||
{
|
||||
new_tIdx = tIdx;
|
||||
new_base_addr = base_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
|
||||
new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
|
||||
}
|
||||
_sol[idx.x][idx.y][tz+2] = sol_in[new_base_addr + new_tIdx];
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
|
||||
DOUBLE a,b,c,oldT,newT;
|
||||
|
||||
//
|
||||
// compute new value
|
||||
//
|
||||
oldT = newT = _sol[idx.x][idx.y][idx.z];
|
||||
|
||||
if(isValid)
|
||||
{
|
||||
a = min(_sol[tx][idx.y][idx.z],_sol[tx+2][idx.y][idx.z]);
|
||||
b = min(_sol[idx.x][ty][idx.z],_sol[idx.x][ty+2][idx.z]);
|
||||
c = min(_sol[idx.x][idx.y][tz],_sol[idx.x][idx.y][tz+2]);
|
||||
|
||||
DOUBLE tmp = (DOUBLE) get_time_eikonal(a, b, c, F);
|
||||
newT = min(tmp,oldT);
|
||||
|
||||
sol_out[base_addr + tIdx] = newT;
|
||||
}
|
||||
// write back to global memory
|
||||
DOUBLE residue = oldT - newT;
|
||||
con[base_addr + tIdx] = (residue < EPS) ? true : false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
53
eikonalGPU/src/cuda_fim_kernel.h
Normal file
53
eikonalGPU/src/cuda_fim_kernel.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
|
||||
//
|
||||
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
|
||||
//
|
||||
// 2016. 2. 4
|
||||
//
|
||||
|
||||
#ifndef _cuda_fim_KERNEL_H_
|
||||
#define _cuda_fim_KERNEL_H_
|
||||
|
||||
#include <cstdio>
|
||||
#include "common_def.h"
|
||||
|
||||
// check bank confilct only when device emulation mode
|
||||
#ifdef __DEVICE_EMULATION__
|
||||
#define CHECK_BANK_CONFLICTS
|
||||
#endif
|
||||
|
||||
#ifdef CHECK_BANK_CONFLICTS
|
||||
#define MEM(index) CUT_BANK_CHECKER(_mem, index)
|
||||
#define SOL(i,j,k) CUT_BANK_CHECKER(((float*)&_sol[0][0][0]), (k*(BLOCK_LENGTH+2)*(BLOCK_LENGTH+2) + j*(BLOCK_LENGTH+2) + i))
|
||||
#define SPD(i,j,k) CUT_BANK_CHECKER(((float*)&_spd[0][0][0]), (k*(BLOCK_LENGTH)*(BLOCK_LENGTH) + j*(BLOCK_LENGTH) + i))
|
||||
#else
|
||||
#define MEM(index) _mem[index]
|
||||
#define SOL(i,j,k) _sol[i][j][k]
|
||||
#define SPD(i,j,k) _spd[i][j][k]
|
||||
#endif
|
||||
|
||||
__device__ DOUBLE get_time_eikonal(DOUBLE a, DOUBLE b, DOUBLE c, DOUBLE s);
|
||||
//
|
||||
// F : Input speed (positive)
|
||||
// if F =< 0, skip that pixel (masking out)
|
||||
//
|
||||
__global__ void run_solver(double* spd, bool* mask, const DOUBLE *sol_in,
|
||||
DOUBLE *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim,
|
||||
int nIter, uint nActiveBlock);
|
||||
//
|
||||
// run_reduction
|
||||
//
|
||||
// con is pixelwise convergence. Do reduction on active tiles and write tile-wise
|
||||
// convergence to listVol. The implementation assumes that the block size is 4x4x4.
|
||||
//
|
||||
__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock);
|
||||
//
|
||||
// if block is active block, copy values
|
||||
// if block is neighbor, run solver once
|
||||
//
|
||||
__global__ void run_check_neighbor(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out,
|
||||
bool *con, uint* list, int xdim, int ydim, int zdim,
|
||||
uint nActiveBlock, uint nTotalBlock);
|
||||
#endif // #ifndef _cuda_fim_KERNEL_H_
|
||||
|
||||
183
eikonalGPU/src/eikonal.cpp
Normal file
183
eikonalGPU/src/eikonal.cpp
Normal file
@@ -0,0 +1,183 @@
|
||||
#include "eikonal.h"
|
||||
|
||||
#include <vector>
|
||||
#include <time.h>
|
||||
|
||||
#include "StructuredEikonal.h"
|
||||
|
||||
#define mwSize int
|
||||
|
||||
// #define timeMeasure 1
|
||||
// #define verbose 1
|
||||
|
||||
double* eikonal(double* volume,int* dims,double* startpoint) {
|
||||
|
||||
// for time measurement
|
||||
#ifdef timeMeasure
|
||||
clock_t begin, end;
|
||||
float z;
|
||||
#endif
|
||||
|
||||
// default parameters for number of threads and iters per block
|
||||
size_t itersPerBlock = 10;
|
||||
int nthreads=8;
|
||||
int gpuSelection;
|
||||
bool structuredEikonalVerbose = false;
|
||||
#ifdef verbose
|
||||
structuredEikonalVerbose = true;
|
||||
#endif
|
||||
|
||||
#ifdef timeMeasure
|
||||
begin = clock();
|
||||
#endif
|
||||
|
||||
|
||||
/* check if 3D and adapted dimension to size = 1 if 2D
|
||||
(mxGetDimensions reduces dim[2] to 0 if 2D map is given) */
|
||||
mwSize dimsAdapted[3] = {0,0,0};
|
||||
|
||||
|
||||
if(dims[0] == 0) {
|
||||
dimsAdapted[0] = 1;
|
||||
} else {
|
||||
dimsAdapted[0] = dims[0];
|
||||
}
|
||||
if(dims[1] == 0) {
|
||||
dimsAdapted[1] = 1;
|
||||
}else {
|
||||
dimsAdapted[1] = dims[1];
|
||||
}
|
||||
if(dims[2] == 0) {
|
||||
dimsAdapted[2] = 1;
|
||||
}else {
|
||||
dimsAdapted[2] = dims[2];
|
||||
}
|
||||
|
||||
|
||||
/* Create output data structures */
|
||||
double* out = new double[dimsAdapted[0]*dimsAdapted[1]*dimsAdapted[2]]{0};
|
||||
|
||||
|
||||
|
||||
#ifdef timeMeasure
|
||||
end = clock();
|
||||
z=end - begin;
|
||||
z/=CLOCKS_PER_SEC;
|
||||
printf("Time init: %f sec.\n", z);
|
||||
|
||||
begin = clock();
|
||||
#endif
|
||||
|
||||
|
||||
/* vector data structure for input volume */
|
||||
std::vector<std::vector<std::vector<double> > > input;
|
||||
input = std::vector<std::vector<std::vector<double> > >(
|
||||
dimsAdapted[0], std::vector<std::vector<double> >(
|
||||
dimsAdapted[1], std::vector<double>(dimsAdapted[2],1.)));
|
||||
|
||||
|
||||
int linInd;
|
||||
int linInd_k;
|
||||
int linInd_j;
|
||||
|
||||
/* sort input volume input vector data structure */
|
||||
#pragma omp parallel num_threads(nthreads)
|
||||
{
|
||||
#pragma omp for
|
||||
for(size_t k = 0; k < dimsAdapted[2]; k++) {
|
||||
linInd_k = k*dimsAdapted[0]*dimsAdapted[1];
|
||||
for(size_t j = 0; j < dimsAdapted[1]; j++) {
|
||||
linInd_j = linInd_k+j*dimsAdapted[0];
|
||||
for(size_t i = 0; i < dimsAdapted[0]; i++) {
|
||||
linInd = linInd_j + i;
|
||||
input[i][j][k] = volume[linInd];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef timeMeasure
|
||||
end = clock();
|
||||
z=end - begin;
|
||||
z/=CLOCKS_PER_SEC;
|
||||
printf("Time vector init and data copy: %f sec.\n", z);
|
||||
|
||||
begin = clock();
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* init the Eikonal solver */
|
||||
// create object
|
||||
StructuredEikonal data(structuredEikonalVerbose);
|
||||
|
||||
// set the dimensions of the input volume
|
||||
data.setDims(dimsAdapted[0],dimsAdapted[1],dimsAdapted[2]);
|
||||
|
||||
// setting the number of iterations per block (default = 10)
|
||||
data.setItersPerBlock(itersPerBlock);
|
||||
|
||||
// setting the start point for the Eikonal propagation
|
||||
data.setSeeds({ { { { (size_t) startpoint[0], (size_t) startpoint[1], (size_t) startpoint[2] } } } });
|
||||
|
||||
// setting the input data (aka speeds) to the Eikonal solver
|
||||
data.setSpeeds(input);
|
||||
|
||||
|
||||
#ifdef timeMeasure
|
||||
end = clock();
|
||||
z=end - begin;
|
||||
z/=CLOCKS_PER_SEC;
|
||||
printf("Time Eikonal init: %f sec.\n", z);
|
||||
|
||||
begin = clock();
|
||||
#endif
|
||||
|
||||
|
||||
/* Solve Eikonal and get the results */
|
||||
data.solveEikonal();
|
||||
|
||||
|
||||
#ifdef timeMeasure
|
||||
end = clock();
|
||||
z=end - begin;
|
||||
z/=CLOCKS_PER_SEC;
|
||||
printf("Time Eikonal solve: %f sec.\n", z);
|
||||
|
||||
begin = clock();
|
||||
#endif
|
||||
|
||||
|
||||
/* sort vector into output array. */
|
||||
#pragma omp parallel num_threads(nthreads)
|
||||
{
|
||||
#pragma omp for
|
||||
for(size_t k = 0; k < dimsAdapted[2]; k++) {
|
||||
linInd_k = k*dimsAdapted[0]*dimsAdapted[1];
|
||||
for(size_t j = 0; j < dimsAdapted[1]; j++) {
|
||||
linInd_j = linInd_k+j*dimsAdapted[0];
|
||||
for(size_t i = 0; i < dimsAdapted[0]; i++) {
|
||||
linInd = linInd_j + i;
|
||||
out[linInd] = data.answer_[i][j][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef timeMeasure
|
||||
end = clock();
|
||||
z=end - begin;
|
||||
z/=CLOCKS_PER_SEC;
|
||||
printf("Time output conversion: %f sec.\n", z);
|
||||
#endif
|
||||
|
||||
/* and return to MATLAB */
|
||||
return out;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
1
eikonalGPU/src/eikonal.h
Normal file
1
eikonalGPU/src/eikonal.h
Normal file
@@ -0,0 +1 @@
|
||||
extern double* eikonal(double* volume,int* dims,double* startpoint);
|
||||
303
eikonalGPU/src/helper_timer.cpp
Normal file
303
eikonalGPU/src/helper_timer.cpp
Normal file
@@ -0,0 +1,303 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
//
|
||||
// Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
// with this source code for terms and conditions that govern your use of
|
||||
// this software. Any use, reproduction, disclosure, or distribution of
|
||||
// this software and related documentation outside the terms of the EULA
|
||||
// is strictly prohibited.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// includes, project
|
||||
#include "helper_timer.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Begin Stopwatch timer class definitions for all OS platforms //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
#ifdef WIN32
|
||||
StopWatchWin::StopWatchWin() :
|
||||
start_time(), end_time(),
|
||||
diff_time(0.0f), total_time(0.0f),
|
||||
running(false), clock_sessions(0), freq(0), freq_set(false)
|
||||
{
|
||||
if (!freq_set) {
|
||||
// helper variable
|
||||
LARGE_INTEGER temp;
|
||||
|
||||
// get the tick frequency from the OS
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&temp);
|
||||
|
||||
// convert to type in which it is needed
|
||||
freq = ((double)temp.QuadPart) / 1000.0;
|
||||
|
||||
// rememeber query
|
||||
freq_set = true;
|
||||
}
|
||||
}
|
||||
|
||||
StopWatchWin::~StopWatchWin() { }
|
||||
// functions, d
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start time measurement
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchWin::start()
|
||||
{
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&start_time);
|
||||
running = true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop time measurement and increment add to the current diff_time summation
|
||||
//! variable. Also increment the number of times this clock has been run.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchWin::stop()
|
||||
{
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&end_time);
|
||||
diff_time = (float)
|
||||
(((double)end_time.QuadPart - (double)start_time.QuadPart) / freq);
|
||||
|
||||
total_time += diff_time;
|
||||
clock_sessions++;
|
||||
running = false;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Reset the timer to 0. Does not change the timer running state but does
|
||||
//! recapture this point in time as the current start time if it is running.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchWin::reset()
|
||||
{
|
||||
diff_time = 0;
|
||||
total_time = 0;
|
||||
clock_sessions = 0;
|
||||
if (running)
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&start_time);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned added to the
|
||||
//! current diff_time sum, otherwise the current summed time difference alone
|
||||
//! is returned.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
StopWatchWin::getTime()
|
||||
{
|
||||
// Return the TOTAL time to date
|
||||
float retval = total_time;
|
||||
if (running)
|
||||
{
|
||||
LARGE_INTEGER temp;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&temp);
|
||||
retval += (float)
|
||||
(((double)(temp.QuadPart - start_time.QuadPart)) / freq);
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. for a single run based on the total number of COMPLETED runs
|
||||
//! and the total time.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
StopWatchWin::getAverageTime()
|
||||
{
|
||||
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
|
||||
}
|
||||
#else
|
||||
StopWatchLinux::StopWatchLinux() :
|
||||
start_time(), diff_time(0.0), total_time(0.0),
|
||||
running(false), clock_sessions(0)
|
||||
{ }
|
||||
|
||||
// Destructor
|
||||
StopWatchLinux::~StopWatchLinux() { }
|
||||
|
||||
// functions, d
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start time measurement
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchLinux::start() {
|
||||
gettimeofday(&start_time, 0);
|
||||
running = true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop time measurement and increment add to the current diff_time summation
|
||||
//! variable. Also increment the number of times this clock has been run.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchLinux::stop() {
|
||||
diff_time = getDiffTime();
|
||||
total_time += diff_time;
|
||||
running = false;
|
||||
clock_sessions++;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Reset the timer to 0. Does not change the timer running state but does
|
||||
//! recapture this point in time as the current start time if it is running.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
StopWatchLinux::reset()
|
||||
{
|
||||
diff_time = 0;
|
||||
total_time = 0;
|
||||
clock_sessions = 0;
|
||||
if (running)
|
||||
gettimeofday(&start_time, 0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned added to the
|
||||
//! current diff_time sum, otherwise the current summed time difference alone
|
||||
//! is returned.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
StopWatchLinux::getTime()
|
||||
{
|
||||
// Return the TOTAL time to date
|
||||
float retval = total_time;
|
||||
if (running) {
|
||||
retval += getDiffTime();
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Time in msec. for a single run based on the total number of COMPLETED runs
|
||||
//! and the total time.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
StopWatchLinux::getAverageTime()
|
||||
{
|
||||
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
StopWatchLinux::getDiffTime()
|
||||
{
|
||||
struct timeval t_time;
|
||||
gettimeofday(&t_time, 0);
|
||||
|
||||
// time difference in milli-seconds
|
||||
return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
|
||||
+ (0.001 * (t_time.tv_usec - start_time.tv_usec)));
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Timer functionality exported
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Create a new timer
|
||||
//! @return true if a time has been created, otherwise false
|
||||
//! @param name of the new timer, 0 if the creation failed
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
sdkCreateTimer(StopWatchInterface **timer_interface)
|
||||
{
|
||||
//printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
|
||||
#ifdef _WIN32
|
||||
*timer_interface = (StopWatchInterface *)new StopWatchWin();
|
||||
#else
|
||||
*timer_interface = (StopWatchInterface *)new StopWatchLinux();
|
||||
#endif
|
||||
return (*timer_interface != NULL) ? true : false;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Delete a timer
|
||||
//! @return true if a time has been deleted, otherwise false
|
||||
//! @param name of the timer to delete
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
sdkDeleteTimer(StopWatchInterface **timer_interface)
|
||||
{
|
||||
//printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface) delete *timer_interface;
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start the time with name \a name
|
||||
//! @param name name of the timer to start
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
sdkStartTimer(StopWatchInterface **timer_interface)
|
||||
{
|
||||
//printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface) (*timer_interface)->start();
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop the time with name \a name. Does not reset.
|
||||
//! @param name name of the timer to stop
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
sdkStopTimer(StopWatchInterface **timer_interface)
|
||||
{
|
||||
// printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface) (*timer_interface)->stop();
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Resets the timer's counter.
|
||||
//! @param name name of the timer to reset.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
sdkResetTimer(StopWatchInterface **timer_interface)
|
||||
{
|
||||
// printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface) (*timer_interface)->reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Return the average time for timer execution as the total time
|
||||
//! for the timer dividied by the number of completed (stopped) runs the timer
|
||||
//! has made.
|
||||
//! Excludes the current running time if the timer is currently running.
|
||||
//! @param name name of the timer to return the time of
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
|
||||
{
|
||||
// printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface)
|
||||
return (*timer_interface)->getAverageTime();
|
||||
else
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Total execution time for the timer over all runs since the last reset
|
||||
//! or timer creation.
|
||||
//! @param name name of the timer to obtain the value of.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float
|
||||
sdkGetTimerValue(StopWatchInterface **timer_interface)
|
||||
{
|
||||
// printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
|
||||
if (*timer_interface)
|
||||
return (*timer_interface)->getTime();
|
||||
else
|
||||
return 0.0f;
|
||||
}
|
||||
228
eikonalGPU/src/helper_timer.h
Normal file
228
eikonalGPU/src/helper_timer.h
Normal file
@@ -0,0 +1,228 @@
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
//
|
||||
// Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
// with this source code for terms and conditions that govern your use of
|
||||
// this software. Any use, reproduction, disclosure, or distribution of
|
||||
// this software and related documentation outside the terms of the EULA
|
||||
// is strictly prohibited.
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Helper Timer Functions (this is the inlined version)
|
||||
|
||||
#ifndef HELPER_TIMER_H
|
||||
#define HELPER_TIMER_H
|
||||
|
||||
// includes, system
|
||||
#include <vector>
|
||||
|
||||
// includes, project
|
||||
#include "my_exception.h"
|
||||
|
||||
// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
|
||||
// But rather in a self contained class interface
|
||||
class StopWatchInterface
|
||||
{
|
||||
public:
|
||||
StopWatchInterface() {};
|
||||
virtual ~StopWatchInterface() {};
|
||||
|
||||
public:
|
||||
//! Start time measurement
|
||||
virtual void start() = 0;
|
||||
|
||||
//! Stop time measurement
|
||||
virtual void stop() = 0;
|
||||
|
||||
//! Reset time counters to zero
|
||||
virtual void reset() = 0;
|
||||
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned, otherwise the
|
||||
//! time between the last start() and stop call is returned
|
||||
virtual float getTime() = 0;
|
||||
|
||||
//! Mean time to date based on the number of times the stopwatch has been
|
||||
//! _stopped_ (ie finished sessions) and the current total time
|
||||
virtual float getAverageTime() = 0;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Begin Stopwatch timer class definitions for all OS platforms //
|
||||
//////////////////////////////////////////////////////////////////
|
||||
#ifdef WIN32
|
||||
// includes, system
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
//! Windows specific implementation of StopWatch
|
||||
class StopWatchWin : public StopWatchInterface
|
||||
{
|
||||
public:
|
||||
//! Constructor, default
|
||||
StopWatchWin();
|
||||
|
||||
// Destructor
|
||||
~StopWatchWin();
|
||||
|
||||
public:
|
||||
//! Start time measurement
|
||||
void start();
|
||||
|
||||
//! Stop time measurement
|
||||
void stop();
|
||||
|
||||
//! Reset time counters to zero
|
||||
void reset();
|
||||
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned, otherwise the
|
||||
//! time between the last start() and stop call is returned
|
||||
float getTime();
|
||||
|
||||
//! Mean time to date based on the number of times the stopwatch has been
|
||||
//! _stopped_ (ie finished sessions) and the current total time
|
||||
float getAverageTime();
|
||||
|
||||
private:
|
||||
// member variables
|
||||
|
||||
//! Start of measurement
|
||||
LARGE_INTEGER start_time;
|
||||
//! End of measurement
|
||||
LARGE_INTEGER end_time;
|
||||
|
||||
//! Time difference between the last start and stop
|
||||
float diff_time;
|
||||
|
||||
//! TOTAL time difference between starts and stops
|
||||
float total_time;
|
||||
|
||||
//! flag if the stop watch is running
|
||||
bool running;
|
||||
|
||||
//! Number of times clock has been started
|
||||
//! and stopped to allow averaging
|
||||
int clock_sessions;
|
||||
|
||||
//! tick frequency
|
||||
double freq;
|
||||
|
||||
//! flag if the frequency has been set
|
||||
bool freq_set;
|
||||
};
|
||||
#else
|
||||
// Declarations for Stopwatch on Linux and Mac OSX
|
||||
// includes, system
|
||||
#include <ctime>
|
||||
#include <sys/time.h>
|
||||
|
||||
//! Windows specific implementation of StopWatch
|
||||
class StopWatchLinux : public StopWatchInterface
|
||||
{
|
||||
public:
|
||||
//! Constructor, default
|
||||
StopWatchLinux();
|
||||
|
||||
// Destructor
|
||||
virtual ~StopWatchLinux();
|
||||
public:
|
||||
//! Start time measurement
|
||||
void start();
|
||||
|
||||
//! Stop time measurement
|
||||
void stop();
|
||||
|
||||
//! Reset time counters to zero
|
||||
void reset();
|
||||
|
||||
//! Time in msec. after start. If the stop watch is still running (i.e. there
|
||||
//! was no call to stop()) then the elapsed time is returned, otherwise the
|
||||
//! time between the last start() and stop call is returned
|
||||
float getTime();
|
||||
|
||||
//! Mean time to date based on the number of times the stopwatch has been
|
||||
//! _stopped_ (ie finished sessions) and the current total time
|
||||
float getAverageTime();
|
||||
|
||||
private:
|
||||
|
||||
// helper functions
|
||||
|
||||
//! Get difference between start time and current time
|
||||
float getDiffTime();
|
||||
|
||||
private:
|
||||
|
||||
// member variables
|
||||
|
||||
//! Start of measurement
|
||||
struct timeval start_time;
|
||||
|
||||
//! Time difference between the last start and stop
|
||||
float diff_time;
|
||||
|
||||
//! TOTAL time difference between starts and stops
|
||||
float total_time;
|
||||
|
||||
//! flag if the stop watch is running
|
||||
bool running;
|
||||
|
||||
//! Number of times clock has been started
|
||||
//! and stopped to allow averaging
|
||||
int clock_sessions;
|
||||
};
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Timer functionality exported
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Create a new timer
|
||||
//! @return true if a time has been created, otherwise false
|
||||
//! @param name of the new timer, 0 if the creation failed
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool sdkCreateTimer(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Delete a timer
|
||||
//! @return true if a time has been deleted, otherwise false
|
||||
//! @param name of the timer to delete
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool sdkDeleteTimer(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Start the time with name \a name
|
||||
//! @param name name of the timer to start
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool sdkStartTimer(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Stop the time with name \a name. Does not reset.
|
||||
//! @param name name of the timer to stop
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool sdkStopTimer(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Resets the timer's counter.
|
||||
//! @param name name of the timer to reset.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool sdkResetTimer(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Return the average time for timer execution as the total time
|
||||
//! for the timer dividied by the number of completed (stopped) runs the timer
|
||||
//! has made.
|
||||
//! Excludes the current running time if the timer is currently running.
|
||||
//! @param name name of the timer to return the time of
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float sdkGetAverageTimerValue(StopWatchInterface **timer_interface);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Total execution time for the timer over all runs since the last reset
|
||||
//! or timer creation.
|
||||
//! @param name name of the timer to obtain the value of.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
float sdkGetTimerValue(StopWatchInterface **timer_interface);
|
||||
|
||||
#endif // HELPER_TIMER_H
|
||||
151
eikonalGPU/src/my_exception.h
Normal file
151
eikonalGPU/src/my_exception.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* CUda UTility Library */
|
||||
#ifndef _MY_EXCEPTION_H_
|
||||
#define _MY_EXCEPTION_H_
|
||||
|
||||
// includes, system
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
|
||||
//! Exception wrapper.
|
||||
//! @param Std_Exception Exception out of namespace std for easy typing.
|
||||
template<class Std_Exception>
|
||||
class Exception : public Std_Exception
|
||||
{
|
||||
public:
|
||||
|
||||
//! @brief Static construction interface
|
||||
//! @return Alwayss throws ( Located_Exception<Exception>)
|
||||
//! @param file file in which the Exception occurs
|
||||
//! @param line line in which the Exception occurs
|
||||
//! @param detailed details on the code fragment causing the Exception
|
||||
static void throw_it( const char* file,
|
||||
const int line,
|
||||
const char* detailed = "-" );
|
||||
|
||||
//! Static construction interface
|
||||
//! @return Alwayss throws ( Located_Exception<Exception>)
|
||||
//! @param file file in which the Exception occurs
|
||||
//! @param line line in which the Exception occurs
|
||||
//! @param detailed details on the code fragment causing the Exception
|
||||
static void throw_it( const char* file,
|
||||
const int line,
|
||||
const std::string& detailed);
|
||||
|
||||
//! Destructor
|
||||
virtual ~Exception() throw();
|
||||
|
||||
private:
|
||||
|
||||
//! Constructor, default (private)
|
||||
Exception();
|
||||
|
||||
//! Constructor, standard
|
||||
//! @param str string returned by what()
|
||||
Exception( const std::string& str);
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Exception handler function for arbitrary exceptions
|
||||
//! @param ex exception to handle
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Exception_Typ>
|
||||
inline void
|
||||
handleException( const Exception_Typ& ex)
|
||||
{
|
||||
std::cerr << ex.what() << std::endl;
|
||||
|
||||
exit( EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//! Convenience macros
|
||||
|
||||
//! Exception caused by dynamic program behavior, e.g. file does not exist
|
||||
#define RUNTIME_EXCEPTION( msg) \
|
||||
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
//! Logic exception in program, e.g. an assert failed
|
||||
#define LOGIC_EXCEPTION( msg) \
|
||||
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
//! Out of range exception
|
||||
#define RANGE_EXCEPTION( msg) \
|
||||
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Implementation
|
||||
|
||||
// includes, system
|
||||
#include <sstream>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template<class Std_Exception>
|
||||
void
|
||||
Exception<Std_Exception>::
|
||||
throw_it( const char* file, const int line, const char* detailed)
|
||||
{
|
||||
std::stringstream s;
|
||||
|
||||
// Quiet heavy-weight but exceptions are not for
|
||||
// performance / release versions
|
||||
s << "Exception in file '" << file << "' in line " << line << "\n"
|
||||
<< "Detailed description: " << detailed << "\n";
|
||||
|
||||
throw Exception( s.str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Static construction interface.
|
||||
//! @param Exception causing code fragment (file and line) and detailed infos.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*static*/ template<class Std_Exception>
|
||||
void
|
||||
Exception<Std_Exception>::
|
||||
throw_it( const char* file, const int line, const std::string& msg)
|
||||
{
|
||||
throw_it( file, line, msg.c_str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, default (private).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::Exception() :
|
||||
Exception("Unknown Exception.\n")
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Constructor, standard (private).
|
||||
//! String returned by what().
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::Exception( const std::string& s) :
|
||||
Std_Exception( s)
|
||||
{ }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Destructor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Std_Exception>
|
||||
Exception<Std_Exception>::~Exception() throw() { }
|
||||
|
||||
// functions, exported
|
||||
|
||||
#endif // #ifndef _EXCEPTION_H_
|
||||
|
||||
Reference in New Issue
Block a user