Commit source

This commit is contained in:
kradchen
2023-05-18 16:04:27 +08:00
parent 88cf81e4ea
commit c6cd188732
83 changed files with 39921 additions and 0 deletions

View File

@@ -0,0 +1,424 @@
#include "StructuredEikonal.h"
#include "omp.h"
StructuredEikonal::StructuredEikonal(bool verbose)
:verbose_(verbose), isCudaMemCreated_(false),
width_(256), height_(256), depth_(256),
itersPerBlock_(10), solverType_(0) {
cudaGetDevice(&gpuDevice_);
}
StructuredEikonal::~StructuredEikonal() {}
void StructuredEikonal::writeNRRD(std::string filename) {
std::fstream out(filename.c_str(), std::ios::out | std::ios::binary);
out << "NRRD0001\n";
out << "# Complete NRRD file format specification at:\n";
out << "# http://teem.sourceforge.net/nrrd/format.html\n";
out << "type: double\n";
out << "dimension: 3\n";
out << "sizes: " << this->width_ << " " << this->height_ << " " << this->depth_ << "\n";
out << "endian: little\n";
out << "encoding: raw\n\n";
for(size_t k = 0; k < this->depth_; k++) {
for(size_t j = 0; j < this->height_; j++) {
for(size_t i = 0; i < this->width_; i++) {
double d = this->answer_[i][j][k];
out.write(reinterpret_cast<const char*>(&d),sizeof(double));
}
}
}
out.close();
}
void StructuredEikonal::setDims(size_t x, size_t y, size_t z) {
this->width_ = x;
this->height_ = y;
this->depth_ = z;
}
void StructuredEikonal::error(char* msg) {
printf("%s\n",msg);
assert(false);
exit(0);
}
void StructuredEikonal::CheckCUDAMemory() {
size_t freeMem, totalMem;
cudaMemGetInfo(&freeMem, &totalMem);
if (this->verbose_) {
std::cout << "Total Memory : " << totalMem / (1024 * 1024) << "MB" << std::endl;
std::cout << "Free Memory : " << freeMem / (1024 * 1024) << "MB" << std::endl;
std::cout << "--" << std::endl;
}
}
void StructuredEikonal::init_cuda_mem() {
assert(this->width_ > 0 && this->height_ > 0 && this->depth_ > 0);
if(this->width_ <= 0 || this->height_ <= 0 || this->depth_ <= 0){
printf("Volume dimension cannot be zero");
exit(1);
}
this->CheckCUDAMemory();
// 1. Create /initialize GPU memory
size_t nx, ny, nz;
nx = this->width_ + (BLOCK_LENGTH-this->width_%BLOCK_LENGTH)%BLOCK_LENGTH;
ny = this->height_ + (BLOCK_LENGTH-this->height_%BLOCK_LENGTH)%BLOCK_LENGTH;
nz = this->depth_ + (BLOCK_LENGTH-this->depth_%BLOCK_LENGTH)%BLOCK_LENGTH;
if (this->verbose_) {
printf("%d %d %d \n",nx,ny,nz);
printf("BLOCK_LENGTH = %d\n",BLOCK_LENGTH);
printf("width/height/depth = %d %d %d \n",this->width_,this->height_,this->depth_);
}
auto volSize = nx*ny*nz;
auto blkSize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
auto nBlkX = nx / BLOCK_LENGTH;
auto nBlkY = ny / BLOCK_LENGTH;
auto nBlkZ = nz / BLOCK_LENGTH;
auto blockNum = nBlkX*nBlkY*nBlkZ;
this->memoryStruct_.xdim = static_cast<int>(nx);
this->memoryStruct_.ydim = static_cast<int>(ny);
this->memoryStruct_.zdim = static_cast<int>(nz);
this->memoryStruct_.volsize = static_cast<uint>(volSize);
this->memoryStruct_.blksize = static_cast<uint>(blkSize);
this->memoryStruct_.blklength = BLOCK_LENGTH;
this->memoryStruct_.blknum = static_cast<uint>(blockNum);
this->memoryStruct_.nIter = static_cast<int>(this->itersPerBlock_); // iter per block
if(this->isCudaMemCreated_) // delete previous memory
{
free((DOUBLE*)this->memoryStruct_.h_sol);
free((uint*)this->memoryStruct_.h_list);
free((bool*)this->memoryStruct_.h_listed);
free((bool*)this->memoryStruct_.h_listVol);
free((int*)this->memoryStruct_.blockOrder);
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_spd) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_sol) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.t_sol) ); // temp solution for ping-pong
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_con) ); // convergence volume
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_list) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_listVol) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_mask) );
}
this->isCudaMemCreated_ = true;
this->memoryStruct_.h_sol = (DOUBLE*) malloc(volSize*sizeof(DOUBLE)); // initial solution
this->memoryStruct_.h_list = (uint*) malloc(blockNum*sizeof(uint)); // linear list contains active block indices
this->memoryStruct_.h_listed = (bool*) malloc(blockNum*sizeof(bool)); // whether block is added to the list
this->memoryStruct_.h_listVol = (bool*) malloc(blockNum*sizeof(bool)); // volume list shows active/nonactive of corresponding block
this->memoryStruct_.blockOrder = (int*) malloc(blockNum*sizeof(int));
this->CheckCUDAMemory();
//
// create host/device memory using CUDA mem functions
//
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_spd), volSize*sizeof(double)) );
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_sol), volSize*sizeof(DOUBLE)) );
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.t_sol), volSize*sizeof(DOUBLE)) ); // temp solution for ping-pong
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_con), volSize*sizeof(bool)) ); // convergence volume
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_list), blockNum*sizeof(uint)) );
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_listVol), blockNum*sizeof(bool)) );
this->CheckCUDAMemory();
CUDA_SAFE_CALL( cudaMalloc((void**)&(this->memoryStruct_.d_mask), volSize*sizeof(bool)) );
this->CheckCUDAMemory();
}
void StructuredEikonal::freeMemory() {
free((DOUBLE*)this->memoryStruct_.h_sol);
free((uint*)this->memoryStruct_.h_list);
free((bool*)this->memoryStruct_.h_listed);
free((bool*)this->memoryStruct_.h_listVol);
free((int*)this->memoryStruct_.blockOrder);
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_spd) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_sol) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.t_sol) ); // temp solution for ping-pong
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_con) ); // convergence volume
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_list) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_listVol) );
CUDA_SAFE_CALL( cudaFree(this->memoryStruct_.d_mask) );
}
void StructuredEikonal::set_attribute_mask() {
uint volSize = this->memoryStruct_.volsize;
int nx, ny, nz, blklength;
nx = memoryStruct_.xdim;
ny = memoryStruct_.ydim;
nz = memoryStruct_.zdim;
blklength = memoryStruct_.blklength;
// create host memory
double *h_spd = new double[volSize]; // byte speed, host
bool *h_mask = new bool[volSize];
// copy input volume to host memory
// make each block to be stored contiguously in 1D memory space
uint idx = 0;
for(int zStr = 0; zStr < nz; zStr += blklength) {
for(int yStr = 0; yStr < ny; yStr += blklength) {
for(int xStr = 0; xStr < nx; xStr += blklength) {
// for each block
for(int z=zStr; z<zStr+blklength; z++) {
for(int y=yStr; y<yStr+blklength; y++) {
for(int x=xStr; x<xStr+blklength; x++) {
if(x<width_ && y<height_ && z<depth_) {
h_spd[idx] = this->speeds_[x][y][z];
h_mask[idx] = true;
} else {
h_spd[idx] = speeds_[0][0][0];
h_mask[idx] = false;
}
idx++;
}
}
}
}
}
}
// initialize GPU memory with host memory
CUDA_SAFE_CALL( cudaMemcpy(memoryStruct_.d_spd, h_spd, volSize*sizeof(double), cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL( cudaMemcpy(memoryStruct_.d_mask, h_mask, volSize*sizeof(bool), cudaMemcpyHostToDevice) );
delete[] h_spd;
delete[] h_mask;
}
void StructuredEikonal::initialization() {
// get / set CUDA device
int deviceCount;
cudaGetDeviceCount(&deviceCount);
int device;
for(device = 0; device < deviceCount; ++device) {
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, device);
}
device = this->gpuDevice_;
cudaSetDevice(device);
this->CheckCUDAMemory();
this->init_cuda_mem();
this->set_attribute_mask();
this->CheckCUDAMemory();
}
void StructuredEikonal::map_generator() {
double pi = 3.141592653589793238462643383;
this->speeds_ = std::vector<std::vector<std::vector<double> > >(
this->width_, std::vector<std::vector<double> >(
this->height_, std::vector<double>(this->depth_,1.)));
switch(this->solverType_){
case 0 :
//Constant Speed Map
break;
case 1 :
//Sinusoid Speed Map
for (int k = 0 ; k < this->depth_ ; ++k) {
for (int j = 0 ; j < this->height_; ++j) {
for ( int i = 0 ; i < this->width_ ; ++i) {
this->speeds_[i][j][k] =
(6 + 5*(sin((i*pi)/this->width_ *2))*
sin((j*pi)/this->height_*2)*
sin((k*pi)/this->depth_*2));
}
}
}
break;
}
}
void StructuredEikonal::setGpuDevice(int device){
this->gpuDevice_=device;
}
void StructuredEikonal::setSeeds(std::vector<std::array<size_t, 3> > seeds) {
this->seeds_ = seeds;
}
void StructuredEikonal::setSpeeds(std::vector<std::vector<std::vector<double> > > speeds) {
this->speeds_ = speeds;
}
void StructuredEikonal::useSeeds() {
if (this->verbose_) {
std::cout << "Loading seed volume..." << std::endl;
}
uint volSize, blockNum;
int nx, ny, nz, blklength;
nx = this->memoryStruct_.xdim;
ny = this->memoryStruct_.ydim;
nz = this->memoryStruct_.zdim;
volSize = this->memoryStruct_.volsize;
blklength = this->memoryStruct_.blklength;
blockNum = this->memoryStruct_.blknum;
// copy input volume to host memory
// make each block to be stored contiguously in 1D memory space
uint idx = 0;
uint blk_idx = 0;
uint list_idx = 0;
uint nActiveBlock = 0;
//#pragma omp parallel num_threads(8)
//{
//#pragma omp for
//printf("vol size = %i\n",volSize);
for(int zStr = 0; zStr < nz; zStr += blklength) {
for(int yStr = 0; yStr < ny; yStr += blklength) {
for(int xStr = 0; xStr < nx; xStr += blklength) {
// for each block
bool isSeedBlock = false;
for(int z=zStr; z<zStr+blklength; z++) {
for(int y=yStr; y<yStr+blklength; y++) {
for(int x=xStr; x<xStr+blklength; x++) {
//printf("idx = %i\n",idx);
this->memoryStruct_.h_sol[idx] = INF;
if (this->seeds_.empty()) {
if (x == nx/2 && y == ny/2 && z == nz/2) {
this->memoryStruct_.h_sol[idx] = 0;
isSeedBlock = true;
if (this->verbose_) {
printf("%d is Selected bt source (if) \n",idx);
}
}
} else {
for(size_t i = 0; i < this->seeds_.size(); i++) {
if (this->seeds_[i][0] == x &&
this->seeds_[i][1] == y &&
this->seeds_[i][2] == z) {
this->memoryStruct_.h_sol[idx] = 0;
isSeedBlock = true;
if (this->verbose_) {
printf("%d is Selected bt source (else) \n",idx);
}
}
}
}
idx++;
}
}
}
///////////////////////////////////////////////
if(isSeedBlock) {
if (this->verbose_) {
printf("%d,%d,%d is Seed Block \n",zStr,yStr,xStr);
}
this->memoryStruct_.h_listVol[blk_idx] = true;
this->memoryStruct_.h_listed[blk_idx] = true;
this->memoryStruct_.h_list[list_idx] = blk_idx;
list_idx++;
nActiveBlock++;
} else {
this->memoryStruct_.h_listVol[blk_idx] = false;
this->memoryStruct_.h_listed[blk_idx] = false;
}
blk_idx++;
}
}
}
//}
this->memoryStruct_.nActiveBlock = nActiveBlock;
// initialize GPU memory with host memory
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_sol, this->memoryStruct_.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.t_sol, this->memoryStruct_.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_list, this->memoryStruct_.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.d_listVol, this->memoryStruct_.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
// initialize GPU memory with constant value
CUDA_SAFE_CALL( cudaMemset(this->memoryStruct_.d_con, 1, volSize*sizeof(bool)) );
}
void StructuredEikonal::setMapType(size_t t) {
this->solverType_ = t;
}
void StructuredEikonal::solveEikonal() {
if (this->speeds_.empty()) {
this->map_generator();
}
this->isCudaMemCreated_ = false;
this->initialization();
this->useSeeds();
runEikonalSolverSimple(this->memoryStruct_, this->verbose_);
this->get_solution();
this->freeMemory();
}
std::vector< std::vector< std::vector<double> > >
StructuredEikonal::getFinalResult() {
return this->answer_;
}
void StructuredEikonal::get_solution() {
// copy solution from GPU
CUDA_SAFE_CALL( cudaMemcpy(this->memoryStruct_.h_sol,
this->memoryStruct_.d_sol, this->memoryStruct_.volsize*sizeof(DOUBLE),
cudaMemcpyDeviceToHost) );
//put the data where it belongs in the grand scheme of data!
this->answer_ = std::vector<std::vector<std::vector<double> > >(
this->width_, std::vector<std::vector<double> >(
this->height_, std::vector<double>(this->depth_,0)));
//#pragma omp parallel num_threads(8)
//{
//#pragma omp for
for(size_t blockID = 0; blockID < this->memoryStruct_.blknum; blockID++) {
size_t baseAddr = blockID * this->memoryStruct_.blksize;
size_t xgridlength = this->memoryStruct_.xdim/BLOCK_LENGTH;
size_t ygridlength = this->memoryStruct_.ydim/BLOCK_LENGTH;
// compute block index
size_t bx = blockID%xgridlength;
size_t tmpIdx = (blockID - bx)/xgridlength;
size_t by = tmpIdx%ygridlength;
size_t bz = (tmpIdx-by)/ygridlength;
//translate back to real space
for(int k = 0; k < BLOCK_LENGTH; k++) {
for(int j = 0; j < BLOCK_LENGTH; j++) {
for(int i = 0; i < BLOCK_LENGTH; i++) {
double d = this->memoryStruct_.h_sol[baseAddr +
k * BLOCK_LENGTH * BLOCK_LENGTH +
j * BLOCK_LENGTH + i];
if ((i + bx * BLOCK_LENGTH) < this->width_ &&
(j + by * BLOCK_LENGTH) < this->height_ &&
(k + bz * BLOCK_LENGTH) < this->depth_) {
this->answer_[(i + bx * BLOCK_LENGTH)][(j +
by * BLOCK_LENGTH)][k + bz * BLOCK_LENGTH] = d;
}
}
}
}
}
//}
}
void StructuredEikonal::setItersPerBlock(size_t t) {
this->itersPerBlock_ = t;
}

View File

@@ -0,0 +1,55 @@
#ifndef __STRUCTUREDEIKONAL_H__
#define __STRUCTUREDEIKONAL_H__
#include <vector>
#include <array>
#include <string>
#include <fstream>
#include "common_def.h"
#include "cuda_fim.h"
/** The class that represents all of the available options for StructuredEikonal */
class StructuredEikonal {
public:
StructuredEikonal(bool verbose = false);
virtual ~StructuredEikonal();
void setDims(size_t w, size_t h, size_t d);
void setMapType(size_t t);
void setItersPerBlock(size_t t);
void setSpeeds(std::vector<std::vector<std::vector<double> > > speed);
void setSeeds(std::vector<std::array<size_t, 3> > seeds);
void setGpuDevice(int device);
void writeNRRD(std::string filename);
void freeMemory();
std::vector< std::vector< std::vector<double> > > getFinalResult();
/**
* Runs the algorithm.
*
* @data The set of options for the Eikonal algorithm.
* The defaults are used if nothing is provided.
*/
void solveEikonal();
//public member for answer
std::vector<std::vector<std::vector<double> > > answer_;
private:
void error(char* msg);
void CheckCUDAMemory();
void init_cuda_mem();
void set_attribute_mask();
void initialization();
void map_generator();
void get_solution();
void useSeeds();
//data
bool verbose_;
bool isCudaMemCreated_;
size_t width_, height_, depth_;
size_t itersPerBlock_, solverType_;
int gpuDevice_;
std::vector<std::array<size_t, 3> > seeds_;
CUDAMEMSTRUCT memoryStruct_;
std::vector<std::vector<std::vector<double> > > speeds_;
};
#endif

View File

@@ -0,0 +1,78 @@
//
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
//
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
//
// 2016. 2. 4
//
//
// Common to entire project
//
#ifndef __COMMON_DEF_H__
#define __COMMON_DEF_H__
#include "float.h"
#include "math.h"
#include "helper_timer.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <assert.h>
//
// common definition for Eikonal solvers
//
#ifndef INF
#define INF 1e20//FLT_MAX //
#endif
#define BLOCK_LENGTH 4
#ifndef FLOAT
#define DOUBLE double
#define EPS (DOUBLE)1e-16
#else
#define DOUBLE float
#define EPS (DOUBLE)1e-6
#endif
//
// itk image volume definition for 3D anisotropic eikonal solvers
//
typedef unsigned int uint;
typedef unsigned char uchar;
struct CUDA_MEM_STRUCTURE {
// volsize/blksize : # of pixel in volume/block
// blknum : # of block
// blklength : # of pixel in one dimemsion of block
uint nActiveBlock, blknum, volsize, blksize;
int xdim, ydim, zdim, nIter, blklength; // new new x,y,z dim to aligh power of 4
// host memory
uint *h_list;
bool *h_listVol, *h_listed;
// device memory
uint *d_list;
double *d_spd;
bool *d_mask, *d_listVol, *d_con;
DOUBLE *h_sol;//h_speedtable[256];
DOUBLE *d_sol, *t_sol;
// GroupOrder
int* blockOrder;
int K;
};
typedef struct CUDA_MEM_STRUCTURE CUDAMEMSTRUCT;
void CUT_SAFE_CALL(cudaError_t error);
void CUDA_SAFE_CALL(cudaError_t error);
#endif

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber@gmail.com
*
* This code is licensed under the MIT License. See the FindCUDA.cmake script
* for the text of the license.
*
* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
* http://stackoverflow.com/questions/2285185
*/
#include <stdio.h>
#include <cuda_runtime.h>
int main() {
int deviceCount, device, major = 9999, minor = 9999;
int gpuDeviceCount = 0;
struct cudaDeviceProp properties;
if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
{
printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
return 1;
}
/* machines with no GPUs can still report one emulation device */
for (device = 0; device < deviceCount; ++device) {
cudaGetDeviceProperties(&properties, device);
if (properties.major != 9999) {/* 9999 means emulation only */
++gpuDeviceCount;
/* get minimum compute capability of all devices */
if (major > properties.major) {
major = properties.major;
minor = properties.minor;
} else if (minor > properties.minor) {
minor = properties.minor;
}
}
}
/* don't just return the number of gpus, because other runtime cuda
errors can also yield non-zero return values */
if (gpuDeviceCount > 0) {
if (major == 2 && minor == 1)
{
// There is no --arch compute_21 flag for nvcc, so force minor to 0
minor = 0;
}
/* this output will be parsed by FindCUDA.cmake */
printf("%d%d", major, minor);
return 0; /* success */
}
return 1; /* failure */
}

326
eikonalGPU/src/cuda_fim.cu Normal file
View File

@@ -0,0 +1,326 @@
//
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
//
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
//
// 2016. 2. 4
//
#include <cstdio>
#include <string>
#include <cmath>
#include <float.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include "cuda_fim_kernel.h"
#include "cuda_fim.h"
void CUT_SAFE_CALL(cudaError_t error) {
if(error != cudaSuccess)
{
printf("CUDA error! %d \n",error);
exit(EXIT_FAILURE);
}
}
void CUDA_SAFE_CALL(cudaError_t error) {
CUT_SAFE_CALL(error);
}
void runEikonalSolverSimple(CUDAMEMSTRUCT &cmem, bool verbose)
{
int deviceID;
cudaGetDevice(&deviceID);
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, deviceID);
if (verbose) {
printf("Current device id : %d, name : %s\n", deviceID, deviceProp.name);
}
int xdim, ydim, zdim;
xdim = cmem.xdim;
ydim = cmem.ydim;
zdim = cmem.zdim;
#ifdef __DEVICE_EMULATION__
assert(xdim%BLOCK_LENGTH == 0);
assert(ydim%BLOCK_LENGTH == 0);
assert(zdim%BLOCK_LENGTH == 0);
#endif
// create volumes
uint volSize = cmem.volsize;
uint blockNum = cmem.blknum;
//int nBlkZ = zdim/BLOCK_LENGTH;
if (verbose) {
printf("# of total voxels : %d\n", volSize);
printf("# of total blocks : %d\n", blockNum);
}
// h_ : host memory, d_ : device memory
// copy speed table to constant variable
//cudaMemcpyToSymbol((const char*)_speed, cmem.h_speedtable, sizeof(float)*256);
int nIter = cmem.nIter;
uint nActiveBlock = cmem.nActiveBlock; // active list
double *d_spd = cmem.d_spd;
DOUBLE *d_sol = cmem.d_sol;
DOUBLE *t_sol = cmem.t_sol;
uint *d_list = cmem.d_list;
bool *d_listVol = cmem.d_listVol;
bool *d_con = cmem.d_con;
bool *d_mask = cmem.d_mask;
// copy so that original value should not be modified
uint *h_list = (uint*) malloc(blockNum*sizeof(uint));
bool *h_listed = (bool*) malloc(blockNum*sizeof(bool));
bool *h_listVol = (bool*) malloc(blockNum*sizeof(bool));
// initialization
memcpy(h_list, cmem.h_list, blockNum*sizeof(uint));
memcpy(h_listed, cmem.h_listed, blockNum*sizeof(bool));
memcpy(h_listVol, cmem.h_listVol, blockNum*sizeof(bool));
CUT_SAFE_CALL( cudaMemcpy(cmem.d_list, cmem.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
CUT_SAFE_CALL( cudaMemcpy(cmem.d_listVol, cmem.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
CUT_SAFE_CALL( cudaMemcpy(cmem.d_sol, cmem.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
CUT_SAFE_CALL( cudaMemcpy(cmem.t_sol, cmem.h_sol, volSize*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
CUT_SAFE_CALL( cudaMemset(cmem.d_con, 1, volSize*sizeof(bool)) );
// set dimension of block and entire grid size
dim3 dimBlock(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH);
dim3 dimEntireGrid(blockNum);
dim3 dimGrid(nActiveBlock);
int nTotalIter = 0;
//uint sharedmemsize = sizeof(float)*BLOCK_LENGTH*BLOCK_LENGTH*(3*BLOCK_LENGTH + 2);
std::vector<int> sourceList;
sourceList.push_back((zdim/2)*ydim*xdim + (ydim/2)*xdim + (xdim/2));
// initialize & start timer
StopWatchInterface *timer_total, *timer_solver, *timer_reduction, *timer_list, *timer_list2, *timer_coarse;
timer_total = timer_solver = timer_reduction = timer_list = timer_list2 = timer_coarse = NULL;
sdkCreateTimer(&timer_total);
sdkCreateTimer(&timer_solver);
sdkCreateTimer(&timer_reduction);
sdkCreateTimer(&timer_list);
sdkCreateTimer(&timer_list2);
sdkCreateTimer(&timer_coarse);
sdkStartTimer(&timer_total);
uint nTotalBlockProcessed = 0;
int maxIter = 1000;
// start solver
while(nActiveBlock > 0 && nTotalIter < maxIter)
{
//CUT_SAFE_CALL( cudaMemcpy(cmem.d_sol, cmem.h_sol, 64*nActiveBlock*sizeof(DOUBLE), cudaMemcpyHostToDevice) );
assert(nActiveBlock < 4294967295);
nTotalBlockProcessed += nActiveBlock;
nTotalIter++;
//
// solve current blocks in the active lists
//
// printf("# of active tiles : %u\n", nActiveBlock);
if (verbose) {
printf("# of active tiles : %u\n", nActiveBlock);
}
//////////////////////////////////////////////////////////////////
// 1. run solver on current active tiles
sdkStartTimer(&timer_solver);
dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
dimGrid.x = (unsigned int)ceil ((double)nActiveBlock/(double)dimGrid.y);
if (verbose) {
printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
}
CUT_SAFE_CALL( cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
run_solver<<< dimGrid, dimBlock >>>(d_spd, d_mask, d_sol, t_sol, d_con, d_list, xdim, ydim, zdim, nIter, nActiveBlock);
CUT_SAFE_CALL(cudaGetLastError());
cudaDeviceSynchronize();
sdkStopTimer(&timer_solver);
//////////////////////////////////////////////////////////////////
// 2. reduction (only active tiles)
sdkStartTimer(&timer_reduction);
run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
CUT_SAFE_CALL(cudaGetLastError());
//CUT_CHECK_ERROR("Kernel execution failed");
cudaDeviceSynchronize();
sdkStopTimer(&timer_reduction);
//////////////////////////////////////////////////////////////////
// 3. check neighbor tiles of converged tile
// Add any active block of neighbor of converged block is inserted
// to the list
// read-back active list volume
#ifdef TIMER
sdkStartTimer(&timer_list);
#endif
CUT_SAFE_CALL( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
uint nOldActiveBlock = nActiveBlock;
uint nBlkX = xdim/BLOCK_LENGTH;
uint nBlkY = ydim/BLOCK_LENGTH;
for(uint i=0; i<nOldActiveBlock; i++)
{
// check 6-neighbor of current active tile
uint currBlkIdx = h_list[i];
if(!h_listVol[currBlkIdx]) // not active : converged
{
uint nb[6];
nb[0] = (currBlkIdx < nBlkX*nBlkY) ? currBlkIdx : (currBlkIdx - nBlkX*nBlkY); //tp
nb[1] = ((currBlkIdx + nBlkX*nBlkY) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX*nBlkY); //bt
nb[2] = (currBlkIdx < nBlkX) ? currBlkIdx : (currBlkIdx - nBlkX); //up
nb[3] = ((currBlkIdx + nBlkX) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX); //dn
nb[4] = (currBlkIdx%nBlkX == 0) ? currBlkIdx : currBlkIdx-1; //lf
nb[5] = ((currBlkIdx+1)%nBlkX == 0) ? currBlkIdx : currBlkIdx+1; //rt
for(int nbIdx = 0; nbIdx < 6; nbIdx++)
{
uint currIdx = nb[nbIdx];
// assert(currIdx < volSize);
if(!h_listed[currIdx])
{
h_listed[currIdx] = true;
h_list[nActiveBlock++] = currIdx;
}
}
}
}
cudaDeviceSynchronize();
#ifdef TIMER
sdkStopTimer(&timer_list);
#endif
//////////////////////////////////////////////////////////////////
// 4. run solver only once for neighbor blocks of converged block
// current active list contains active blocks and neighbor blocks of
// any converged blocks.
//
#ifdef TIMER
sdkStartTimer(&timer_solver);
#endif
// update grid dimension because nActiveBlock is changed
dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
dimGrid.x = (unsigned int)ceil((double)nActiveBlock/(double)dimGrid.y);
if (verbose) {
printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
}
CUT_SAFE_CALL(cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
run_check_neighbor<<< dimGrid, dimBlock >>>(d_spd, d_mask, t_sol, d_sol, d_con, d_list, xdim, ydim, zdim, nOldActiveBlock, nActiveBlock);
CUT_SAFE_CALL(cudaGetLastError());
cudaDeviceSynchronize();
#ifdef TIMER
sdkStopTimer(&timer_solver);
#endif
//////////////////////////////////////////////////////////////////
// 5. reduction
sdkStartTimer(&timer_reduction);
run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
CUT_SAFE_CALL(cudaGetLastError());
cudaDeviceSynchronize();
sdkStopTimer(&timer_reduction);
//////////////////////////////////////////////////////////////////
// 6. update active list
// read back active volume from the device and add
// active block to active list on the host memory
#ifdef TIMER
sdkStartTimer(&timer_list2);
#endif
nActiveBlock = 0;
CUT_SAFE_CALL( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
for(uint i=0; i<blockNum; i++)
{
if(h_listVol[i]) // true : active block (not converged)
{
h_listed[i] = true;
h_list[nActiveBlock++] = i;
//printf("Block %d added\n", i);
}
else h_listed[i] = false;
}
cudaDeviceSynchronize();
#ifdef TIMER
sdkStopTimer(&timer_list2);
#endif
if (verbose) {
printf("Iteration : %d\n", nTotalIter);
}
}
sdkStopTimer(&timer_total);
if (verbose) {
printf("Eikonal solver converged after %d iterations\n", nTotalIter);
printf("Total Running Time: %f (sec)\n", sdkGetTimerValue(&timer_total) / 1000);
printf("Time for solver : %f (sec)\n", sdkGetTimerValue(&timer_solver) / 1000);
printf("Time for reduction : %f (sec)\n", sdkGetTimerValue(&timer_reduction) / 1000);
#ifdef TIMER
printf("Time for list update-1 (CPU) : %f (sec)\n", sdkGetTimerValue(&timer_list) / 1000);
printf("Time for list update-2 (CPU) : %f (sec)\n", sdkGetTimerValue(&timer_list2) / 1000);
#endif
printf("Total # of blocks processed : %d\n", nTotalBlockProcessed);
}
sdkDeleteTimer(&timer_total);
sdkDeleteTimer(&timer_solver);
sdkDeleteTimer(&timer_reduction);
sdkDeleteTimer(&timer_list);
// delete dynamically allocated host memory
free(h_list);
free(h_listed);
free(h_listVol);
}

20
eikonalGPU/src/cuda_fim.h Normal file
View File

@@ -0,0 +1,20 @@
//
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
//
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
//
// 2016. 2. 4
//
#ifndef __CUDA_FIM_H__
#define __CUDA_FIM_H__
#include <cstdlib>
#include "common_def.h"
#define TIMER
void CUT_SAFE_CALL(cudaError_t error);
void CUDA_SAFE_CALL(cudaError_t error);
void runEikonalSolverSimple(CUDAMEMSTRUCT &cmem, bool verbose);
#endif

View File

@@ -0,0 +1,431 @@
//
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
//
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
//
// 2016. 2. 4
//
#include "cuda_fim_kernel.h"
__device__ DOUBLE get_time_eikonal(DOUBLE a, DOUBLE b, DOUBLE c, DOUBLE s)
{
DOUBLE ret, tmp;
// a > b > c
if(a < b) { tmp = a; a = b; b = tmp; }
if(b < c) { tmp = b; b = c; c = tmp; }
if(a < b) { tmp = a; a = b; b = tmp; }
ret = INF;
if(c < INF)
{
ret = c + s;
if(ret > b)
{
tmp = ((b+c) + sqrtf(2.0f*s*s-(b-c)*(b-c)))*0.5f;
if(tmp > b) ret = tmp;
if(ret > a) {
tmp = (a+b+c)/3.0f + sqrtf(2.0f*(a*(b-a)+b*(c-b)+c*(a-c))+3.0f*s*s)/3.0f;
if(tmp > a) ret = tmp;
}
}
}
return ret;
}
__global__ void run_solver(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim, int nIter, uint nActiveBlock)
{
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
if(list_idx < nActiveBlock)
{
// retrieve actual block index from the active list
uint block_idx = list[list_idx];
double F;
bool isValid;
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
uint base_addr = block_idx*blocksize;
uint xgridlength = xdim/BLOCK_LENGTH;
uint ygridlength = ydim/BLOCK_LENGTH;
uint zgridlength = zdim/BLOCK_LENGTH;
// compute block index
uint bx = block_idx%xgridlength;
uint tmpIdx = (block_idx - bx)/xgridlength;
uint by = tmpIdx%ygridlength;
uint bz = (tmpIdx-by)/ygridlength;
uint tx = threadIdx.x;
uint ty = threadIdx.y;
uint tz = threadIdx.z;
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
__shared__ DOUBLE _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
// copy global to shared memory
dim3 idx(tx+1,ty+1,tz+1);
SOL(idx.x,idx.y,idx.z) = sol_in[base_addr + tIdx];
//F = _speed[(int)spd[base_addr + tIdx]];
F = spd[base_addr + tIdx];
if(F > 0) F = 1.0/F; // F = 1/f
isValid = mask[base_addr + tIdx];
uint new_base_addr, new_tIdx;
// 1-neighborhood values
if(tx == 0)
{
if(bx == 0) // end of the grid
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + BLOCK_LENGTH-1;
new_base_addr = (block_idx - 1)*blocksize;
}
SOL(tx,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];
}
if(tx == BLOCK_LENGTH-1)
{
if(bx == xgridlength-1) // end of the grid
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1);
new_base_addr = (block_idx + 1)*blocksize;
}
SOL(tx+2,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];
}
if(ty == 0)
{
if(by == 0)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
new_base_addr = (block_idx - xgridlength)*blocksize;
}
SOL(idx.x,ty,idx.z) = sol_in[new_base_addr + new_tIdx];
}
if(ty == BLOCK_LENGTH-1)
{
if(by == ygridlength-1)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
new_base_addr = (block_idx + xgridlength)*blocksize;
}
SOL(idx.x,ty+2,idx.z) = sol_in[new_base_addr + new_tIdx];
}
if(tz == 0)
{
if(bz == 0)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
}
SOL(idx.x,idx.y,tz) = sol_in[new_base_addr + new_tIdx];
}
if(tz == BLOCK_LENGTH-1)
{
if(bz == zgridlength-1)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
}
SOL(idx.x,idx.y,tz+2) = sol_in[new_base_addr + new_tIdx];
}
__syncthreads();
DOUBLE a,b,c,oldT,newT;
for(int iter=0; iter<nIter; iter++)
{
//
// compute new value
//
oldT = newT = SOL(idx.x,idx.y,idx.z);
if(isValid)
{
a = min(SOL(tx,idx.y,idx.z),SOL(tx+2,idx.y,idx.z));
b = min(SOL(idx.x,ty,idx.z),SOL(idx.x,ty+2,idx.z));
c = min(SOL(idx.x,idx.y,tz),SOL(idx.x,idx.y,tz+2));
DOUBLE tmp = (DOUBLE) get_time_eikonal(a, b, c, F);
newT = min(tmp,oldT);
}
__syncthreads();
if(isValid) SOL(idx.x,idx.y,idx.z) = newT;
__syncthreads(); // this may not required
}
DOUBLE residue = oldT - newT;
// write back to global memory
con[base_addr + tIdx] = (residue < EPS) ? true : false;
sol_out[base_addr + tIdx] = newT;
}
}
__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock)
{
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
if(list_idx < nActiveBlock)
{
uint block_idx = list[list_idx];
__shared__ bool conv[BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH];
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH/2;
uint base_addr = block_idx*blocksize*2;
uint tx = threadIdx.x;
uint ty = threadIdx.y;
uint tz = threadIdx.z;
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
conv[tIdx] = con[base_addr + tIdx];
conv[tIdx + blocksize] = con[base_addr + tIdx + blocksize];
__syncthreads();
for(uint i=blocksize; i>0; i/=2)
{
if(tIdx < i)
{
bool b1, b2;
b1 = conv[tIdx];
b2 = conv[tIdx+i];
conv[tIdx] = (b1 && b2) ? true : false ;
}
__syncthreads();
}
if(tIdx == 0)
{
listVol[block_idx] = !conv[0]; // active list is negation of tile convergence (active = not converged)
}
}
}
__global__ void run_check_neighbor(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out,
bool *con, uint* list, int xdim, int ydim, int zdim,
uint nActiveBlock, uint nTotalBlock)
{
uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
if(list_idx < nTotalBlock)
{
double F;
bool isValid;
__shared__ DOUBLE _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
uint block_idx = list[list_idx];
uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
uint base_addr = block_idx*blocksize;
uint tx = threadIdx.x;
uint ty = threadIdx.y;
uint tz = threadIdx.z;
uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
if(list_idx < nActiveBlock) // copy value
{
sol_out[base_addr + tIdx] = sol_in[base_addr + tIdx];
}
else
{
uint xgridlength = xdim/BLOCK_LENGTH;
uint ygridlength = ydim/BLOCK_LENGTH;
uint zgridlength = zdim/BLOCK_LENGTH;
// compute block index
uint bx = block_idx%xgridlength;
uint tmpIdx = (block_idx - bx)/xgridlength;
uint by = tmpIdx%ygridlength;
uint bz = (tmpIdx-by)/ygridlength;
#ifdef __DEVICE_EMULATION__
assert(block_idx == bz*xgridlength*ygridlength + by*xgridlength + bx);
printf("Block %d's index : %d, %d, %d\n", block_idx, bx, by, bz);
printf("Thread %d's index : %d, %d, %d\n", tIdx, tx, ty, tz);
#endif
// copy global to shared memory
dim3 idx(tx+1,ty+1,tz+1);
_sol[idx.x][idx.y][idx.z] = sol_in[base_addr + tIdx];
F = spd[base_addr + tIdx];
if(F > 0) F = 1.0/F;
isValid = mask[base_addr + tIdx];
uint new_base_addr, new_tIdx;
// 1-neighborhood values
if(tx == 0)
{
if(bx == 0) // end of the grid
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + BLOCK_LENGTH-1;
new_base_addr = (block_idx - 1)*blocksize;
}
_sol[tx][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];
}
if(tx == BLOCK_LENGTH-1)
{
if(bx == xgridlength-1) // end of the grid
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1);
new_base_addr = (block_idx + 1)*blocksize;
}
_sol[tx+2][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];
}
if(ty == 0)
{
if(by == 0)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
new_base_addr = (block_idx - xgridlength)*blocksize;
}
_sol[idx.x][ty][idx.z] = sol_in[new_base_addr + new_tIdx];
}
if(ty == BLOCK_LENGTH-1)
{
if(by == ygridlength-1)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
new_base_addr = (block_idx + xgridlength)*blocksize;
}
_sol[idx.x][ty+2][idx.z] = sol_in[new_base_addr + new_tIdx];
}
if(tz == 0)
{
if(bz == 0)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
}
_sol[idx.x][idx.y][tz] = sol_in[new_base_addr + new_tIdx];
}
if(tz == BLOCK_LENGTH-1)
{
if(bz == zgridlength-1)
{
new_tIdx = tIdx;
new_base_addr = base_addr;
}
else
{
new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
}
_sol[idx.x][idx.y][tz+2] = sol_in[new_base_addr + new_tIdx];
}
__syncthreads();
DOUBLE a,b,c,oldT,newT;
//
// compute new value
//
oldT = newT = _sol[idx.x][idx.y][idx.z];
if(isValid)
{
a = min(_sol[tx][idx.y][idx.z],_sol[tx+2][idx.y][idx.z]);
b = min(_sol[idx.x][ty][idx.z],_sol[idx.x][ty+2][idx.z]);
c = min(_sol[idx.x][idx.y][tz],_sol[idx.x][idx.y][tz+2]);
DOUBLE tmp = (DOUBLE) get_time_eikonal(a, b, c, F);
newT = min(tmp,oldT);
sol_out[base_addr + tIdx] = newT;
}
// write back to global memory
DOUBLE residue = oldT - newT;
con[base_addr + tIdx] = (residue < EPS) ? true : false;
}
}
}

View File

@@ -0,0 +1,53 @@
//
// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
//
// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
//
// 2016. 2. 4
//
#ifndef _cuda_fim_KERNEL_H_
#define _cuda_fim_KERNEL_H_
#include <cstdio>
#include "common_def.h"
// check bank confilct only when device emulation mode
#ifdef __DEVICE_EMULATION__
#define CHECK_BANK_CONFLICTS
#endif
#ifdef CHECK_BANK_CONFLICTS
#define MEM(index) CUT_BANK_CHECKER(_mem, index)
#define SOL(i,j,k) CUT_BANK_CHECKER(((float*)&_sol[0][0][0]), (k*(BLOCK_LENGTH+2)*(BLOCK_LENGTH+2) + j*(BLOCK_LENGTH+2) + i))
#define SPD(i,j,k) CUT_BANK_CHECKER(((float*)&_spd[0][0][0]), (k*(BLOCK_LENGTH)*(BLOCK_LENGTH) + j*(BLOCK_LENGTH) + i))
#else
#define MEM(index) _mem[index]
#define SOL(i,j,k) _sol[i][j][k]
#define SPD(i,j,k) _spd[i][j][k]
#endif
__device__ DOUBLE get_time_eikonal(DOUBLE a, DOUBLE b, DOUBLE c, DOUBLE s);
//
// F : Input speed (positive)
// if F =< 0, skip that pixel (masking out)
//
__global__ void run_solver(double* spd, bool* mask, const DOUBLE *sol_in,
DOUBLE *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim,
int nIter, uint nActiveBlock);
//
// run_reduction
//
// con is pixelwise convergence. Do reduction on active tiles and write tile-wise
// convergence to listVol. The implementation assumes that the block size is 4x4x4.
//
__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock);
//
// if block is active block, copy values
// if block is neighbor, run solver once
//
__global__ void run_check_neighbor(double* spd, bool* mask, const DOUBLE *sol_in, DOUBLE *sol_out,
bool *con, uint* list, int xdim, int ydim, int zdim,
uint nActiveBlock, uint nTotalBlock);
#endif // #ifndef _cuda_fim_KERNEL_H_

183
eikonalGPU/src/eikonal.cpp Normal file
View File

@@ -0,0 +1,183 @@
#include "eikonal.h"
#include <vector>
#include <time.h>
#include "StructuredEikonal.h"
#define mwSize int
// #define timeMeasure 1
// #define verbose 1
double* eikonal(double* volume,int* dims,double* startpoint) {
// for time measurement
#ifdef timeMeasure
clock_t begin, end;
float z;
#endif
// default parameters for number of threads and iters per block
size_t itersPerBlock = 10;
int nthreads=8;
int gpuSelection;
bool structuredEikonalVerbose = false;
#ifdef verbose
structuredEikonalVerbose = true;
#endif
#ifdef timeMeasure
begin = clock();
#endif
/* check if 3D and adapted dimension to size = 1 if 2D
(mxGetDimensions reduces dim[2] to 0 if 2D map is given) */
mwSize dimsAdapted[3] = {0,0,0};
if(dims[0] == 0) {
dimsAdapted[0] = 1;
} else {
dimsAdapted[0] = dims[0];
}
if(dims[1] == 0) {
dimsAdapted[1] = 1;
}else {
dimsAdapted[1] = dims[1];
}
if(dims[2] == 0) {
dimsAdapted[2] = 1;
}else {
dimsAdapted[2] = dims[2];
}
/* Create output data structures */
double* out = new double[dimsAdapted[0]*dimsAdapted[1]*dimsAdapted[2]]{0};
#ifdef timeMeasure
end = clock();
z=end - begin;
z/=CLOCKS_PER_SEC;
printf("Time init: %f sec.\n", z);
begin = clock();
#endif
/* vector data structure for input volume */
std::vector<std::vector<std::vector<double> > > input;
input = std::vector<std::vector<std::vector<double> > >(
dimsAdapted[0], std::vector<std::vector<double> >(
dimsAdapted[1], std::vector<double>(dimsAdapted[2],1.)));
int linInd;
int linInd_k;
int linInd_j;
/* sort input volume input vector data structure */
#pragma omp parallel num_threads(nthreads)
{
#pragma omp for
for(size_t k = 0; k < dimsAdapted[2]; k++) {
linInd_k = k*dimsAdapted[0]*dimsAdapted[1];
for(size_t j = 0; j < dimsAdapted[1]; j++) {
linInd_j = linInd_k+j*dimsAdapted[0];
for(size_t i = 0; i < dimsAdapted[0]; i++) {
linInd = linInd_j + i;
input[i][j][k] = volume[linInd];
}
}
}
}
#ifdef timeMeasure
end = clock();
z=end - begin;
z/=CLOCKS_PER_SEC;
printf("Time vector init and data copy: %f sec.\n", z);
begin = clock();
#endif
/* init the Eikonal solver */
// create object
StructuredEikonal data(structuredEikonalVerbose);
// set the dimensions of the input volume
data.setDims(dimsAdapted[0],dimsAdapted[1],dimsAdapted[2]);
// setting the number of iterations per block (default = 10)
data.setItersPerBlock(itersPerBlock);
// setting the start point for the Eikonal propagation
data.setSeeds({ { { { (size_t) startpoint[0], (size_t) startpoint[1], (size_t) startpoint[2] } } } });
// setting the input data (aka speeds) to the Eikonal solver
data.setSpeeds(input);
#ifdef timeMeasure
end = clock();
z=end - begin;
z/=CLOCKS_PER_SEC;
printf("Time Eikonal init: %f sec.\n", z);
begin = clock();
#endif
/* Solve Eikonal and get the results */
data.solveEikonal();
#ifdef timeMeasure
end = clock();
z=end - begin;
z/=CLOCKS_PER_SEC;
printf("Time Eikonal solve: %f sec.\n", z);
begin = clock();
#endif
/* sort vector into output array. */
#pragma omp parallel num_threads(nthreads)
{
#pragma omp for
for(size_t k = 0; k < dimsAdapted[2]; k++) {
linInd_k = k*dimsAdapted[0]*dimsAdapted[1];
for(size_t j = 0; j < dimsAdapted[1]; j++) {
linInd_j = linInd_k+j*dimsAdapted[0];
for(size_t i = 0; i < dimsAdapted[0]; i++) {
linInd = linInd_j + i;
out[linInd] = data.answer_[i][j][k];
}
}
}
}
#ifdef timeMeasure
end = clock();
z=end - begin;
z/=CLOCKS_PER_SEC;
printf("Time output conversion: %f sec.\n", z);
#endif
/* and return to MATLAB */
return out;
}

1
eikonalGPU/src/eikonal.h Normal file
View File

@@ -0,0 +1 @@
extern double* eikonal(double* volume,int* dims,double* startpoint);

View File

@@ -0,0 +1,303 @@
/////////////////////////////////////////////////////////////////////////////
//
// Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
//
// Please refer to the NVIDIA end user license agreement (EULA) associated
// with this source code for terms and conditions that govern your use of
// this software. Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
//
/////////////////////////////////////////////////////////////////////////////
// includes, project
#include "helper_timer.h"
//////////////////////////////////////////////////////////////////
// Begin Stopwatch timer class definitions for all OS platforms //
//////////////////////////////////////////////////////////////////
#ifdef WIN32
StopWatchWin::StopWatchWin() :
start_time(), end_time(),
diff_time(0.0f), total_time(0.0f),
running(false), clock_sessions(0), freq(0), freq_set(false)
{
if (!freq_set) {
// helper variable
LARGE_INTEGER temp;
// get the tick frequency from the OS
QueryPerformanceFrequency((LARGE_INTEGER*)&temp);
// convert to type in which it is needed
freq = ((double)temp.QuadPart) / 1000.0;
// rememeber query
freq_set = true;
}
}
StopWatchWin::~StopWatchWin() { }
// functions, d
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
void
StopWatchWin::start()
{
QueryPerformanceCounter((LARGE_INTEGER*)&start_time);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
void
StopWatchWin::stop()
{
QueryPerformanceCounter((LARGE_INTEGER*)&end_time);
diff_time = (float)
(((double)end_time.QuadPart - (double)start_time.QuadPart) / freq);
total_time += diff_time;
clock_sessions++;
running = false;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
void
StopWatchWin::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
QueryPerformanceCounter((LARGE_INTEGER*)&start_time);
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
float
StopWatchWin::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running)
{
LARGE_INTEGER temp;
QueryPerformanceCounter((LARGE_INTEGER*)&temp);
retval += (float)
(((double)(temp.QuadPart - start_time.QuadPart)) / freq);
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
float
StopWatchWin::getAverageTime()
{
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
}
#else
StopWatchLinux::StopWatchLinux() :
start_time(), diff_time(0.0), total_time(0.0),
running(false), clock_sessions(0)
{ }
// Destructor
StopWatchLinux::~StopWatchLinux() { }
// functions, d
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
void
StopWatchLinux::start() {
gettimeofday(&start_time, 0);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
void
StopWatchLinux::stop() {
diff_time = getDiffTime();
total_time += diff_time;
running = false;
clock_sessions++;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
void
StopWatchLinux::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
gettimeofday(&start_time, 0);
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
float
StopWatchLinux::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running) {
retval += getDiffTime();
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
float
StopWatchLinux::getAverageTime()
{
return (clock_sessions > 0) ? (total_time / clock_sessions) : 0.0f;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
float
StopWatchLinux::getDiffTime()
{
struct timeval t_time;
gettimeofday(&t_time, 0);
// time difference in milli-seconds
return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
+ (0.001 * (t_time.tv_usec - start_time.tv_usec)));
}
#endif // _WIN32
////////////////////////////////////////////////////////////////////////////////
//! Timer functionality exported
////////////////////////////////////////////////////////////////////////////////
//! Create a new timer
//! @return true if a time has been created, otherwise false
//! @param name of the new timer, 0 if the creation failed
////////////////////////////////////////////////////////////////////////////////
bool
sdkCreateTimer(StopWatchInterface **timer_interface)
{
//printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
#ifdef _WIN32
*timer_interface = (StopWatchInterface *)new StopWatchWin();
#else
*timer_interface = (StopWatchInterface *)new StopWatchLinux();
#endif
return (*timer_interface != NULL) ? true : false;
}
////////////////////////////////////////////////////////////////////////////////
//! Delete a timer
//! @return true if a time has been deleted, otherwise false
//! @param name of the timer to delete
////////////////////////////////////////////////////////////////////////////////
bool
sdkDeleteTimer(StopWatchInterface **timer_interface)
{
//printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) delete *timer_interface;
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Start the time with name \a name
//! @param name name of the timer to start
////////////////////////////////////////////////////////////////////////////////
bool
sdkStartTimer(StopWatchInterface **timer_interface)
{
//printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) (*timer_interface)->start();
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop the time with name \a name. Does not reset.
//! @param name name of the timer to stop
////////////////////////////////////////////////////////////////////////////////
bool
sdkStopTimer(StopWatchInterface **timer_interface)
{
// printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) (*timer_interface)->stop();
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Resets the timer's counter.
//! @param name name of the timer to reset.
////////////////////////////////////////////////////////////////////////////////
bool
sdkResetTimer(StopWatchInterface **timer_interface)
{
// printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface) (*timer_interface)->reset();
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Return the average time for timer execution as the total time
//! for the timer dividied by the number of completed (stopped) runs the timer
//! has made.
//! Excludes the current running time if the timer is currently running.
//! @param name name of the timer to return the time of
////////////////////////////////////////////////////////////////////////////////
float
sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
return (*timer_interface)->getAverageTime();
else
return 0.0f;
}
////////////////////////////////////////////////////////////////////////////////
//! Total execution time for the timer over all runs since the last reset
//! or timer creation.
//! @param name name of the timer to obtain the value of.
////////////////////////////////////////////////////////////////////////////////
float
sdkGetTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
return (*timer_interface)->getTime();
else
return 0.0f;
}

View File

@@ -0,0 +1,228 @@
/////////////////////////////////////////////////////////////////////////////
//
// Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
//
// Please refer to the NVIDIA end user license agreement (EULA) associated
// with this source code for terms and conditions that govern your use of
// this software. Any use, reproduction, disclosure, or distribution of
// this software and related documentation outside the terms of the EULA
// is strictly prohibited.
//
/////////////////////////////////////////////////////////////////////////////
// Helper Timer Functions (this is the inlined version)
#ifndef HELPER_TIMER_H
#define HELPER_TIMER_H
// includes, system
#include <vector>
// includes, project
#include "my_exception.h"
// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
// But rather in a self contained class interface
class StopWatchInterface
{
public:
StopWatchInterface() {};
virtual ~StopWatchInterface() {};
public:
//! Start time measurement
virtual void start() = 0;
//! Stop time measurement
virtual void stop() = 0;
//! Reset time counters to zero
virtual void reset() = 0;
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
virtual float getTime() = 0;
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
virtual float getAverageTime() = 0;
};
//////////////////////////////////////////////////////////////////
// Begin Stopwatch timer class definitions for all OS platforms //
//////////////////////////////////////////////////////////////////
#ifdef WIN32
// includes, system
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#undef min
#undef max
//! Windows specific implementation of StopWatch
class StopWatchWin : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchWin();
// Destructor
~StopWatchWin();
public:
//! Start time measurement
void start();
//! Stop time measurement
void stop();
//! Reset time counters to zero
void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
float getAverageTime();
private:
// member variables
//! Start of measurement
LARGE_INTEGER start_time;
//! End of measurement
LARGE_INTEGER end_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
//! tick frequency
double freq;
//! flag if the frequency has been set
bool freq_set;
};
#else
// Declarations for Stopwatch on Linux and Mac OSX
// includes, system
#include <ctime>
#include <sys/time.h>
//! Windows specific implementation of StopWatch
class StopWatchLinux : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchLinux();
// Destructor
virtual ~StopWatchLinux();
public:
//! Start time measurement
void start();
//! Stop time measurement
void stop();
//! Reset time counters to zero
void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
float getAverageTime();
private:
// helper functions
//! Get difference between start time and current time
float getDiffTime();
private:
// member variables
//! Start of measurement
struct timeval start_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
};
#endif // _WIN32
////////////////////////////////////////////////////////////////////////////////
//! Timer functionality exported
////////////////////////////////////////////////////////////////////////////////
//! Create a new timer
//! @return true if a time has been created, otherwise false
//! @param name of the new timer, 0 if the creation failed
////////////////////////////////////////////////////////////////////////////////
bool sdkCreateTimer(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Delete a timer
//! @return true if a time has been deleted, otherwise false
//! @param name of the timer to delete
////////////////////////////////////////////////////////////////////////////////
bool sdkDeleteTimer(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Start the time with name \a name
//! @param name name of the timer to start
////////////////////////////////////////////////////////////////////////////////
bool sdkStartTimer(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Stop the time with name \a name. Does not reset.
//! @param name name of the timer to stop
////////////////////////////////////////////////////////////////////////////////
bool sdkStopTimer(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Resets the timer's counter.
//! @param name name of the timer to reset.
////////////////////////////////////////////////////////////////////////////////
bool sdkResetTimer(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Return the average time for timer execution as the total time
//! for the timer dividied by the number of completed (stopped) runs the timer
//! has made.
//! Excludes the current running time if the timer is currently running.
//! @param name name of the timer to return the time of
////////////////////////////////////////////////////////////////////////////////
float sdkGetAverageTimerValue(StopWatchInterface **timer_interface);
////////////////////////////////////////////////////////////////////////////////
//! Total execution time for the timer over all runs since the last reset
//! or timer creation.
//! @param name name of the timer to obtain the value of.
////////////////////////////////////////////////////////////////////////////////
float sdkGetTimerValue(StopWatchInterface **timer_interface);
#endif // HELPER_TIMER_H

View File

@@ -0,0 +1,151 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef _MY_EXCEPTION_H_
#define _MY_EXCEPTION_H_
// includes, system
#include <exception>
#include <stdexcept>
#include <iostream>
#include <stdlib.h>
//! Exception wrapper.
//! @param Std_Exception Exception out of namespace std for easy typing.
template<class Std_Exception>
class Exception : public Std_Exception
{
public:
//! @brief Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it( const char* file,
const int line,
const char* detailed = "-" );
//! Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it( const char* file,
const int line,
const std::string& detailed);
//! Destructor
virtual ~Exception() throw();
private:
//! Constructor, default (private)
Exception();
//! Constructor, standard
//! @param str string returned by what()
Exception( const std::string& str);
};
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template<class Exception_Typ>
inline void
handleException( const Exception_Typ& ex)
{
std::cerr << ex.what() << std::endl;
exit( EXIT_FAILURE);
}
//! Convenience macros
//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION( msg) \
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION( msg) \
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
//! Out of range exception
#define RANGE_EXCEPTION( msg) \
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
////////////////////////////////////////////////////////////////////////////////
//! Implementation
// includes, system
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const char* detailed)
{
std::stringstream s;
// Quiet heavy-weight but exceptions are not for
// performance / release versions
s << "Exception in file '" << file << "' in line " << line << "\n"
<< "Detailed description: " << detailed << "\n";
throw Exception( s.str());
}
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it( const char* file, const int line, const std::string& msg)
{
throw_it( file, line, msg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception() :
Exception("Unknown Exception.\n")
{ }
////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! String returned by what().
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception( const std::string& s) :
Std_Exception( s)
{ }
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::~Exception() throw() { }
// functions, exported
#endif // #ifndef _EXCEPTION_H_