dtof change 1

This commit is contained in:
kradchen
2023-10-09 09:50:54 +08:00
parent c4c22ff6cf
commit b527b108e1
7 changed files with 60 additions and 59 deletions

View File

@@ -58,7 +58,7 @@ tval3_options* getOptions(const TVALOptions& opt){
return optso;
}
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
size_t dim_y = mM;
size_t dim_x = mN;
@@ -66,7 +66,7 @@ sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t
int *mi_columnIndex = yIdxs;
int n_nonzero = nz;
double *mi_data = mValues;
float *mi_data = mValues;
sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero,
sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);
@@ -76,13 +76,13 @@ sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t
for(int i=0; i < mo->nnz; i++) {
mo->ind()[i] = mi_dim_y[i];
mo->val()[i] = (float)mi_data[i];
mo->val()[i] = mi_data[i];
}
return mo;
}
TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN,
TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
int device, bool pagelocked) {
int ip = dims[0];
@@ -108,12 +108,13 @@ TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN
if(plb_b != NULL)
HANDLE_ERROR(cudaHostUnregister(plb_b));
result.data = new double[mU.len];
result.data = new float[mU.len];
std::copy(mU.data(),mU.data()+mU.len,result.data);
result.dims[0] = mU.dim_x;
result.dims[1] = mU.dim_y;
result.dims[2] = mU.dim_z;
// if(info != NULL) *info = setInfo(ti_info);
delete mb;

View File

@@ -3,7 +3,7 @@
#include "tvalstruct.h"
extern TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN,
extern TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
int device, bool pagelocked) ;
#endif // __TVAL3GPU3D_H__

View File

@@ -19,7 +19,7 @@ struct TVALOptions{
};
struct TVALResult{
double* data = nullptr;
float* data = nullptr;
int dims[3]{0};
std::string errormsg;
};

View File

@@ -25,14 +25,14 @@
* @param[out] resEnvelopeRef, result of envelope of reference aScan
*
**/
int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScansRef_r,int numberScans, int numberSamples, int RESAMPLE_FACTOR, int nthreads, double* resDetect, double* resEnvelope, double* resEnvelopeRef) {
int calculateBankDetectAndHilbertTransformation(float* aScans_r, float* aScansRef_r,int numberScans, int numberSamples, int RESAMPLE_FACTOR, int nthreads, float* resDetect, float* resEnvelope, float* resEnvelopeRef) {
// resampling infos
int nresample_c; // for complex Hermitian symmetry for upsample 2 -> stays the same (!)
int nresample_r; // for real from Hermitian symmetry for upsample 2 -> stays the same (!)
double scale;
float scale;
bool even;
@@ -43,34 +43,34 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
int endIndex;
// interim results
static fftw_complex* resultCrossCor_c = NULL;
static fftw_complex* aScans_c_res = NULL;
static fftw_complex* aScansRef_c_res = NULL;
static fftw_complex* aScans_c = NULL;
static fftw_complex* aScansRef_c = NULL;
double* resultCrossCor_r;
double* aScans;
static fftwf_complex* resultCrossCor_c = NULL;
static fftwf_complex* aScans_c_res = NULL;
static fftwf_complex* aScansRef_c_res = NULL;
static fftwf_complex* aScans_c = NULL;
static fftwf_complex* aScansRef_c = NULL;
float* resultCrossCor_r;
float* aScans;
// fftw plans
static fftw_plan plan_fftAScans_rc = NULL;
static fftw_plan plan_ifftAScans_cr = NULL;
static fftw_plan plan_ifftAScans_cc = NULL;
// fftwf plans
static fftwf_plan plan_fftAScans_rc = NULL;
static fftwf_plan plan_ifftAScans_cr = NULL;
static fftwf_plan plan_ifftAScans_cc = NULL;
// fftw wisdom
char filenameFftwWisdom[200] = "";
// fftwf wisdom
char filenamefftwfWisdom[200] = "";
// precalculations
nresample_r = numberSamples * RESAMPLE_FACTOR;
nresample_c = numberSamples * RESAMPLE_FACTOR / 2;
scale = double((1.0 / nresample_r)* RESAMPLE_FACTOR);
scale = float((1.0 / nresample_r)* RESAMPLE_FACTOR);
even = (nresample_r / 2.0);
// load wisdom
sprintf(filenameFftwWisdom, "fftw_wisdom_detection_%d.wis", FFTW_WISDOM_TYPE);
sprintf(filenamefftwfWisdom, "fftwf_wisdom_detection_%d.wis", FFTW_WISDOM_TYPE);
int loadedWisdomUsed = 0;
if(fftw_import_wisdom_from_filename(filenameFftwWisdom) == 0) {
if(fftwf_import_wisdom_from_filename(filenamefftwfWisdom) == 0) {
// printf("wisdom not loaded.\n");
} else {
loadedWisdomUsed = 1;
@@ -78,33 +78,33 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
}
// mem alloc
resultCrossCor_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r);
aScans_c_res = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r);
aScansRef_c_res = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r);
resultCrossCor_r = (double*)malloc(numberScans * nresample_r * sizeof(double));
aScans_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * numberSamples);
aScansRef_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * numberSamples);
aScans = (double*)malloc(numberScans * numberSamples * sizeof(double));
resultCrossCor_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
aScans_c_res = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
aScansRef_c_res = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
resultCrossCor_r = (float*)malloc(numberScans * nresample_r * sizeof(float));
aScans_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * numberSamples);
aScansRef_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * numberSamples);
aScans = (float*)malloc(numberScans * numberSamples * sizeof(float));
/* fftw initializations */
/* fftwf initializations */
// thread ini
if ((nthreads > 0)) {
if (fftw_init_threads() == 0){
if (fftwf_init_threads() == 0){
printf("Data input are not the same size. Exiting.");
return 1;
}
fftw_plan_with_nthreads(nthreads);
fftwf_plan_with_nthreads(nthreads);
}
// plan creations
plan_fftAScans_rc = fftw_plan_many_dft_r2c(1, &numberSamples, numberScans, aScans, NULL, 1, numberSamples, aScans_c_res, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cr = fftw_plan_many_dft_c2r(1, &nresample_r, numberScans, resultCrossCor_c, NULL, 1, nresample_r, resultCrossCor_r, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cc = fftw_plan_many_dft(1, &numberSamples, numberScans, aScans_c_res, NULL, 1, nresample_r, aScans_c, &numberSamples, 1, numberSamples, 1, FFTW_WISDOM_TYPE);
plan_fftAScans_rc = fftwf_plan_many_dft_r2c(1, &numberSamples, numberScans, aScans, NULL, 1, numberSamples, aScans_c_res, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cr = fftwf_plan_many_dft_c2r(1, &nresample_r, numberScans, resultCrossCor_c, NULL, 1, nresample_r, resultCrossCor_r, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cc = fftwf_plan_many_dft(1, &numberSamples, numberScans, aScans_c_res, NULL, 1, nresample_r, aScans_c, &numberSamples, 1, numberSamples, 1, FFTW_WISDOM_TYPE);
// DFT of input signals
fftw_execute_dft_r2c(plan_fftAScans_rc, aScans_r, aScans_c_res);
fftw_execute_dft_r2c(plan_fftAScans_rc, aScansRef_r, aScansRef_c_res);
fftwf_execute_dft_r2c(plan_fftAScans_rc, aScans_r, aScans_c_res);
fftwf_execute_dft_r2c(plan_fftAScans_rc, aScansRef_r, aScansRef_c_res);
/* Calculus of fft(tab1)* conj(fft(tab2)) (first part) */
/* and calculations for hilbert transform */
@@ -154,9 +154,9 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
// Execute a IDFT plan
fftw_execute(plan_ifftAScans_cr);
fftw_execute_dft(plan_ifftAScans_cc,aScans_c_res,aScans_c);
fftw_execute_dft(plan_ifftAScans_cc,aScansRef_c_res,aScansRef_c);
fftwf_execute(plan_ifftAScans_cr);
fftwf_execute_dft(plan_ifftAScans_cc,aScans_c_res,aScans_c);
fftwf_execute_dft(plan_ifftAScans_cc,aScansRef_c_res,aScansRef_c);
// maximum detection
maximumDetection(resultCrossCor_r, numberScans, nresample_r, resDetect);
@@ -169,24 +169,24 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
}
// Store Wisdom
fftw_export_wisdom_to_filename(filenameFftwWisdom);
fftwf_export_wisdom_to_filename(filenamefftwfWisdom);
// clean
fftw_destroy_plan(plan_fftAScans_rc);
fftw_destroy_plan(plan_ifftAScans_cr);
fftw_destroy_plan(plan_ifftAScans_cc);
fftwf_destroy_plan(plan_fftAScans_rc);
fftwf_destroy_plan(plan_ifftAScans_cr);
fftwf_destroy_plan(plan_ifftAScans_cc);
fftw_free(resultCrossCor_c);
fftw_free(aScans_c_res);
fftw_free(aScansRef_c_res);
fftw_free(aScans_c);
fftw_free(aScansRef_c);
fftwf_free(resultCrossCor_c);
fftwf_free(aScans_c_res);
fftwf_free(aScansRef_c_res);
fftwf_free(aScans_c);
fftwf_free(aScansRef_c);
free(resultCrossCor_r);
free(aScans);
// fftw_cleanup_threads();
// fftw_cleanup();
fftw_forget_wisdom();
// fftwf_cleanup_threads();
// fftwf_cleanup();
fftwf_forget_wisdom();
return 0;
}

View File

@@ -1 +1 @@
extern int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScansRef_r,int numberScans, int numberSamples, int resampleFactor, int nthreads, double* resDetect, double* resEnvelope, double* resEnvelopeRef);
extern int calculateBankDetectAndHilbertTransformation(float* aScans_r, float* aScansRef_r,int numberScans, int numberSamples, int resampleFactor, int nthreads, float* resDetect, float* resEnvelope, float* resEnvelopeRef);

View File

@@ -9,9 +9,9 @@
* @param[out] outVector pointer to output array, calculated idx for each 0:n-1
*
**/
void maximumDetection(double* inArray, int n, int m, double* outVector) {
void maximumDetection(float* inArray, int n, int m, float* outVector) {
double maxVal;
float maxVal;
for (int j = 0; j < n; j++) {
outVector[j] = 0;
maxVal = inArray[j*m];

View File

@@ -1 +1 @@
extern void maximumDetection(double* inArray, int n, int m, double* outVector);
extern void maximumDetection(float* inArray, int n, int m, float* outVector);