dtof change 1 (double-to-float: switch double buffers and fftw_ calls to single-precision float / fftwf_)

This commit is contained in:
kradchen
2023-10-09 09:50:54 +08:00
parent c4c22ff6cf
commit b527b108e1
7 changed files with 60 additions and 59 deletions

View File

@@ -58,7 +58,7 @@ tval3_options* getOptions(const TVALOptions& opt){
return optso; return optso;
} }
sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t mM, size_t mN, int nz, bool pagelocked) { sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, float * mValues,size_t mM, size_t mN, int nz, bool pagelocked) {
size_t dim_y = mM; size_t dim_y = mM;
size_t dim_x = mN; size_t dim_x = mN;
@@ -66,7 +66,7 @@ sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t
int *mi_columnIndex = yIdxs; int *mi_columnIndex = yIdxs;
int n_nonzero = nz; int n_nonzero = nz;
double *mi_data = mValues; float *mi_data = mValues;
sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero, sparse_mat_host *mo = new sparse_mat_host(dim_y, dim_x, n_nonzero,
sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined); sparse_mat_csc, false, pagelocked, cudaHostAllocWriteCombined);
@@ -76,13 +76,13 @@ sparse_mat_host *getSparseMatrix(int* xIdxs, int* yIdxs, double * mValues,size_t
for(int i=0; i < mo->nnz; i++) { for(int i=0; i < mo->nnz; i++) {
mo->ind()[i] = mi_dim_y[i]; mo->ind()[i] = mi_dim_y[i];
mo->val()[i] = (float)mi_data[i]; mo->val()[i] = mi_data[i];
} }
return mo; return mo;
} }
TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN, TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt, int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
int device, bool pagelocked) { int device, bool pagelocked) {
int ip = dims[0]; int ip = dims[0];
@@ -108,12 +108,13 @@ TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN
if(plb_b != NULL) if(plb_b != NULL)
HANDLE_ERROR(cudaHostUnregister(plb_b)); HANDLE_ERROR(cudaHostUnregister(plb_b));
result.data = new double[mU.len]; result.data = new float[mU.len];
std::copy(mU.data(),mU.data()+mU.len,result.data); std::copy(mU.data(),mU.data()+mU.len,result.data);
result.dims[0] = mU.dim_x; result.dims[0] = mU.dim_x;
result.dims[1] = mU.dim_y; result.dims[1] = mU.dim_y;
result.dims[2] = mU.dim_z; result.dims[2] = mU.dim_z;
// if(info != NULL) *info = setInfo(ti_info); // if(info != NULL) *info = setInfo(ti_info);
delete mb; delete mb;

View File

@@ -3,7 +3,7 @@
#include "tvalstruct.h" #include "tvalstruct.h"
extern TVALResult TVALGPU(int *xIdxs, int *yIdxs, double *mValues, size_t mM, size_t mN, extern TVALResult TVALGPU(int *xIdxs, int *yIdxs, float *mValues, size_t mM, size_t mN,
int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt, int nz, float *bData, size_t *bDims, size_t *dims, const TVALOptions& opt,
int device, bool pagelocked) ; int device, bool pagelocked) ;
#endif // __TVAL3GPU3D_H__ #endif // __TVAL3GPU3D_H__

View File

@@ -19,7 +19,7 @@ struct TVALOptions{
}; };
struct TVALResult{ struct TVALResult{
double* data = nullptr; float* data = nullptr;
int dims[3]{0}; int dims[3]{0};
std::string errormsg; std::string errormsg;
}; };

View File

@@ -25,14 +25,14 @@
* @param[out] resEnvelopeRef, result of envelope of reference aScan * @param[out] resEnvelopeRef, result of envelope of reference aScan
* *
**/ **/
int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScansRef_r,int numberScans, int numberSamples, int RESAMPLE_FACTOR, int nthreads, double* resDetect, double* resEnvelope, double* resEnvelopeRef) { int calculateBankDetectAndHilbertTransformation(float* aScans_r, float* aScansRef_r,int numberScans, int numberSamples, int RESAMPLE_FACTOR, int nthreads, float* resDetect, float* resEnvelope, float* resEnvelopeRef) {
// resampling infos // resampling infos
int nresample_c; // for complex hermetian symmetry for upsample 2 -> stays the same (!) int nresample_c; // for complex hermetian symmetry for upsample 2 -> stays the same (!)
int nresample_r; // for real from hermetian symmetry for upsample 2 -> stays the same (!) int nresample_r; // for real from hermetian symmetry for upsample 2 -> stays the same (!)
double scale; float scale;
bool even; bool even;
@@ -43,34 +43,34 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
int endIndex; int endIndex;
// interim results // interim results
static fftw_complex* resultCrossCor_c = NULL; static fftwf_complex* resultCrossCor_c = NULL;
static fftw_complex* aScans_c_res = NULL; static fftwf_complex* aScans_c_res = NULL;
static fftw_complex* aScansRef_c_res = NULL; static fftwf_complex* aScansRef_c_res = NULL;
static fftw_complex* aScans_c = NULL; static fftwf_complex* aScans_c = NULL;
static fftw_complex* aScansRef_c = NULL; static fftwf_complex* aScansRef_c = NULL;
double* resultCrossCor_r; float* resultCrossCor_r;
double* aScans; float* aScans;
// fftw plans // fftwf plans
static fftw_plan plan_fftAScans_rc = NULL; static fftwf_plan plan_fftAScans_rc = NULL;
static fftw_plan plan_ifftAScans_cr = NULL; static fftwf_plan plan_ifftAScans_cr = NULL;
static fftw_plan plan_ifftAScans_cc = NULL; static fftwf_plan plan_ifftAScans_cc = NULL;
// fftw wisdom // fftwf wisdom
char filenameFftwWisdom[200] = ""; char filenamefftwfWisdom[200] = "";
// precalculations // precalculations
nresample_r = numberSamples * RESAMPLE_FACTOR; nresample_r = numberSamples * RESAMPLE_FACTOR;
nresample_c = numberSamples * RESAMPLE_FACTOR / 2; nresample_c = numberSamples * RESAMPLE_FACTOR / 2;
scale = double((1.0 / nresample_r)* RESAMPLE_FACTOR); scale = float((1.0 / nresample_r)* RESAMPLE_FACTOR);
even = (nresample_r / 2.0); even = (nresample_r / 2.0);
// load wisdom // load wisdom
sprintf(filenameFftwWisdom, "fftw_wisdom_detection_%d.wis", FFTW_WISDOM_TYPE); sprintf(filenamefftwfWisdom, "fftwf_wisdom_detection_%d.wis", FFTW_WISDOM_TYPE);
int loadedWisdomUsed = 0; int loadedWisdomUsed = 0;
if(fftw_import_wisdom_from_filename(filenameFftwWisdom) == 0) { if(fftwf_import_wisdom_from_filename(filenamefftwfWisdom) == 0) {
// printf("wisdom not loaded.\n"); // printf("wisdom not loaded.\n");
} else { } else {
loadedWisdomUsed = 1; loadedWisdomUsed = 1;
@@ -78,33 +78,33 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
} }
// mem alloc // mem alloc
resultCrossCor_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r); resultCrossCor_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
aScans_c_res = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r); aScans_c_res = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
aScansRef_c_res = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * nresample_r); aScansRef_c_res = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * nresample_r);
resultCrossCor_r = (double*)malloc(numberScans * nresample_r * sizeof(double)); resultCrossCor_r = (float*)malloc(numberScans * nresample_r * sizeof(float));
aScans_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * numberSamples); aScans_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * numberSamples);
aScansRef_c = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numberScans * numberSamples); aScansRef_c = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex) * numberScans * numberSamples);
aScans = (double*)malloc(numberScans * numberSamples * sizeof(double)); aScans = (float*)malloc(numberScans * numberSamples * sizeof(float));
/* fftw initializations */ /* fftwf initializations */
// thread ini // thread ini
if ((nthreads > 0)) { if ((nthreads > 0)) {
if (fftw_init_threads() == 0){ if (fftwf_init_threads() == 0){
printf("Data input are not the same size. Exiting."); printf("Data input are not the same size. Exiting.");
return 1; return 1;
} }
fftw_plan_with_nthreads(nthreads); fftwf_plan_with_nthreads(nthreads);
} }
// plan creations // plan creations
plan_fftAScans_rc = fftw_plan_many_dft_r2c(1, &numberSamples, numberScans, aScans, NULL, 1, numberSamples, aScans_c_res, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE); plan_fftAScans_rc = fftwf_plan_many_dft_r2c(1, &numberSamples, numberScans, aScans, NULL, 1, numberSamples, aScans_c_res, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cr = fftw_plan_many_dft_c2r(1, &nresample_r, numberScans, resultCrossCor_c, NULL, 1, nresample_r, resultCrossCor_r, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE); plan_ifftAScans_cr = fftwf_plan_many_dft_c2r(1, &nresample_r, numberScans, resultCrossCor_c, NULL, 1, nresample_r, resultCrossCor_r, &nresample_r, 1, nresample_r, FFTW_WISDOM_TYPE);
plan_ifftAScans_cc = fftw_plan_many_dft(1, &numberSamples, numberScans, aScans_c_res, NULL, 1, nresample_r, aScans_c, &numberSamples, 1, numberSamples, 1, FFTW_WISDOM_TYPE); plan_ifftAScans_cc = fftwf_plan_many_dft(1, &numberSamples, numberScans, aScans_c_res, NULL, 1, nresample_r, aScans_c, &numberSamples, 1, numberSamples, 1, FFTW_WISDOM_TYPE);
// DFT of input signals // DFT of input signals
fftw_execute_dft_r2c(plan_fftAScans_rc, aScans_r, aScans_c_res); fftwf_execute_dft_r2c(plan_fftAScans_rc, aScans_r, aScans_c_res);
fftw_execute_dft_r2c(plan_fftAScans_rc, aScansRef_r, aScansRef_c_res); fftwf_execute_dft_r2c(plan_fftAScans_rc, aScansRef_r, aScansRef_c_res);
/* Calculus of fft(tab1)* conj(fft(tab2)) (first part) */ /* Calculus of fft(tab1)* conj(fft(tab2)) (first part) */
/* and calculations for hilbert transform */ /* and calculations for hilbert transform */
@@ -154,9 +154,9 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
// Execute a IDFT plan // Execute a IDFT plan
fftw_execute(plan_ifftAScans_cr); fftwf_execute(plan_ifftAScans_cr);
fftw_execute_dft(plan_ifftAScans_cc,aScans_c_res,aScans_c); fftwf_execute_dft(plan_ifftAScans_cc,aScans_c_res,aScans_c);
fftw_execute_dft(plan_ifftAScans_cc,aScansRef_c_res,aScansRef_c); fftwf_execute_dft(plan_ifftAScans_cc,aScansRef_c_res,aScansRef_c);
// maximum detection // maximum detection
maximumDetection(resultCrossCor_r, numberScans, nresample_r, resDetect); maximumDetection(resultCrossCor_r, numberScans, nresample_r, resDetect);
@@ -169,24 +169,24 @@ int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScans
} }
// Store Wisdom // Store Wisdom
fftw_export_wisdom_to_filename(filenameFftwWisdom); fftwf_export_wisdom_to_filename(filenamefftwfWisdom);
// clean // clean
fftw_destroy_plan(plan_fftAScans_rc); fftwf_destroy_plan(plan_fftAScans_rc);
fftw_destroy_plan(plan_ifftAScans_cr); fftwf_destroy_plan(plan_ifftAScans_cr);
fftw_destroy_plan(plan_ifftAScans_cc); fftwf_destroy_plan(plan_ifftAScans_cc);
fftw_free(resultCrossCor_c); fftwf_free(resultCrossCor_c);
fftw_free(aScans_c_res); fftwf_free(aScans_c_res);
fftw_free(aScansRef_c_res); fftwf_free(aScansRef_c_res);
fftw_free(aScans_c); fftwf_free(aScans_c);
fftw_free(aScansRef_c); fftwf_free(aScansRef_c);
free(resultCrossCor_r); free(resultCrossCor_r);
free(aScans); free(aScans);
// fftw_cleanup_threads(); // fftwf_cleanup_threads();
// fftw_cleanup(); // fftwf_cleanup();
fftw_forget_wisdom(); fftwf_forget_wisdom();
return 0; return 0;
} }

View File

@@ -1 +1 @@
extern int calculateBankDetectAndHilbertTransformation(double* aScans_r, double* aScansRef_r,int numberScans, int numberSamples, int resampleFactor, int nthreads, double* resDetect, double* resEnvelope, double* resEnvelopeRef); extern int calculateBankDetectAndHilbertTransformation(float* aScans_r, float* aScansRef_r,int numberScans, int numberSamples, int resampleFactor, int nthreads, float* resDetect, float* resEnvelope, float* resEnvelopeRef);

View File

@@ -9,9 +9,9 @@
* @param[out] outVector pointer to output array, calculated idx for each 0:n-1 * @param[out] outVector pointer to output array, calculated idx for each 0:n-1
* *
**/ **/
void maximumDetection(double* inArray, int n, int m, double* outVector) { void maximumDetection(float* inArray, int n, int m, float* outVector) {
double maxVal; float maxVal;
for (int j = 0; j < n; j++) { for (int j = 0; j < n; j++) {
outVector[j] = 0; outVector[j] = 0;
maxVal = inArray[j*m]; maxVal = inArray[j*m];

View File

@@ -1 +1 @@
extern void maximumDetection(double* inArray, int n, int m, double* outVector); extern void maximumDetection(float* inArray, int n, int m, float* outVector);