Improve sensitivityCalc performance.

This commit is contained in:
kradchen
2023-05-19 09:52:27 +08:00
parent 7088602b59
commit 40c882e3a9

View File

@@ -131,16 +131,18 @@ Aurora::Matrix precalcSensitivityForTAS(const Aurora::Matrix &aMSensChar,
const Aurora::Matrix &aMStartPositions,
const Aurora::Matrix &aMStartNormals,
const Aurora::Matrix &aMEndPositions) {
auto sens = Aurora::zeros(aMEndPositions.getDimSize(1),
aMStartPositions.getDimSize(1));
for (size_t i = 0; i < aMStartPositions.getDimSize(1); ++i) {
auto dirVector = aMEndPositions -
Aurora::repmat(aMStartPositions(Aurora::$, i).toMatrix(),
1, aMEndPositions.getDimSize(1));
sens(Aurora::$, i) = getSensitivity(
aMSensChar, aMStartNormals(Aurora::$, i).toMatrix(), dirVector);
}
return sens;
auto sens = Aurora::zeros(aMEndPositions.getDimSize(1),
aMStartPositions.getDimSize(1));
//有修改多线程化可提升10倍速
#pragma omp parallel for
for (size_t i = 0; i < aMStartPositions.getDimSize(1); ++i) {
auto dirVector = aMEndPositions -
Aurora::repmat(aMStartPositions(Aurora::$, i).toMatrix(),
1, aMEndPositions.getDimSize(1));
sens(Aurora::$, i) = getSensitivity(
aMSensChar, aMStartNormals(Aurora::$, i).toMatrix(), dirVector);
}
return sens;
}
std::vector<Aurora::Matrix>
@@ -150,35 +152,35 @@ combineSensitivity(const Aurora::Matrix &aVSenderTASRange,
const Aurora::Matrix &aVReceiverElementRange,
const Aurora::Matrix &aMSenderSens,
const Aurora::Matrix &aMReceiverSens) {
double maxReceiverElementRange =
Aurora::max(aVReceiverElementRange).getScalar();
double maxReceiverTASRange = Aurora::max(aVReceiverTASRange).getScalar();
double maxSenderElementRange = Aurora::max(aVSenderElementRange).getScalar();
double maxSenderTASRange = Aurora::max(aVSenderTASRange).getScalar();
std::vector<Aurora::Matrix> ret;
for (size_t i = 0; i < maxSenderTASRange; ++i) {
ret.emplace_back(Aurora::zeros(maxReceiverElementRange, maxReceiverTASRange,
maxSenderElementRange));
}
size_t countSE = 0;
for (size_t i = 0; i < aVSenderTASRange.getDataSize(); i++) {
auto se = aVReceiverTASRange.getData()[i];
for (size_t j = 0; j < aVSenderElementRange.getDataSize(); j++) {
auto sn = aVSenderElementRange.getData()[j];
size_t countRE = 0;
for (size_t k = 0; k < aVReceiverTASRange.getDataSize(); k++) {
auto re = aVReceiverTASRange.getData()[k];
for (size_t n = 0; n < aVReceiverElementRange.getDataSize(); n++) {
auto rn = aVReceiverElementRange.getData()[n];
ret[se - 1](rn - 1, re - 1, sn - 1) =
aMSenderSens(countRE, countSE).toMatrix().getScalar() *
aMReceiverSens(countSE, countRE).toMatrix().getScalar();
countRE++;
}
}
countSE++;
double maxReceiverElementRange =
Aurora::max(aVReceiverElementRange).getScalar();
double maxReceiverTASRange = Aurora::max(aVReceiverTASRange).getScalar();
double maxSenderElementRange = Aurora::max(aVSenderElementRange).getScalar();
double maxSenderTASRange = Aurora::max(aVSenderTASRange).getScalar();
std::vector<Aurora::Matrix> ret;
for (size_t i = 0; i < maxSenderTASRange; ++i) {
ret.emplace_back(Aurora::zeros(maxReceiverElementRange, maxReceiverTASRange,
maxSenderElementRange));
}
//有修改多线程化可提升10倍速
#pragma omp parallel for
for (size_t i = 0; i < aVSenderTASRange.getDataSize(); i++) {
auto se = aVReceiverTASRange.getData()[i];
for (size_t j = 0; j < aVSenderElementRange.getDataSize(); j++) {
auto sn = aVSenderElementRange.getData()[j];
size_t countSE = i * aVSenderElementRange.getDataSize() + j;
for (size_t k = 0; k < aVReceiverTASRange.getDataSize(); k++) {
auto re = aVReceiverTASRange.getData()[k];
for (size_t n = 0; n < aVReceiverElementRange.getDataSize(); n++) {
size_t countRE = k*aVReceiverElementRange.getDataSize()+n;
auto rn = aVReceiverElementRange.getData()[n];
ret[se - 1](rn - 1, re - 1, sn - 1) =
aMSenderSens(countRE, countSE).toMatrix().getScalar() *
aMReceiverSens(countSE, countRE).toMatrix().getScalar();
}
}
}
}
}
return ret;
}