// The code was adapted from Eigen/src/Eigenvaleus/RealSchur.h // // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2010,2012 Jitse Niesen // Copyright (C) 2021-2022 Yixuan Qiu // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at https://mozilla.org/MPL/2.0/. #ifndef SPECTRA_UPPER_HESSENBERG_SCHUR_H #define SPECTRA_UPPER_HESSENBERG_SCHUR_H #include #include #include #include #include "../Util/TypeTraits.h" namespace Spectra { template class UpperHessenbergSchur { private: using Index = Eigen::Index; using Matrix = Eigen::Matrix; using Vector = Eigen::Matrix; using Vector2s = Eigen::Matrix; using Vector3s = Eigen::Matrix; using GenericMatrix = Eigen::Ref; using ConstGenericMatrix = const Eigen::Ref; Index m_n; // Size of the matrix Matrix m_T; // T matrix, A = UTU' Matrix m_U; // U matrix, A = UTU' bool m_computed; // L1 norm of an upper Hessenberg matrix static Scalar upper_hessenberg_l1_norm(ConstGenericMatrix& x) { const Index n = x.cols(); Scalar norm(0); for (Index j = 0; j < n; j++) norm += x.col(j).segment(0, (std::min)(n, j + 2)).cwiseAbs().sum(); return norm; } // Look for single small sub-diagonal element and returns its index Index find_small_subdiag(Index iu, const Scalar& near_0) const { using std::abs; const Scalar eps = Eigen::NumTraits::epsilon(); Index res = iu; while (res > 0) { Scalar s = abs(m_T.coeff(res - 1, res - 1)) + abs(m_T.coeff(res, res)); s = Eigen::numext::maxi(s * eps, near_0); if (abs(m_T.coeff(res, res - 1)) <= s) break; res--; } return res; } // Update T given that rows iu-1 and iu decouple from the rest void split_off_two_rows(Index iu, const Scalar& ex_shift) { using std::sqrt; using std::abs; // The eigenvalues of the 2x2 matrix [a b; c d] are // trace +/- sqrt(discr/4) where discr = tr^2 - 4*det, tr = a + d, det = ad - bc Scalar p = Scalar(0.5) * (m_T.coeff(iu - 1, iu - 1) - m_T.coeff(iu, iu)); Scalar q = p * p + m_T.coeff(iu, iu - 1) * m_T.coeff(iu - 1, iu); // q = tr^2 / 4 - det = discr/4 m_T.coeffRef(iu, iu) += ex_shift; m_T.coeffRef(iu - 1, iu - 1) += ex_shift; if (q >= Scalar(0)) // Two real eigenvalues { Scalar z = sqrt(abs(q)); Eigen::JacobiRotation rot; rot.makeGivens((p >= Scalar(0)) ? (p + z) : (p - z), m_T.coeff(iu, iu - 1)); m_T.rightCols(m_n - iu + 1).applyOnTheLeft(iu - 1, iu, rot.adjoint()); m_T.topRows(iu + 1).applyOnTheRight(iu - 1, iu, rot); m_T.coeffRef(iu, iu - 1) = Scalar(0); m_U.applyOnTheRight(iu - 1, iu, rot); } if (iu > 1) m_T.coeffRef(iu - 1, iu - 2) = Scalar(0); } // Form shift in shift_info, and update ex_shift if an exceptional shift is performed void compute_shift(Index iu, Index iter, Scalar& ex_shift, Vector3s& shift_info) { using std::sqrt; using std::abs; shift_info.coeffRef(0) = m_T.coeff(iu, iu); shift_info.coeffRef(1) = m_T.coeff(iu - 1, iu - 1); shift_info.coeffRef(2) = m_T.coeff(iu, iu - 1) * m_T.coeff(iu - 1, iu); // Wilkinson's original ad hoc shift if (iter == 10) { ex_shift += shift_info.coeff(0); for (Index i = 0; i <= iu; ++i) m_T.coeffRef(i, i) -= shift_info.coeff(0); Scalar s = abs(m_T.coeff(iu, iu - 1)) + abs(m_T.coeff(iu - 1, iu - 2)); shift_info.coeffRef(0) = Scalar(0.75) * s; shift_info.coeffRef(1) = Scalar(0.75) * s; shift_info.coeffRef(2) = Scalar(-0.4375) * s * s; } // MATLAB's new ad hoc shift if (iter == 30) { Scalar s = (shift_info.coeff(1) - shift_info.coeff(0)) / Scalar(2); s = s * s + shift_info.coeff(2); if (s > Scalar(0)) { s = sqrt(s); if (shift_info.coeff(1) < shift_info.coeff(0)) s = -s; s = s + (shift_info.coeff(1) - shift_info.coeff(0)) / Scalar(2); s = shift_info.coeff(0) - shift_info.coeff(2) / s; ex_shift += s; for (Index i = 0; i <= iu; ++i) m_T.coeffRef(i, i) -= s; shift_info.setConstant(Scalar(0.964)); } } } // Compute index im at which Francis QR step starts and the first Householder vector void init_francis_qr_step(Index il, Index iu, const Vector3s& shift_info, Index& im, Vector3s& first_householder_vec) const { using std::abs; const Scalar eps = Eigen::NumTraits::epsilon(); Vector3s& v = first_householder_vec; // alias to save typing for (im = iu - 2; im >= il; --im) { const Scalar Tmm = m_T.coeff(im, im); const Scalar r = shift_info.coeff(0) - Tmm; const Scalar s = shift_info.coeff(1) - Tmm; v.coeffRef(0) = (r * s - shift_info.coeff(2)) / m_T.coeff(im + 1, im) + m_T.coeff(im, im + 1); v.coeffRef(1) = m_T.coeff(im + 1, im + 1) - Tmm - r - s; v.coeffRef(2) = m_T.coeff(im + 2, im + 1); if (im == il) break; const Scalar lhs = m_T.coeff(im, im - 1) * (abs(v.coeff(1)) + abs(v.coeff(2))); const Scalar rhs = v.coeff(0) * (abs(m_T.coeff(im - 1, im - 1)) + abs(Tmm) + abs(m_T.coeff(im + 1, im + 1))); if (abs(lhs) < eps * rhs) break; } } // P = I - tau * v * v' = P' // PX = X - tau * v * (v'X), X [3 x c] static void apply_householder_left(const Vector2s& ess, const Scalar& tau, Scalar* x, Index ncol, Index stride) { const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1); const Scalar* const x_end = x + ncol * stride; for (; x < x_end; x += stride) { const Scalar tvx = tau * (x[0] + v1 * x[1] + v2 * x[2]); x[0] -= tvx; x[1] -= tvx * v1; x[2] -= tvx * v2; } } // P = I - tau * v * v' = P' // XP = X - tau * (X * v) * v', X [r x 3] static void apply_householder_right(const Vector2s& ess, const Scalar& tau, Scalar* x, Index nrow, Index stride) { const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1); Scalar* x0 = x; Scalar* x1 = x + stride; Scalar* x2 = x1 + stride; for (Index i = 0; i < nrow; i++) { const Scalar txv = tau * (x0[i] + v1 * x1[i] + v2 * x2[i]); x0[i] -= txv; x1[i] -= txv * v1; x2[i] -= txv * v2; } } // SIMD version of apply_householder_right() // Inspired by apply_rotation_in_the_plane_selector() in Eigen/src/Jacobi/Jacobi.h static void apply_householder_right_simd(const Vector2s& ess, const Scalar& tau, Scalar* x, Index nrow, Index stride) { // Packet type using Eigen::internal::ploadu; using Eigen::internal::pstoreu; using Eigen::internal::pset1; using Eigen::internal::padd; using Eigen::internal::psub; using Eigen::internal::pmul; using Packet = typename Eigen::internal::packet_traits::type; constexpr unsigned char PacketSize = Eigen::internal::packet_traits::size; constexpr unsigned char Peeling = 2; constexpr unsigned char Increment = Peeling * PacketSize; // Column heads Scalar* x0 = x; Scalar* x1 = x + stride; Scalar* x2 = x1 + stride; // Pointers for the current row Scalar* px0 = x0; Scalar* px1 = x1; Scalar* px2 = x2; // Householder reflectors const Scalar v1 = ess.coeff(0), v2 = ess.coeff(1); // Vectorized versions const Packet vtau = pset1(tau); const Packet vv1 = pset1(v1); const Packet vv2 = pset1(v2); // n % (2^k) == n & (2^k-1), see https://stackoverflow.com/q/3072665 // const Index peeling_end = nrow - nrow % Increment; const Index aligned_end = nrow - (nrow & (PacketSize - 1)); const Index peeling_end = nrow - (nrow & (Increment - 1)); for (Index i = 0; i < peeling_end; i += Increment) { Packet vx01 = ploadu(px0); Packet vx02 = ploadu(px0 + PacketSize); Packet vx11 = ploadu(px1); Packet vx12 = ploadu(px1 + PacketSize); Packet vx21 = ploadu(px2); Packet vx22 = ploadu(px2 + PacketSize); // Packet txv1 = vtau * (vx01 + vv1 * vx11 + vv2 * vx21); Packet txv1 = pmul(vtau, padd(padd(vx01, pmul(vv1, vx11)), pmul(vv2, vx21))); Packet txv2 = pmul(vtau, padd(padd(vx02, pmul(vv1, vx12)), pmul(vv2, vx22))); pstoreu(px0, psub(vx01, txv1)); pstoreu(px0 + PacketSize, psub(vx02, txv2)); pstoreu(px1, psub(vx11, pmul(txv1, vv1))); pstoreu(px1 + PacketSize, psub(vx12, pmul(txv2, vv1))); pstoreu(px2, psub(vx21, pmul(txv1, vv2))); pstoreu(px2 + PacketSize, psub(vx22, pmul(txv2, vv2))); px0 += Increment; px1 += Increment; px2 += Increment; } if (aligned_end != peeling_end) { px0 = x0 + peeling_end; px1 = x1 + peeling_end; px2 = x2 + peeling_end; Packet x0_p = ploadu(px0); Packet x1_p = ploadu(px1); Packet x2_p = ploadu(px2); Packet txv = pmul(vtau, padd(padd(x0_p, pmul(vv1, x1_p)), pmul(vv2, x2_p))); pstoreu(px0, psub(x0_p, txv)); pstoreu(px1, psub(x1_p, pmul(txv, vv1))); pstoreu(px2, psub(x2_p, pmul(txv, vv2))); } // Remaining rows for (Index i = aligned_end; i < nrow; i++) { const Scalar txv = tau * (x0[i] + v1 * x1[i] + v2 * x2[i]); x0[i] -= txv; x1[i] -= txv * v1; x2[i] -= txv * v2; } } // Perform a Francis QR step involving rows il:iu and columns im:iu void perform_francis_qr_step(Index il, Index im, Index iu, const Vector3s& first_householder_vec, const Scalar& near_0) { using std::abs; for (Index k = im; k <= iu - 2; ++k) { const bool first_iter = (k == im); Vector3s v; if (first_iter) v = first_householder_vec; else v = m_T.template block<3, 1>(k, k - 1); Scalar tau, beta; Vector2s ess; v.makeHouseholder(ess, tau, beta); if (abs(beta) > near_0) // if v is not zero { if (first_iter && k > il) m_T.coeffRef(k, k - 1) = -m_T.coeff(k, k - 1); else if (!first_iter) m_T.coeffRef(k, k - 1) = beta; // These Householder transformations form the O(n^3) part of the algorithm // m_T.block(k, k, 3, m_n - k).applyHouseholderOnTheLeft(ess, tau, workspace); // m_T.block(0, k, (std::min)(iu, k + 3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace); // m_U.block(0, k, m_n, 3).applyHouseholderOnTheRight(ess, tau, workspace); apply_householder_left(ess, tau, &m_T.coeffRef(k, k), m_n - k, m_n); apply_householder_right_simd(ess, tau, &m_T.coeffRef(0, k), (std::min)(iu, k + 3) + 1, m_n); apply_householder_right_simd(ess, tau, &m_U.coeffRef(0, k), m_n, m_n); } } // The last 2-row block Eigen::JacobiRotation rot; Scalar beta; rot.makeGivens(m_T.coeff(iu - 1, iu - 2), m_T.coeff(iu, iu - 2), &beta); if (abs(beta) > near_0) // if v is not zero { m_T.coeffRef(iu - 1, iu - 2) = beta; m_T.rightCols(m_n - iu + 1).applyOnTheLeft(iu - 1, iu, rot.adjoint()); m_T.topRows(iu + 1).applyOnTheRight(iu - 1, iu, rot); m_U.applyOnTheRight(iu - 1, iu, rot); } // clean up pollution due to round-off errors for (Index i = im + 2; i <= iu; ++i) { m_T.coeffRef(i, i - 2) = Scalar(0); if (i > im + 2) m_T.coeffRef(i, i - 3) = Scalar(0); } } public: UpperHessenbergSchur() : m_n(0), m_computed(false) {} UpperHessenbergSchur(ConstGenericMatrix& mat) : m_n(mat.rows()), m_computed(false) { compute(mat); } void compute(ConstGenericMatrix& mat) { using std::abs; using std::sqrt; if (mat.rows() != mat.cols()) throw std::invalid_argument("UpperHessenbergSchur: matrix must be square"); m_n = mat.rows(); m_T.resize(m_n, m_n); m_U.resize(m_n, m_n); constexpr Index max_iter_per_row = 40; const Index max_iter = m_n * max_iter_per_row; m_T.noalias() = mat; m_U.setIdentity(); // The matrix m_T is divided in three parts. // Rows 0,...,il-1 are decoupled from the rest because m_T(il,il-1) is zero. // Rows il,...,iu is the part we are working on (the active window). // Rows iu+1,...,end are already brought in triangular form. Index iu = m_n - 1; Index iter = 0; // iteration count for current eigenvalue Index total_iter = 0; // iteration count for whole matrix Scalar ex_shift(0); // sum of exceptional shifts const Scalar norm = upper_hessenberg_l1_norm(m_T); // sub-diagonal entries smaller than near_0 will be treated as zero. // We use eps^2 to enable more precision in small eigenvalues. const Scalar eps = Eigen::NumTraits::epsilon(); const Scalar near_0 = Eigen::numext::maxi(norm * eps * eps, TypeTraits::min()); if (norm != Scalar(0)) { while (iu >= 0) { Index il = find_small_subdiag(iu, near_0); // Check for convergence if (il == iu) // One root found { m_T.coeffRef(iu, iu) += ex_shift; if (iu > 0) m_T.coeffRef(iu, iu - 1) = Scalar(0); iu--; iter = 0; } else if (il == iu - 1) // Two roots found { split_off_two_rows(iu, ex_shift); iu -= 2; iter = 0; } else // No convergence yet { Vector3s first_householder_vec = Vector3s::Zero(), shift_info; compute_shift(iu, iter, ex_shift, shift_info); iter++; total_iter++; if (total_iter > max_iter) break; Index im; init_francis_qr_step(il, iu, shift_info, im, first_householder_vec); perform_francis_qr_step(il, im, iu, first_householder_vec, near_0); } } } if (total_iter > max_iter) throw std::runtime_error("UpperHessenbergSchur: Schur decomposition failed"); m_computed = true; } const Matrix& matrix_T() const { if (!m_computed) throw std::logic_error("UpperHessenbergSchur: need to call compute() first"); return m_T; } const Matrix& matrix_U() const { if (!m_computed) throw std::logic_error("UpperHessenbergSchur: need to call compute() first"); return m_U; } void swap_T(Matrix& other) { m_T.swap(other); } void swap_U(Matrix& other) { m_U.swap(other); } }; } // namespace Spectra #endif // SPECTRA_UPPER_HESSENBERG_SCHUR_H