LCOV - Palace Coverage Report - models/strattonchu.cpp

LCOV - code coverage report

Current view:	top level - models - strattonchu.cpp (source / functions)		Coverage	Total	Hit
Test:	Palace Coverage Report	Lines:	100.0 %	33	33
Test Date:	2025-10-23 22:45:05	Functions:	100.0 %	1	1
Legend:	Lines: hit not hit

            Line data    Source code

       1              : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
       2              : // SPDX-License-Identifier: Apache-2.0
       3              : 
       4              : #include "strattonchu.hpp"
       5              : 
       6              : #include "fem/coefficient.hpp"
       7              : #include "utils/omp.hpp"
       8              : 
       9              : namespace palace
      10              : {
      11              : 
      12              : // Computes contribution to Stratton-Chu far-field integrands for multiple observation
      13              : // directions for the given element T.
      14              : //
      15              : // The Stratton-Chu transformations to compute far-field electric field rE_∞(r̂)
      16              : // at multiple observation directions r₀ (specified by a vector of (θ, ϕ)s) are:
      17              : //
      18              : // r E_∞(r₀) = (ik/4π) r₀ × ∫_S [n̂ × E - Z r₀ × (n̂ × H)] e^{ikr₀·r'} dS  ≈ r₀ × Σ (Iᵣ +
      19              : // iIᵢ).
      20              : //
      21              : // where:
      22              : //   - S is the boundary surface with outward normal n̂.
      23              : //   - E, H are the electric and magnetic fields on the surface.
      24              : //   - k = ω/c is the wavenumber.
      25              : //   - Z is the impedance.
      26              : //   - r₀ = (sin θ cos φ, sin θ sin φ, cos θ) are observation directions.
      27              : //   - r' are source points on the surface.
      28              : //
      29              : // This function computes Iᵣ and Iᵢ for all the input observation points.
      30              : //
      31              : // This equation was obtained starting from Stratton-Chu's transformations, assuming an
      32              : // analytic expression for Green's function (G(r, r0) = exp(-ik|r - r₀|) / (4π|r - r₀|)),
      33              : // and expanding for r that goes to infinity.
      34              : //
      35              : // Note:
      36              : // - This equation is only valid in three dimensional space.
      37              : // - This equation is only valid when all the materials have scalar μ and ε.
      38              : // - The implementation assumes S is an external boundary.
      39              : //
      40              : // Note also:
      41              : //
      42              : // This function uses std::vector<std::array<double, 3>> instead of Vectors. The
      43              : // reason for this is so that we can ensure that memory layout is simple (and
      44              : // contiguous), ensuring that we can perform one single MPI reduction for all
      45              : // the points in integrand_*.
      46         9600 : void AddStrattonChuIntegrandAtElement(const GridFunction &E, const GridFunction &B,
      47              :                                       const MaterialOperator &mat_op, double omega_re,
      48              :                                       double omega_im,
      49              :                                       std::vector<std::array<double, 3>> &r_naughts,
      50              :                                       mfem::ElementTransformation &T,
      51              :                                       const mfem::IntegrationRule &ir,
      52              :                                       std::vector<std::array<double, 3>> &integrand_r,
      53              :                                       std::vector<std::array<double, 3>> &integrand_i)
      54              : {
      55              :   MFEM_ASSERT(E.VectorDim() == 3, "Stratton-Chu requires 3D vector fields!");
      56              :   MFEM_ASSERT(B.VectorDim() == 3, "Stratton-Chu requires 3D vector fields!");
      57              : 
      58         9600 :   MFEM_VERIFY(integrand_r.size() == r_naughts.size() &&
      59              :                   integrand_i.size() == r_naughts.size(),
      60              :               "Mismatch between input points and result vector.")
      61              : 
      62              :   MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
      63              :               "Unexpected element type in BdrSurfaceFluxCoefficient!");
      64              : 
      65         9600 :   StaticVector<3> r_phys;
      66         9600 :   StaticVector<3> E_real, E_imag, B_real, B_imag;
      67         9600 :   StaticVector<3> ZH_real, ZH_imag;
      68         9600 :   StaticVector<3> normal;
      69         9600 :   StaticVector<3> n_cross_Er, n_cross_ZHr;
      70         9600 :   StaticVector<3> n_cross_Ei, n_cross_ZHi;
      71              : 
      72              :   const mfem::ParMesh *mesh = E.Real().ParFESpace()->GetParMesh();
      73         9600 :   mfem::FaceElementTransformations FET;
      74         9600 :   mfem::IsoparametricTransformation T1, T2;
      75              : 
      76        38400 :   for (int j = 0; j < ir.GetNPoints(); j++)
      77              :   {
      78              :     const mfem::IntegrationPoint &ip = ir.IntPoint(j);
      79              :     T.SetIntPoint(&ip);
      80              : 
      81              :     MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT,
      82              :                 "Unexpected element type in BdrSurfaceFluxCoefficient!");
      83              : 
      84        28800 :     bool invert = BdrGridFunctionCoefficient::GetBdrElementNeighborTransformations(
      85              :         T.ElementNo, *mesh, FET, T1, T2, &ip);  // NOTE: this updates FET.
      86        28800 :     MFEM_VERIFY(!FET.Elem2,
      87              :                 "FarField computations are only supported on external boundaries.")
      88              : 
      89        28800 :     T.Transform(ip, r_phys);
      90              : 
      91              :     // Evaluate E and B fields on this element.
      92        28800 :     E.Real().GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), E_real);
      93        28800 :     E.Imag().GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), E_imag);
      94        28800 :     B.Real().GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), B_real);
      95        28800 :     B.Imag().GetVectorValue(*FET.Elem1, FET.Elem1->GetIntPoint(), B_imag);
      96              : 
      97              :     // We assume that the material is isotropic, so the wave speed is a scalar.
      98        28800 :     double wave_speed = mat_op.GetLightSpeedMax(FET.Elem1->Attribute);
      99        28800 :     double k_re = omega_re / wave_speed;
     100        28800 :     double k_im = omega_im / wave_speed;
     101        28800 :     double quadrature_weight = ip.weight * T.Weight();
     102              : 
     103              :     // Complex prefactor: (ik/4π) = (i(k_re + ik_im)/4π) = (ik_re - k_im)/4π.
     104        28800 :     double prefactor_re = -quadrature_weight * k_im / (4 * M_PI);
     105        28800 :     double prefactor_im = quadrature_weight * k_re / (4 * M_PI);
     106              : 
     107              :     // Z * H = c0 * B.
     108        28800 :     mat_op.GetLightSpeed(FET.Elem1->Attribute).Mult(B_real, ZH_real);
     109        28800 :     mat_op.GetLightSpeed(FET.Elem1->Attribute).Mult(B_imag, ZH_imag);
     110        28800 :     BdrGridFunctionCoefficient::GetNormal(T, normal, invert);
     111              : 
     112              :     // n̂ × E.
     113        28800 :     linalg::Cross3(normal, E_real, n_cross_Er);
     114        28800 :     linalg::Cross3(normal, E_imag, n_cross_Ei);
     115              : 
     116              :     // n̂ × ZH.
     117        28800 :     linalg::Cross3(normal, ZH_real, n_cross_ZHr);
     118        28800 :     linalg::Cross3(normal, ZH_imag, n_cross_ZHi);
     119              : 
     120              :     // This is a hot loop. Manually unrolling and avoiding Vectors significantly
     121              :     // increases performance.
     122        28800 :     PalacePragmaOmp(parallel for schedule(static))
     123              :     for (size_t i = 0; i < r_naughts.size(); i++)
     124              :     {
     125              :       const auto &r = r_naughts[i];
     126              : 
     127              :       double r0 = r[0], r1 = r[1], r2 = r[2];
     128              :       double dot_product = r0 * r_phys(0) + r1 * r_phys(1) + r2 * r_phys(2);
     129              : 
     130              :       // Complex phase: exp(i*k*r₀·r') = exp(i*(k_re + ik_im)*dot_product)
     131              :       //                               = exp(i*k_re*dot_product - k_im*dot_product)
     132              :       //                               = exp(-k_im*dot_product) * exp(i*k_re*dot_product).
     133              :       double amplitude = std::exp(-k_im * dot_product);
     134              :       double phase_re = k_re * dot_product;
     135              :       double cos_phase = std::cos(phase_re);
     136              :       double sin_phase = std::sin(phase_re);
     137              : 
     138              :       // Complex weight: prefactor * exp(i*k*r₀·r').
     139              :       double w_real = amplitude * (prefactor_re * cos_phase - prefactor_im * sin_phase);
     140              :       double w_imag = amplitude * (prefactor_re * sin_phase + prefactor_im * cos_phase);
     141              : 
     142              :       // r₀ × (n̂ × ZH).
     143              :       double cr0 = r1 * n_cross_ZHr(2) - r2 * n_cross_ZHr(1);
     144              :       double cr1 = r2 * n_cross_ZHr(0) - r0 * n_cross_ZHr(2);
     145              :       double cr2 = r0 * n_cross_ZHr(1) - r1 * n_cross_ZHr(0);
     146              : 
     147              :       double ci0 = r1 * n_cross_ZHi(2) - r2 * n_cross_ZHi(1);
     148              :       double ci1 = r2 * n_cross_ZHi(0) - r0 * n_cross_ZHi(2);
     149              :       double ci2 = r0 * n_cross_ZHi(1) - r1 * n_cross_ZHi(0);
     150              : 
     151              :       // Complex multiplication: (A + iB) * (w_real + i*w_imag).
     152              :       double A0 = n_cross_Er(0) - cr0, B0 = n_cross_Ei(0) - ci0;
     153              :       double A1 = n_cross_Er(1) - cr1, B1 = n_cross_Ei(1) - ci1;
     154              :       double A2 = n_cross_Er(2) - cr2, B2 = n_cross_Ei(2) - ci2;
     155              : 
     156              :       integrand_r[i][0] += A0 * w_real - B0 * w_imag;
     157              :       integrand_r[i][1] += A1 * w_real - B1 * w_imag;
     158              :       integrand_r[i][2] += A2 * w_real - B2 * w_imag;
     159              : 
     160              :       integrand_i[i][0] += A0 * w_imag + B0 * w_real;
     161              :       integrand_i[i][1] += A1 * w_imag + B1 * w_real;
     162              :       integrand_i[i][2] += A2 * w_imag + B2 * w_real;
     163              :     }
     164              :   }
     165        19200 : }
     166              : 
     167              : };  // namespace palace

Generated by: LCOV version 2.0-1