Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #include "dorfler.hpp"
5 :
6 : #include <algorithm>
7 : #include <limits>
8 : #include <numeric>
9 : #include <mfem.hpp>
10 :
11 : namespace palace::utils
12 : {
13 :
14 0 : std::array<double, 2> ComputeDorflerThreshold(MPI_Comm comm, const Vector &e,
15 : double fraction)
16 : {
17 : // Precompute the sort and partial sum to make evaluating a candidate partition fast.
18 0 : e.HostRead();
19 0 : std::vector<double> estimates(e.begin(), e.end());
20 0 : std::sort(estimates.begin(), estimates.end());
21 :
22 : // Accumulate the squares of the estimates.
23 0 : std::vector<double> sum(estimates.size());
24 0 : for (auto &x : estimates)
25 : {
26 0 : x *= x;
27 : }
28 : std::partial_sum(estimates.begin(), estimates.end(), sum.begin());
29 0 : for (auto &x : estimates)
30 : {
31 0 : x = std::sqrt(x);
32 : }
33 :
34 : // The pivot is the first point which leaves (1-θ) of the total sum after it.
35 0 : const double local_total = sum.size() > 0 ? sum.back() : 0.0;
36 0 : auto pivot = std::lower_bound(sum.begin(), sum.end(), (1 - fraction) * local_total);
37 : auto index = std::distance(sum.begin(), pivot);
38 0 : double error_threshold = estimates.size() > 0 ? estimates[index] : 0.0;
39 :
40 : // Compute the number of elements, and amount of error, marked by threshold value e.
41 0 : auto Marked = [&estimates, &sum, &local_total](double e) -> std::pair<std::size_t, double>
42 : {
43 0 : if (local_total > 0)
44 : {
45 0 : const auto lb = std::lower_bound(estimates.begin(), estimates.end(), e);
46 : const auto elems_marked = std::distance(lb, estimates.end());
47 : const double error_unmarked =
48 0 : lb != estimates.begin() ? sum[sum.size() - elems_marked - 1] : 0;
49 0 : const double error_marked = local_total - error_unmarked;
50 0 : return {elems_marked, error_marked};
51 : }
52 : else
53 : {
54 0 : return {0, 0.0};
55 : }
56 0 : };
57 :
58 : // Each processor will compute a different threshold: if a given processor has lots of low
59 : // error elements, their value will be lower and if a processor has high error, their
60 : // value will be higher. Thus using the value from the low error processor will give too
61 : // many elements, and using the value from the high error processor will give too few. The
62 : // correct threshold value will be an intermediate between the min and max over
63 : // processors.
64 0 : double min_threshold = error_threshold;
65 0 : double max_threshold = error_threshold;
66 0 : Mpi::GlobalMin(1, &min_threshold, comm);
67 0 : Mpi::GlobalMax(1, &max_threshold, comm);
68 : struct
69 : {
70 : std::size_t total;
71 : std::size_t min_marked;
72 : std::size_t max_marked;
73 : } elements;
74 0 : elements.total = estimates.size();
75 : struct
76 : {
77 : double total;
78 : double min_marked;
79 : double max_marked;
80 : } error;
81 0 : error.total = local_total;
82 0 : std::tie(elements.max_marked, error.max_marked) = Marked(min_threshold);
83 0 : std::tie(elements.min_marked, error.min_marked) = Marked(max_threshold);
84 0 : Mpi::GlobalSum(3, &elements.total, comm);
85 0 : Mpi::GlobalSum(3, &error.total, comm);
86 0 : const double max_indicator = [&]()
87 : {
88 0 : double max_indicator = estimates.size() > 0 ? estimates.back() : 0.0;
89 0 : Mpi::GlobalMax(1, &max_indicator, comm);
90 0 : return max_indicator;
91 0 : }();
92 : MFEM_ASSERT(min_threshold <= max_threshold,
93 : "Error in Dorfler marking: min: " << min_threshold << " max " << max_threshold
94 : << "!");
95 0 : auto [elem_marked, error_marked] = Marked(error_threshold);
96 :
97 : // Keep track of the number of elements marked by the threshold bounds. If the top and
98 : // bottom values are equal (or separated by only 1), there's no point further bisecting.
99 : // The maximum iterations is just to prevert runaway.
100 : constexpr int max_it = 100;
101 0 : for (int i = 0; i < max_it; i++)
102 : {
103 0 : error_threshold = (min_threshold + max_threshold) / 2;
104 0 : std::tie(elem_marked, error_marked) = Marked(error_threshold);
105 :
106 : // All processors need the values used for the stopping criteria.
107 0 : Mpi::GlobalSum(1, &elem_marked, comm);
108 0 : Mpi::GlobalSum(1, &error_marked, comm);
109 : MFEM_ASSERT(elem_marked > 0, "Some elements must have been marked!");
110 : MFEM_ASSERT(error_marked > 0, "Some error must have been marked!");
111 0 : const auto candidate_fraction = error_marked / error.total;
112 : if constexpr (false)
113 : {
114 : Mpi::Print(
115 : "Marking threshold: {:e} < {:e} < {:e}\nMarked elements: {:d} <= {:d} <= {:d}\n",
116 : min_threshold, error_threshold, max_threshold, elements.min_marked, elem_marked,
117 : elements.max_marked);
118 : }
119 :
120 : // Set the tolerance based off of the largest local indicator value. These tolerance
121 : // values extremely tight because this loop is fast, and getting the marking correct is
122 : // important.
123 : constexpr double frac_tol = 2 * std::numeric_limits<double>::epsilon();
124 0 : const double error_tol = 2 * std::numeric_limits<double>::epsilon() * max_indicator;
125 0 : if (std::abs(max_threshold - min_threshold) < error_tol ||
126 0 : std::abs(candidate_fraction - fraction) < frac_tol ||
127 0 : elements.max_marked <= (elements.min_marked + 1))
128 : {
129 : // Candidate fraction matches to tolerance, or the number of marked elements is no
130 : // longer changing.
131 : if constexpr (false)
132 : {
133 : Mpi::Print("ΔFraction: {:.3e} (tol = {:.3e})\nΔThreshold: {:.3e} (tol = "
134 : "{:.3e})\nΔElements: {:d}\n",
135 : candidate_fraction - fraction, frac_tol, max_threshold - min_threshold,
136 : error_tol, elements.max_marked - elements.min_marked);
137 : }
138 : break;
139 : }
140 :
141 : // Update in preparation for next iteration. The logic here looks inverted compared to a
142 : // usual binary search, because a smaller value marks a larger number of elements and
143 : // thus a greater fraction of error.
144 0 : if (candidate_fraction > fraction)
145 : {
146 : // This candidate marked too much, raise the lower bound.
147 0 : min_threshold = error_threshold;
148 0 : elements.max_marked = elem_marked;
149 0 : error.max_marked = error_marked;
150 : }
151 0 : else if (candidate_fraction < fraction)
152 : {
153 : // This candidate marked too little, lower the upper bound.
154 0 : max_threshold = error_threshold;
155 0 : elements.min_marked = elem_marked;
156 0 : error.min_marked = error_marked;
157 : }
158 : }
159 :
160 : // Always choose the lower threshold value, thereby marking the larger number of elements
161 : // and fraction of the total error. Would rather over mark than under mark, as Dörfler
162 : // marking is the smallest set that covers at least the specified fraction of the error.
163 0 : error_threshold = min_threshold;
164 0 : error_marked = error.max_marked;
165 : MFEM_ASSERT(error_threshold > 0.0,
166 : "Error threshold result from marking must be positive!");
167 0 : MFEM_VERIFY(error_marked >= fraction * error.total,
168 : "Marked error = " << error_marked << ", total error =" << error.total
169 : << ". Dorfler marking predicate failed!");
170 0 : return {error_threshold, error_marked / error.total};
171 : }
172 :
173 0 : std::array<double, 2> ComputeDorflerCoarseningThreshold(const mfem::ParMesh &mesh,
174 : const Vector &e, double fraction)
175 : {
176 0 : MFEM_VERIFY(mesh.Nonconforming(), "Can only perform coarsening on a Nonconforming mesh!");
177 0 : const auto &derefinement_table = mesh.pncmesh->GetDerefinementTable();
178 : mfem::Array<double> elem_error(e.Size());
179 0 : for (int i = 0; i < e.Size(); i++)
180 : {
181 0 : elem_error[i] = e[i];
182 : }
183 0 : mesh.pncmesh->SynchronizeDerefinementData(elem_error, derefinement_table);
184 : Vector coarse_error(derefinement_table.Size());
185 : mfem::Array<int> row;
186 0 : for (int i = 0; i < derefinement_table.Size(); i++)
187 : {
188 0 : derefinement_table.GetRow(i, row);
189 0 : coarse_error[i] = std::sqrt(
190 : std::accumulate(row.begin(), row.end(), 0.0, [&elem_error](double s, int i)
191 0 : { return s += std::pow(elem_error[i], 2.0); }));
192 : }
193 :
194 : // Given the coarse errors, we use the Dörfler marking strategy to identify the
195 : // smallest set of original elements that make up (1 - θ) of the total error. The
196 : // complement of this set is then the largest number of elements that make up θ of the
197 : // total error.
198 0 : return ComputeDorflerThreshold(mesh.GetComm(), coarse_error, 1.0 - fraction);
199 : }
200 :
201 : } // namespace palace::utils
|