// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#include "operator.hpp"

#include <algorithm>
#include <cstdlib>
#include <numeric>
#include <ceed/backend.h>
#include <mfem.hpp>
#include <mfem/general/forall.hpp>
#include "fem/fespace.hpp"
#include "linalg/hypre.hpp"
#include "utils/omp.hpp"

namespace palace::ceed
{

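// A ceed::Operator is partitioned across OpenMP threads: each thread owns a composite
// CeedOperator (and, optionally, its transpose) on its own Ceed context, plus CeedVectors
// u and v sized to the operator width and height for wrapping input/output data during
// application.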
Operator::Operator(int h, int w) : palace::Operator(h, w)
{
  const std::size_t nt = internal::GetCeedObjects().size();
  op.resize(nt, nullptr);
  op_t.resize(nt, nullptr);
  u.resize(nt, nullptr);
  v.resize(nt, nullptr);
  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed = internal::GetCeedObjects()[id];
    CeedOperator loc_op, loc_op_t;
    CeedVector loc_u, loc_v;
    PalaceCeedCall(ceed, CeedOperatorCreateComposite(ceed, &loc_op));
    PalaceCeedCall(ceed, CeedOperatorCreateComposite(ceed, &loc_op_t));
    PalaceCeedCall(ceed, CeedVectorCreate(ceed, width, &loc_u));
    PalaceCeedCall(ceed, CeedVectorCreate(ceed, height, &loc_v));
    op[id] = loc_op;
    op_t[id] = loc_op_t;
    u[id] = loc_u;
    v[id] = loc_v;
  }
  temp.UseDevice(true);
}

Operator::~Operator()
{
  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
    PalaceCeedCall(ceed, CeedOperatorDestroy(&op[id]));
    PalaceCeedCall(ceed, CeedOperatorDestroy(&op_t[id]));
    PalaceCeedCall(ceed, CeedVectorDestroy(&u[id]));
    PalaceCeedCall(ceed, CeedVectorDestroy(&v[id]));
  }
}

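// Takes ownership of a sub-operator (and optionally its transpose) and adds it to the
// calling thread's composite operator, after validating dimensions and Ceed contexts.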
void Operator::AddSubOperator(CeedOperator sub_op, CeedOperator sub_op_t)
{
  // This should be called from within an OpenMP parallel region.
  const int id = utils::GetThreadNum();
  MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
              "Out of bounds access for thread number " << id << "!");
  Ceed ceed;
  PalaceCeedCallBackend(CeedOperatorGetCeed(sub_op, &ceed));
  CeedSize l_in, l_out;
  PalaceCeedCall(ceed, CeedOperatorGetActiveVectorLengths(sub_op, &l_in, &l_out));
  MFEM_VERIFY((l_in < 0 || mfem::internal::to_int(l_in) == width) &&
                  (l_out < 0 || mfem::internal::to_int(l_out) == height),
              "Dimensions mismatch for CeedOperator!");
  PalaceCeedCall(ceed, CeedOperatorCompositeAddSub(op[id], sub_op));
  PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op));
  if (sub_op_t)
  {
    Ceed ceed_t;
    PalaceCeedCallBackend(CeedOperatorGetCeed(sub_op_t, &ceed_t));
    MFEM_VERIFY(ceed_t == ceed, "Ceed context mismatch for transpose CeedOperator!");
    CeedSize l_in_t, l_out_t;
    PalaceCeedCall(ceed, CeedOperatorGetActiveVectorLengths(sub_op_t, &l_in_t, &l_out_t));
    MFEM_VERIFY(l_in_t == l_out && l_out_t == l_in,
                "Dimensions mismatch for transpose CeedOperator!");
    PalaceCeedCall(ceed, CeedOperatorCompositeAddSub(op_t[id], sub_op_t));
    PalaceCeedCall(ceed, CeedOperatorDestroy(&sub_op_t));
  }
}

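// Verify that all thread-local composite operators (and their transposes) are fully
// configured and ready to be applied.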
void Operator::Finalize()
{
  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
    PalaceCeedCall(ceed, CeedOperatorCheckReady(op[id]));
    PalaceCeedCall(ceed, CeedOperatorCheckReady(op_t[id]));
  }
}

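// Release the libCEED assembly data cached by diagonal or full assembly, which is not
// needed for subsequent operator applications.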
void Operator::DestroyAssemblyData() const
{
  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
    PalaceCeedCall(ceed, CeedOperatorAssemblyDataStrip(op[id]));
  }
}

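// Assemble the operator diagonal by accumulating each thread's contribution directly
// into the output vector, using the Ceed backend's preferred memory space when the MFEM
// device configuration allows it.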
void Operator::AssembleDiagonal(Vector &diag) const
{
  Ceed ceed;
  CeedMemType mem;
  MFEM_VERIFY(diag.Size() == height, "Invalid size for diagonal vector!");
  diag = 0.0;
  PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
  PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
  if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
  {
    mem = CEED_MEM_HOST;
  }
  auto *diag_data = diag.ReadWrite(mem == CEED_MEM_DEVICE);

  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
    PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, diag_data));
    PalaceCeedCall(
        ceed, CeedOperatorLinearAssembleAddDiagonal(op[id], v[id], CEED_REQUEST_IMMEDIATE));
    PalaceCeedCall(ceed, CeedVectorTakeArray(v[id], mem, nullptr));
    PalaceCeedCall(ceed, CeedOperatorAssemblyDataStrip(op[id]));
  }
}

namespace
{

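// Compute y += A * x by applying each thread's composite operator. The MFEM Vector data
// is wrapped with CEED_USE_POINTER, so no copies are made.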
inline void CeedAddMult(const std::vector<CeedOperator> &op,
                        const std::vector<CeedVector> &u, const std::vector<CeedVector> &v,
                        const Vector &x, Vector &y)
{
  Ceed ceed;
  CeedMemType mem;
  PalaceCeedCallBackend(CeedOperatorGetCeed(op[0], &ceed));
  PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, &mem));
  if (!mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE)
  {
    mem = CEED_MEM_HOST;
  }
  const auto *x_data = x.Read(mem == CEED_MEM_DEVICE);
  auto *y_data = y.ReadWrite(mem == CEED_MEM_DEVICE);

  PalacePragmaOmp(parallel if (op.size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));
    PalaceCeedCall(ceed, CeedVectorSetArray(u[id], mem, CEED_USE_POINTER,
                                            const_cast<CeedScalar *>(x_data)));
    PalaceCeedCall(ceed, CeedVectorSetArray(v[id], mem, CEED_USE_POINTER, y_data));
    PalaceCeedCall(ceed,
                   CeedOperatorApplyAdd(op[id], u[id], v[id], CEED_REQUEST_IMMEDIATE));
    PalaceCeedCall(ceed, CeedVectorTakeArray(u[id], mem, nullptr));
    PalaceCeedCall(ceed, CeedVectorTakeArray(v[id], mem, nullptr));
  }
}

} // namespace

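// y = A * x, with an optional entrywise scaling of the result by the stored DOF
// multiplicity factors.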
void Operator::Mult(const Vector &x, Vector &y) const
{
  y = 0.0;
  CeedAddMult(op, u, v, x, y);
  if (dof_multiplicity.Size() > 0)
  {
    y *= dof_multiplicity;
  }
}

void Operator::AddMult(const Vector &x, Vector &y, const double a) const
{
  MFEM_VERIFY(a == 1.0, "ceed::Operator::AddMult only supports coefficient = 1.0!");
  if (dof_multiplicity.Size() > 0)
  {
    temp.SetSize(height);
    temp = 0.0;
    CeedAddMult(op, u, v, x, temp);
    {
      const auto *d_dof_multiplicity = dof_multiplicity.Read();
      const auto *d_temp = temp.Read();
      auto *d_y = y.ReadWrite();
      mfem::forall(height, [=] MFEM_HOST_DEVICE(int i)
                   { d_y[i] += d_dof_multiplicity[i] * d_temp[i]; });
    }
  }
  else
  {
    CeedAddMult(op, u, v, x, y);
  }
}

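// Transpose applications use the op_t composite operators; the u and v work vectors swap
// roles below since the transpose maps from the range back to the domain.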
void Operator::MultTranspose(const Vector &x, Vector &y) const
{
  y = 0.0;
  AddMultTranspose(x, y);
}

void Operator::AddMultTranspose(const Vector &x, Vector &y, const double a) const
{
  MFEM_VERIFY(a == 1.0,
              "ceed::Operator::AddMultTranspose only supports coefficient = 1.0!");
  if (dof_multiplicity.Size() > 0)
  {
    temp.SetSize(height);
    {
      const auto *d_dof_multiplicity = dof_multiplicity.Read();
      const auto *d_x = x.Read();
      auto *d_temp = temp.Write();
      mfem::forall(height, [=] MFEM_HOST_DEVICE(int i)
                   { d_temp[i] = d_dof_multiplicity[i] * d_x[i]; });
    }
    CeedAddMult(op_t, v, u, temp, y);
  }
  else
  {
    CeedAddMult(op_t, v, u, x, y);
  }
}

namespace
{

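// Allocation helpers matching libCEED's internal conventions, used to manage the COO
// index arrays returned by CeedOperatorLinearAssembleSymbolic.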
int CeedInternalCallocArray(size_t n, size_t unit, void *p)
{
  *(void **)p = calloc(n, unit);
  MFEM_ASSERT(!n || !unit || *(void **)p,
              "calloc failed to allocate " << n << " members of size " << unit << "!");
  return 0;
}

int CeedInternalFree(void *p)
{
  free(*(void **)p);
  *(void **)p = nullptr;
  return 0;
}

#define CeedInternalCalloc(n, p) CeedInternalCallocArray((n), sizeof(**(p)), p)

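// Assemble a (composite) CeedOperator in COO format: the sparsity pattern is returned in
// host memory, while the values CeedVector may live in the backend's preferred memory
// space. Optionally compress out numerically zero entries.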
void CeedOperatorAssembleCOO(Ceed ceed, CeedOperator op, bool skip_zeros, CeedSize *nnz,
                             CeedInt **rows, CeedInt **cols, CeedVector *vals,
                             CeedMemType *mem)
{
  PalaceCeedCall(ceed, CeedGetPreferredMemType(ceed, mem));

  // Assemble sparsity pattern (rows, cols are always host pointers).
  PalaceCeedCall(ceed, CeedOperatorLinearAssembleSymbolic(op, nnz, rows, cols));

  // Assemble values.
  PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, vals));
  PalaceCeedCall(ceed, CeedOperatorLinearAssemble(op, *vals));

  // Filter out zero entries. For now, eliminating zeros happens all on the host.
  // std::cout << " Operator full assembly (COO) has " << *nnz << " NNZ";
  if (skip_zeros && *nnz > 0)
  {
    // XX TODO: Use Thrust for this (thrust::copy_if and thrust::zip_iterator)
    CeedInt *new_rows, *new_cols;
    PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_rows));
    PalaceCeedCall(ceed, CeedInternalCalloc(*nnz, &new_cols));

    CeedVector new_vals;
    PalaceCeedCall(ceed, CeedVectorCreate(ceed, *nnz, &new_vals));

    CeedSize q = 0;
    const CeedScalar *vals_array;
    CeedScalar *new_vals_array;
    PalaceCeedCall(ceed, CeedVectorGetArrayRead(*vals, CEED_MEM_HOST, &vals_array));
    PalaceCeedCall(ceed, CeedVectorGetArrayWrite(new_vals, CEED_MEM_HOST, &new_vals_array));
    for (CeedSize k = 0; k < *nnz; k++)
    {
      if (vals_array[k] != 0.0)
      {
        new_rows[q] = (*rows)[k];
        new_cols[q] = (*cols)[k];
        new_vals_array[q] = vals_array[k];
        q++;
      }
    }
    PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(*vals, &vals_array));
    PalaceCeedCall(ceed, CeedVectorRestoreArray(new_vals, &new_vals_array));

    PalaceCeedCall(ceed, CeedInternalFree(rows));
    PalaceCeedCall(ceed, CeedInternalFree(cols));
    PalaceCeedCall(ceed, CeedVectorDestroy(vals));

    *nnz = q;
    *rows = new_rows;
    *cols = new_cols;
    *vals = new_vals;

    // std::cout << " (new NNZ after removal: " << *nnz << ")";
  }
  // std::cout << "\n";
}

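// Convert COO triplets to a CSR matrix, summing duplicate entries (or, when set = true,
// taking a single representative value). Takes ownership of the rows/cols arrays and the
// vals vector, freeing them before returning.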
std::unique_ptr<hypre::HypreCSRMatrix> OperatorCOOtoCSR(Ceed ceed, CeedInt m, CeedInt n,
                                                        CeedSize nnz, CeedInt *rows,
                                                        CeedInt *cols, CeedVector vals,
                                                        CeedMemType mem, bool set)
{
  // Preallocate CSR memory on host (like PETSc's MatSetValuesCOO). Check for overflow for
  // large nonzero counts.
  const int nnz_int = mfem::internal::to_int(nnz);
  mfem::Array<int> I(m + 1), J(nnz_int), perm(nnz_int), Jmap(nnz_int + 1);
  I = 0;
  for (int k = 0; k < nnz_int; k++)
  {
    perm[k] = k;
  }
  std::sort(perm.begin(), perm.end(),
            [&](const int &i, const int &j) { return (rows[i] < rows[j]); });

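  // Single pass over the row-sorted entries: sort each row's columns, count the unique
  // entries per row in I, and build Jmap so that [Jmap[k], Jmap[k + 1]) indexes the
  // duplicate COO entries contributing to CSR entry k.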
  int q = -1;  // True nnz index
  for (int k = 0; k < nnz_int;)
  {
    // Sort column entries in the row.
    const int row = rows[perm[k]];
    const int start = k;
    while (k < nnz_int && rows[perm[k]] == row)
    {
      k++;
    }
    std::sort(perm.begin() + start, perm.begin() + k,
              [&](const int &i, const int &j) { return (cols[i] < cols[j]); });

    q++;
    I[row + 1] = 1;
    J[q] = cols[perm[start]];
    Jmap[q + 1] = 1;
    for (int p = start + 1; p < k; p++)
    {
      if (cols[perm[p]] != cols[perm[p - 1]])
      {
        // New nonzero.
        q++;
        I[row + 1]++;
        J[q] = cols[perm[p]];
        Jmap[q + 1] = 1;
      }
      else
      {
        Jmap[q + 1]++;
      }
    }
  }
  PalaceCeedCall(ceed, CeedInternalFree(&rows));
  PalaceCeedCall(ceed, CeedInternalFree(&cols));

  // Finalize I, Jmap.
  const int nnz_new = q + 1;
  I[0] = 0;
  for (int i = 0; i < m; i++)
  {
    I[i + 1] += I[i];
  }
  Jmap[0] = 0;
  for (int k = 0; k < nnz_new; k++)
  {
    Jmap[k + 1] += Jmap[k];
  }

  // Construct and fill the final CSR matrix. On GPU, MFEM and Hypre share the same memory
  // space. On CPU, the inner nested OpenMP loop (if enabled in MFEM) should be ignored.
  auto mat = std::make_unique<hypre::HypreCSRMatrix>(m, n, nnz_new);
  {
    const auto *d_I_old = I.Read();
    auto *d_I = mat->GetI();
    mfem::forall(m + 1, [=] MFEM_HOST_DEVICE(int i) { d_I[i] = d_I_old[i]; });
  }
  {
    const auto *d_J_old = J.Read();
    auto *d_J = mat->GetJ();
    mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k) { d_J[k] = d_J_old[k]; });
  }
  {
    auto FillValues = [&](const double *vals_array)
    {
      const auto *d_perm = perm.Read();
      const auto *d_Jmap = Jmap.Read();
      auto *d_A = mat->GetData();
      if (set)
      {
        mfem::forall(nnz_new, [=] MFEM_HOST_DEVICE(int k)
                     { d_A[k] = vals_array[d_perm[d_Jmap[k]]]; });
      }
      else
      {
        mfem::forall(nnz_new,
                     [=] MFEM_HOST_DEVICE(int k)
                     {
                       double sum = 0.0;
                       for (int p = d_Jmap[k]; p < d_Jmap[k + 1]; p++)
                       {
                         sum += vals_array[d_perm[p]];
                       }
                       d_A[k] = sum;
                     });
      }
    };
    Ceed ceed;
    const CeedScalar *vals_array;
    PalaceCeedCallBackend(CeedVectorGetCeed(vals, &ceed));
    PalaceCeedCall(ceed, CeedVectorGetArrayRead(vals, mem, &vals_array));
    if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK) && mem != CEED_MEM_DEVICE)
    {
      // Copy values to device before filling.
      Vector d_vals(nnz_int);
      {
        auto *d_vals_array = d_vals.HostWrite();
        PalacePragmaOmp(parallel for schedule(static))
        for (int k = 0; k < nnz_int; k++)
        {
          d_vals_array[k] = vals_array[k];
        }
      }
      FillValues(d_vals.Read());
    }
    else
    {
      // No copy required.
      FillValues(vals_array);
    }
    PalaceCeedCall(ceed, CeedVectorRestoreArrayRead(vals, &vals_array));
    PalaceCeedCall(ceed, CeedVectorDestroy(&vals));
  }

  return mat;
}

} // namespace

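// Fully assemble a ceed::Operator as a HYPRE CSR matrix: each thread assembles its
// composite operator to COO, converts to CSR, and the per-thread matrices are then
// summed. When set = true, entries duplicated across threads are scaled by the inverse
// of their duplication count rather than summed.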
std::unique_ptr<hypre::HypreCSRMatrix> CeedOperatorFullAssemble(const Operator &op,
                                                                bool skip_zeros, bool set)
{
  // Assemble operators on each thread.
  std::vector<std::unique_ptr<hypre::HypreCSRMatrix>> loc_mat(op.Size());
  PalacePragmaOmp(parallel if (op.Size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op.Size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op[id], &ceed));

    // Check if the operator is empty, otherwise assemble.
    CeedInt nsub_ops;
    PalaceCeedCall(ceed, CeedOperatorCompositeGetNumSub(op[id], &nsub_ops));
    if (nsub_ops == 0)
    {
      loc_mat[id] = std::make_unique<hypre::HypreCSRMatrix>(op.Height(), op.Width(), 0);
    }
    else
    {
      // First, get the thread's matrix in COO format, with rows/cols always on host and
      // vals potentially on the device. Process skipping zeros if desired.
      CeedSize nnz;
      CeedInt *rows, *cols;
      CeedVector vals;
      CeedMemType mem;
      CeedOperatorAssembleCOO(ceed, op[id], skip_zeros, &nnz, &rows, &cols, &vals, &mem);
      PalaceCeedCall(ceed, CeedOperatorAssemblyDataStrip(op[id]));

      // Convert COO to CSR (on each thread). The COO memory is freed internally.
      loc_mat[id] =
          OperatorCOOtoCSR(ceed, op.Height(), op.Width(), nnz, rows, cols, vals, mem, set);
    }
  }

  // Add CSR matrix objects from each thread (HYPRE's hypre_CSRMatrixAdd uses threads
  // internally as available). We have to scale the duplicated nonzeros when set = true.
  auto mat = std::move(loc_mat[0]);
  std::unique_ptr<hypre::HypreCSRMatrix> b_mat;
  if (set && op.Size() > 1)
  {
    b_mat = std::make_unique<hypre::HypreCSRMatrix>(hypre_CSRMatrixClone(*mat, 0));
    hypre_CSRMatrixSetConstantValues(*b_mat, 1.0);
    for (std::size_t id = 1; id < op.Size(); id++)
    {
      hypre_CSRMatrix *b_loc_mat = hypre_CSRMatrixClone(*loc_mat[id], 0);
      hypre_CSRMatrixSetConstantValues(b_loc_mat, 1.0);
      b_mat = std::make_unique<hypre::HypreCSRMatrix>(
          hypre_CSRMatrixAdd(1.0, *b_mat, 1.0, b_loc_mat));
      hypre_CSRMatrixDestroy(b_loc_mat);
    }
  }
  for (std::size_t id = 1; id < op.Size(); id++)
  {
    mat = std::make_unique<hypre::HypreCSRMatrix>(
        hypre_CSRMatrixAdd(1.0, *mat, 1.0, *loc_mat[id]));
  }
  if (set && op.Size() > 1)
  {
    const auto *d_b_data = b_mat->GetData();
    auto *d_data = mat->GetData();
    mfem::forall(mat->NNZ(),
                 [=] MFEM_HOST_DEVICE(int i) { d_data[i] *= 1.0 / d_b_data[i]; });
  }

  return mat;
}

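// Coarsen the operator for use in multigrid: each sub-operator of the fine-level
// operator is recreated on the coarse space by swapping in the coarse basis and element
// restriction via CeedOperatorMultigridLevelCreate.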
std::unique_ptr<Operator> CeedOperatorCoarsen(const Operator &op_fine,
                                              const FiniteElementSpace &fespace_coarse)
{
  auto SingleOperatorCoarsen =
      [&fespace_coarse](Ceed ceed, CeedOperator op_fine, CeedOperator *op_coarse)
  {
    CeedBasis basis_fine;
    CeedElemTopology geom;
    PalaceCeedCall(ceed, CeedOperatorGetActiveBasis(op_fine, &basis_fine));
    PalaceCeedCall(ceed, CeedBasisGetTopology(basis_fine, &geom));

    const auto &geom_data =
        fespace_coarse.GetMesh().GetCeedGeomFactorData(ceed).at(GetMfemTopology(geom));
    CeedElemRestriction restr_coarse = fespace_coarse.GetCeedElemRestriction(
        ceed, GetMfemTopology(geom), geom_data.indices);
    CeedBasis basis_coarse = fespace_coarse.GetCeedBasis(ceed, GetMfemTopology(geom));

    PalaceCeedCall(ceed, CeedOperatorMultigridLevelCreate(op_fine, nullptr, restr_coarse,
                                                          basis_coarse, op_coarse, nullptr,
                                                          nullptr));
    PalaceCeedCall(ceed, CeedOperatorAssemblyDataStrip(*op_coarse));
  };

  // Initialize the coarse operator.
  auto op_coarse = std::make_unique<SymmetricOperator>(fespace_coarse.GetVSize(),
                                                       fespace_coarse.GetVSize());

  // Assemble the coarse operator by coarsening each sub-operator (over threads, geometry
  // types, integrators) of the original fine operator.
  PalacePragmaOmp(parallel if (op_fine.Size() > 1))
  {
    const int id = utils::GetThreadNum();
    MFEM_ASSERT(static_cast<std::size_t>(id) < op_fine.Size(),
                "Out of bounds access for thread number " << id << "!");
    Ceed ceed;
    PalaceCeedCallBackend(CeedOperatorGetCeed(op_fine[id], &ceed));
    {
      Ceed ceed_parent;
      PalaceCeedCall(ceed, CeedGetParent(ceed, &ceed_parent));
      if (ceed_parent)
      {
        ceed = ceed_parent;
      }
    }
    CeedInt nsub_ops_fine;
    CeedOperator *sub_ops_fine;
    PalaceCeedCall(ceed, CeedOperatorCompositeGetNumSub(op_fine[id], &nsub_ops_fine));
    PalaceCeedCall(ceed, CeedOperatorCompositeGetSubList(op_fine[id], &sub_ops_fine));
    for (CeedInt k = 0; k < nsub_ops_fine; k++)
    {
      CeedOperator sub_op_coarse;
      SingleOperatorCoarsen(ceed, sub_ops_fine[k], &sub_op_coarse);
      op_coarse->AddSubOperator(sub_op_coarse);  // Sub-operator owned by ceed::Operator
    }
  }

  // Finalize the operator (call CeedOperatorCheckReady).
  op_coarse->Finalize();

  return op_coarse;
}

} // namespace palace::ceed