Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_LIBCEED_HCURLHDIV_ERROR_33_QF_H
5 : #define PALACE_LIBCEED_HCURLHDIV_ERROR_33_QF_H
6 :
7 : #include "../coeff/coeff_3_qf.h"
8 : #include "utils_33_qf.h"
9 :
10 0 : CEED_QFUNCTION(f_apply_hcurlhdiv_error_33)(void *__restrict__ ctx, CeedInt Q,
11 : const CeedScalar *const *in,
12 : CeedScalar *const *out)
13 : {
14 0 : const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u1 = in[1],
15 0 : *u2 = in[2];
16 0 : CeedScalar *v = out[0];
17 :
18 0 : CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
19 : {
20 : CeedScalar adjJt_loc[9], v1_loc[3], v2_loc[3];
21 0 : MatUnpack33(adjJt + i, Q, adjJt_loc);
22 : {
23 0 : const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
24 : CeedScalar coeff[9];
25 0 : CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
26 : MultBAx33(adjJt_loc, coeff, u1_loc, v1_loc);
27 : }
28 : {
29 0 : const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
30 : CeedScalar coeff[9], J_loc[9];
31 : CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
32 : AdjJt33(adjJt_loc, J_loc);
33 : MultBAx33(J_loc, coeff, u2_loc, v2_loc);
34 : }
35 0 : v2_loc[0] -= v1_loc[0];
36 0 : v2_loc[1] -= v1_loc[1];
37 0 : v2_loc[2] -= v1_loc[2];
38 0 : v[i] =
39 0 : wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
40 : }
41 0 : return 0;
42 : }
43 :
44 0 : CEED_QFUNCTION(f_apply_hdivhcurl_error_33)(void *__restrict__ ctx, CeedInt Q,
45 : const CeedScalar *const *in,
46 : CeedScalar *const *out)
47 : {
48 0 : const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u1 = in[1],
49 0 : *u2 = in[2];
50 0 : CeedScalar *v = out[0];
51 :
52 0 : CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
53 : {
54 : CeedScalar adjJt_loc[9], v1_loc[3], v2_loc[3];
55 0 : MatUnpack33(adjJt + i, Q, adjJt_loc);
56 : {
57 0 : const CeedScalar u1_loc[3] = {u1[i + Q * 0], u1[i + Q * 1], u1[i + Q * 2]};
58 : CeedScalar coeff[9], J_loc[9];
59 0 : CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
60 : AdjJt33(adjJt_loc, J_loc);
61 : MultBAx33(J_loc, coeff, u1_loc, v1_loc);
62 : }
63 : {
64 0 : const CeedScalar u2_loc[3] = {u2[i + Q * 0], u2[i + Q * 1], u2[i + Q * 2]};
65 : CeedScalar coeff[9];
66 : CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
67 : MultBAx33(adjJt_loc, coeff, u2_loc, v2_loc);
68 : }
69 0 : v2_loc[0] -= v1_loc[0];
70 0 : v2_loc[1] -= v1_loc[1];
71 0 : v2_loc[2] -= v1_loc[2];
72 0 : v[i] =
73 0 : wdetJ[i] * (v2_loc[0] * v2_loc[0] + v2_loc[1] * v2_loc[1] + v2_loc[2] * v2_loc[2]);
74 : }
75 0 : return 0;
76 : }
77 :
78 : #endif // PALACE_LIBCEED_HCURLHDIV_ERROR_33_QF_H
|