Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_LIBCEED_HCURL_HDIV_22_QF_H
5 : #define PALACE_LIBCEED_HCURL_HDIV_22_QF_H
6 :
7 : #include "../coeff/coeff_2_qf.h"
8 : #include "utils_22_qf.h"
9 :
10 9855 : CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q,
11 : const CeedScalar *const *in, CeedScalar *const *out)
12 : {
13 9855 : const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
14 9855 : CeedScalar *v = out[0];
15 :
16 792567 : CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
17 : {
18 782712 : const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
19 : CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
20 782712 : CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
21 782712 : MatUnpack22(adjJt + i, Q, adjJt_loc);
22 : AdjJt22(adjJt_loc, J_loc);
23 : MultAtBCx22(J_loc, coeff, adjJt_loc, u_loc, v_loc);
24 :
25 782712 : v[i + Q * 0] = wdetJ[i] * v_loc[0];
26 782712 : v[i + Q * 1] = wdetJ[i] * v_loc[1];
27 : }
28 9855 : return 0;
29 : }
30 :
31 5010 : CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q,
32 : const CeedScalar *const *in, CeedScalar *const *out)
33 : {
34 5010 : const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
35 5010 : CeedScalar *v = out[0];
36 :
37 400242 : CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
38 : {
39 395232 : const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]};
40 : CeedScalar coeff[4], adjJt_loc[4], J_loc[4], v_loc[2];
41 395232 : CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
42 395232 : MatUnpack22(adjJt + i, Q, adjJt_loc);
43 : AdjJt22(adjJt_loc, J_loc);
44 : MultAtBCx22(adjJt_loc, coeff, J_loc, u_loc, v_loc);
45 :
46 395232 : v[i + Q * 0] = wdetJ[i] * v_loc[0];
47 395232 : v[i + Q * 1] = wdetJ[i] * v_loc[1];
48 : }
49 5010 : return 0;
50 : }
51 :
52 : #endif // PALACE_LIBCEED_HCURL_HDIV_22_QF_H
|