Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_LIBCEED_HDIV_MASS_33_QF_H
5 : #define PALACE_LIBCEED_HDIV_MASS_33_QF_H
6 :
7 : #include "../coeff/coeff_3_qf.h"
8 : #include "utils_33_qf.h"
9 :
10 132 : CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q,
11 : const CeedScalar *const *in, CeedScalar *const *out)
12 : {
13 132 : const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
14 132 : *curlu = in[2];
15 132 : CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1];
16 :
17 69156 : CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
18 : {
19 : CeedScalar adjJt_loc[9];
20 69024 : MatUnpack33(adjJt + i, Q, adjJt_loc);
21 : {
22 69024 : const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]};
23 : CeedScalar coeff[9], v_loc[3];
24 69024 : CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
25 : MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
26 :
27 69024 : v[i + Q * 0] = wdetJ[i] * v_loc[0];
28 69024 : v[i + Q * 1] = wdetJ[i] * v_loc[1];
29 69024 : v[i + Q * 2] = wdetJ[i] * v_loc[2];
30 : }
31 : {
32 69024 : const CeedScalar u_loc[3] = {curlu[i + Q * 0], curlu[i + Q * 1], curlu[i + Q * 2]};
33 : CeedScalar coeff[9], J_loc[9], v_loc[3];
34 69024 : CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
35 : AdjJt33(adjJt_loc, J_loc);
36 : MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc);
37 :
38 69024 : curlv[i + Q * 0] = wdetJ[i] * v_loc[0];
39 69024 : curlv[i + Q * 1] = wdetJ[i] * v_loc[1];
40 69024 : curlv[i + Q * 2] = wdetJ[i] * v_loc[2];
41 : }
42 : }
43 132 : return 0;
44 : }
45 :
46 : #endif // PALACE_LIBCEED_HDIV_MASS_33_QF_H
|