Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_LIBCEED_UTILS_33_QF_H
5 : #define PALACE_LIBCEED_UTILS_33_QF_H
6 :
7 : #ifndef CEED_RUNNING_JIT_PASS
8 : #include <math.h>
9 : #endif
10 :
11 : CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar J[9])
12 : {
13 : // J: 0 3 6
14 : // 1 4 7
15 : // 2 5 8
16 : return J[0] * (J[4] * J[8] - J[5] * J[7]) - J[1] * (J[3] * J[8] - J[5] * J[6]) +
17 : J[2] * (J[3] * J[7] - J[4] * J[6]);
18 : }
19 :
20 : template <bool ComputeDet = false>
21 : CEED_QFUNCTION_HELPER CeedScalar AdjJt33(const CeedScalar J[9], CeedScalar adjJt[9])
22 : {
23 : // Compute adj(J)^T / det(J) and store the result.
24 : // J: 0 3 6
25 : // 1 4 7
26 : // 2 5 8
27 72780416 : adjJt[0] = J[4] * J[8] - J[7] * J[5];
28 72780416 : adjJt[3] = J[7] * J[2] - J[1] * J[8];
29 72780416 : adjJt[6] = J[1] * J[5] - J[4] * J[2];
30 72780416 : adjJt[1] = J[6] * J[5] - J[3] * J[8];
31 72780416 : adjJt[4] = J[0] * J[8] - J[6] * J[2];
32 72780416 : adjJt[7] = J[3] * J[2] - J[0] * J[5];
33 72780416 : adjJt[2] = J[3] * J[7] - J[6] * J[4];
34 72780416 : adjJt[5] = J[6] * J[1] - J[0] * J[7];
35 72780416 : adjJt[8] = J[0] * J[4] - J[3] * J[1];
36 29921704 : return ComputeDet ? (J[0] * adjJt[0] + J[1] * adjJt[1] + J[2] * adjJt[2]) : 0.0;
37 : }
38 :
39 : CEED_QFUNCTION_HELPER void MatUnpack33(const CeedScalar *A, const CeedInt A_stride,
40 : CeedScalar A_loc[9])
41 : {
42 93424288 : A_loc[0] = A[A_stride * 0];
43 93424288 : A_loc[1] = A[A_stride * 1];
44 93424288 : A_loc[2] = A[A_stride * 2];
45 93424288 : A_loc[3] = A[A_stride * 3];
46 93424288 : A_loc[4] = A[A_stride * 4];
47 93424288 : A_loc[5] = A[A_stride * 5];
48 93424288 : A_loc[6] = A[A_stride * 6];
49 93424288 : A_loc[7] = A[A_stride * 7];
50 93424288 : A_loc[8] = A[A_stride * 8];
51 : }
52 :
53 : CEED_QFUNCTION_HELPER void MultBx33(const CeedScalar B[9], const CeedScalar x[3],
54 : CeedScalar y[3])
55 : {
56 : // B: 0 3 6
57 : // 1 4 7
58 : // 2 5 8
59 : y[0] = B[0] * x[0] + B[3] * x[1] + B[6] * x[2];
60 : y[1] = B[1] * x[0] + B[4] * x[1] + B[7] * x[2];
61 : y[2] = B[2] * x[0] + B[5] * x[1] + B[8] * x[2];
62 : }
63 :
64 : CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[9],
65 : const CeedScalar C[9], const CeedScalar x[3],
66 : CeedScalar y[3])
67 : {
68 : // A, B, C: 0 3 6
69 : // 1 4 7
70 : // 2 5 8
71 : CeedScalar z[3];
72 :
73 61355032 : y[0] = C[0] * x[0] + C[3] * x[1] + C[6] * x[2];
74 61355032 : y[1] = C[1] * x[0] + C[4] * x[1] + C[7] * x[2];
75 61355032 : y[2] = C[2] * x[0] + C[5] * x[1] + C[8] * x[2];
76 :
77 61355032 : z[0] = B[0] * y[0] + B[3] * y[1] + B[6] * y[2];
78 61355032 : z[1] = B[1] * y[0] + B[4] * y[1] + B[7] * y[2];
79 61355032 : z[2] = B[2] * y[0] + B[5] * y[1] + B[8] * y[2];
80 :
81 61355032 : y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
82 61355032 : y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
83 61355032 : y[2] = A[6] * z[0] + A[7] * z[1] + A[8] * z[2];
84 : }
85 :
86 : CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[9],
87 : const CeedScalar x[3], CeedScalar y[3])
88 : {
89 : // A, B: 0 3 6
90 : // 1 4 7
91 : // 2 5 8
92 : CeedScalar z[3];
93 :
94 2216576 : z[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
95 2216576 : z[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
96 2216576 : z[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
97 :
98 2216576 : y[0] = B[0] * z[0] + B[3] * z[1] + B[6] * z[2];
99 2216576 : y[1] = B[1] * z[0] + B[4] * z[1] + B[7] * z[2];
100 2216576 : y[2] = B[2] * z[0] + B[5] * z[1] + B[8] * z[2];
101 : }
102 :
103 : CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[9],
104 : CeedScalar C[9])
105 : {
106 : // A, B, C: 0 3 6
107 : // 1 4 7
108 : // 2 5 8
109 :
110 : // First compute entries of R = B A.
111 0 : const CeedScalar R11 = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
112 0 : const CeedScalar R21 = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
113 0 : const CeedScalar R31 = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
114 0 : const CeedScalar R12 = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
115 0 : const CeedScalar R22 = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
116 0 : const CeedScalar R32 = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
117 0 : const CeedScalar R13 = B[0] * A[6] + B[3] * A[7] + B[6] * A[8];
118 0 : const CeedScalar R23 = B[1] * A[6] + B[4] * A[7] + B[7] * A[8];
119 0 : const CeedScalar R33 = B[2] * A[6] + B[5] * A[7] + B[8] * A[8];
120 :
121 0 : C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
122 0 : C[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
123 0 : C[2] = A[6] * R11 + A[7] * R21 + A[8] * R31;
124 0 : C[3] = A[0] * R12 + A[1] * R22 + A[2] * R32;
125 0 : C[4] = A[3] * R12 + A[4] * R22 + A[5] * R32;
126 0 : C[5] = A[6] * R12 + A[7] * R22 + A[8] * R32;
127 0 : C[6] = A[0] * R13 + A[1] * R23 + A[2] * R33;
128 0 : C[7] = A[3] * R13 + A[4] * R23 + A[5] * R33;
129 0 : C[8] = A[6] * R13 + A[7] * R23 + A[8] * R33;
130 : }
131 :
132 : CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[9],
133 : const CeedScalar C[9], CeedScalar D[9])
134 : {
135 : // A, B, C, D: 0 3 6
136 : // 1 4 7
137 : // 2 5 8
138 :
139 : // First compute entries of R = B C.
140 0 : const CeedScalar R11 = B[0] * C[0] + B[3] * C[1] + B[6] * C[2];
141 0 : const CeedScalar R21 = B[1] * C[0] + B[4] * C[1] + B[7] * C[2];
142 0 : const CeedScalar R31 = B[2] * C[0] + B[5] * C[1] + B[8] * C[2];
143 0 : const CeedScalar R12 = B[0] * C[3] + B[3] * C[4] + B[6] * C[5];
144 0 : const CeedScalar R22 = B[1] * C[3] + B[4] * C[4] + B[7] * C[5];
145 0 : const CeedScalar R32 = B[2] * C[3] + B[5] * C[4] + B[8] * C[5];
146 0 : const CeedScalar R13 = B[0] * C[6] + B[3] * C[7] + B[6] * C[8];
147 0 : const CeedScalar R23 = B[1] * C[6] + B[4] * C[7] + B[7] * C[8];
148 0 : const CeedScalar R33 = B[2] * C[6] + B[5] * C[7] + B[8] * C[8];
149 :
150 0 : D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
151 0 : D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
152 0 : D[2] = A[6] * R11 + A[7] * R21 + A[8] * R31;
153 0 : D[3] = A[0] * R12 + A[1] * R22 + A[2] * R32;
154 0 : D[4] = A[3] * R12 + A[4] * R22 + A[5] * R32;
155 0 : D[5] = A[6] * R12 + A[7] * R22 + A[8] * R32;
156 0 : D[6] = A[0] * R13 + A[1] * R23 + A[2] * R33;
157 0 : D[7] = A[3] * R13 + A[4] * R23 + A[5] * R33;
158 0 : D[8] = A[6] * R13 + A[7] * R23 + A[8] * R33;
159 : }
160 :
161 : CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[9],
162 : CeedScalar C[9])
163 : {
164 : // A, B, C: 0 3 6
165 : // 1 4 7
166 : // 2 5 8
167 0 : C[0] = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
168 0 : C[1] = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
169 0 : C[2] = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
170 0 : C[3] = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
171 0 : C[4] = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
172 0 : C[5] = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
173 0 : C[6] = B[0] * A[6] + B[3] * A[7] + B[6] * A[8];
174 0 : C[7] = B[1] * A[6] + B[4] * A[7] + B[7] * A[8];
175 0 : C[8] = B[2] * A[6] + B[5] * A[7] + B[8] * A[8];
176 : }
177 :
178 : #endif // PALACE_LIBCEED_UTILS_33_QF_H
|