Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_LIBCEED_UTILS_32_QF_H
5 : #define PALACE_LIBCEED_UTILS_32_QF_H
6 :
7 : #ifndef CEED_RUNNING_JIT_PASS
8 : #include <math.h>
9 : #endif
10 :
11 : CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar J[6])
12 : {
13 : // J: 0 3
14 : // 1 4
15 : // 2 5
16 : const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2];
17 : const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5];
18 : const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5];
19 : return sqrt(E * G - F * F);
20 : }
21 :
22 : template <bool ComputeDet = false>
23 : CEED_QFUNCTION_HELPER CeedScalar AdjJt32(const CeedScalar J[6], CeedScalar adjJt[6])
24 : {
25 : // Compute adj(J)^T / det(J) and store the result.
26 : // J: 0 3
27 : // 1 4
28 : // 2 5
29 4746600 : const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2];
30 4746600 : const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5];
31 4746600 : const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5];
32 4746600 : const CeedScalar d = sqrt(E * G - F * F);
33 4746600 : adjJt[0] = (G * J[0] - F * J[3]) / d;
34 4746600 : adjJt[1] = (G * J[1] - F * J[4]) / d;
35 4746600 : adjJt[2] = (G * J[2] - F * J[5]) / d;
36 4746600 : adjJt[3] = (E * J[3] - F * J[0]) / d;
37 4746600 : adjJt[4] = (E * J[4] - F * J[1]) / d;
38 4746600 : adjJt[5] = (E * J[5] - F * J[2]) / d;
39 : return ComputeDet ? d : 0.0;
40 : }
41 :
42 : CEED_QFUNCTION_HELPER void MatUnpack32(const CeedScalar *A, const CeedInt A_stride,
43 : CeedScalar A_loc[6])
44 : {
45 6856008 : A_loc[0] = A[A_stride * 0];
46 6856008 : A_loc[1] = A[A_stride * 1];
47 6856008 : A_loc[2] = A[A_stride * 2];
48 6856008 : A_loc[3] = A[A_stride * 3];
49 6856008 : A_loc[4] = A[A_stride * 4];
50 6856008 : A_loc[5] = A[A_stride * 5];
51 : }
52 :
53 : CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[9],
54 : const CeedScalar C[6], const CeedScalar x[2],
55 : CeedScalar y[2])
56 : {
57 : // A, C: 0 3 B: 0 3 6
58 : // 1 4 1 4 7
59 : // 2 5 2 5 8
60 : CeedScalar z[3], t;
61 :
62 2109408 : y[0] = C[0] * x[0] + C[3] * x[1];
63 2109408 : y[1] = C[1] * x[0] + C[4] * x[1];
64 2109408 : t = C[2] * x[0] + C[5] * x[1];
65 :
66 2109408 : z[0] = B[0] * y[0] + B[3] * y[1] + B[6] * t;
67 2109408 : z[1] = B[1] * y[0] + B[4] * y[1] + B[7] * t;
68 2109408 : z[2] = B[2] * y[0] + B[5] * y[1] + B[8] * t;
69 :
70 2109408 : y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2];
71 2109408 : y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2];
72 : }
73 :
74 : CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[9],
75 : const CeedScalar x[2], CeedScalar y[3])
76 : {
77 : // A: 0 3 B: 0 3 6
78 : // 1 4 1 4 7
79 : // 2 5 2 5 8
80 : CeedScalar z[3];
81 :
82 0 : z[0] = A[0] * x[0] + A[3] * x[1];
83 0 : z[1] = A[1] * x[0] + A[4] * x[1];
84 0 : z[2] = A[2] * x[0] + A[5] * x[1];
85 :
86 0 : y[0] = B[0] * z[0] + B[3] * z[1] + B[6] * z[2];
87 0 : y[1] = B[1] * z[0] + B[4] * z[1] + B[7] * z[2];
88 0 : y[2] = B[2] * z[0] + B[5] * z[1] + B[8] * z[2];
89 : }
90 :
91 : CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[9],
92 : CeedScalar C[4])
93 : {
94 : // A: 0 3 B: 0 3 6 C: 0 2
95 : // 1 4 1 4 7 1 3
96 : // 2 5 2 5 8
97 :
98 : // First compute entries of R = B A.
99 0 : const CeedScalar R11 = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
100 0 : const CeedScalar R21 = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
101 0 : const CeedScalar R31 = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
102 0 : const CeedScalar R12 = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
103 0 : const CeedScalar R22 = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
104 0 : const CeedScalar R32 = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
105 :
106 0 : C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
107 0 : C[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
108 0 : C[2] = A[0] * R12 + A[1] * R22 + A[2] * R32;
109 0 : C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
110 : }
111 :
112 : CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[9],
113 : const CeedScalar C[6], CeedScalar D[4])
114 : {
115 : // A, C: 0 3 B: 0 3 6 D: 0 2
116 : // 1 4 1 4 7 1 3
117 : // 2 5 2 5 8
118 :
119 : // First compute entries of R = B C.
120 0 : const CeedScalar R11 = B[0] * C[0] + B[3] * C[1] + B[6] * C[2];
121 0 : const CeedScalar R21 = B[1] * C[0] + B[4] * C[1] + B[7] * C[2];
122 0 : const CeedScalar R31 = B[2] * C[0] + B[5] * C[1] + B[8] * C[2];
123 0 : const CeedScalar R12 = B[0] * C[3] + B[3] * C[4] + B[6] * C[5];
124 0 : const CeedScalar R22 = B[1] * C[3] + B[4] * C[4] + B[7] * C[5];
125 0 : const CeedScalar R32 = B[2] * C[3] + B[5] * C[4] + B[8] * C[5];
126 :
127 0 : D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31;
128 0 : D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31;
129 0 : D[2] = A[0] * R12 + A[1] * R22 + A[2] * R32;
130 0 : D[3] = A[3] * R12 + A[4] * R22 + A[5] * R32;
131 : }
132 :
133 : CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[9],
134 : CeedScalar C[6])
135 : {
136 : // A, C: 0 3 B: 0 3 6
137 : // 1 4 1 4 7
138 : // 2 5 2 5 8
139 0 : C[0] = B[0] * A[0] + B[3] * A[1] + B[6] * A[2];
140 0 : C[1] = B[1] * A[0] + B[4] * A[1] + B[7] * A[2];
141 0 : C[2] = B[2] * A[0] + B[5] * A[1] + B[8] * A[2];
142 0 : C[3] = B[0] * A[3] + B[3] * A[4] + B[6] * A[5];
143 0 : C[4] = B[1] * A[3] + B[4] * A[4] + B[7] * A[5];
144 0 : C[5] = B[2] * A[3] + B[5] * A[4] + B[8] * A[5];
145 : }
146 :
147 : #endif // PALACE_LIBCEED_UTILS_32_QF_H
|