32#include "NE10_types.h"
37ne10_result_t ne10_mulmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count)
39#define A1 src1[ itr ].c1.r1
40#define A2 src2[ itr ].c1.r1
41#define B1 src1[ itr ].c1.r2
42#define B2 src2[ itr ].c1.r2
43#define C1 src1[ itr ].c2.r1
44#define C2 src2[ itr ].c2.r1
45#define D1 src1[ itr ].c2.r2
46#define D2 src2[ itr ].c2.r2
48 NE10_X_OPERATION_FLOAT_C
50 dst[ itr ].c1.r1 = (A1 * A2) + (C1 * B2);
51 dst[ itr ].c1.r2 = (B1 * A2) + (D1 * B2);
53 dst[ itr ].c2.r1 = (A1 * C2) + (C1 * D2);
54 dst[ itr ].c2.r2 = (B1 * C2) + (D1 * D2);
67ne10_result_t ne10_mulmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count)
69#define A1 src1[ itr ].c1.r1
70#define A2 src2[ itr ].c1.r1
71#define B1 src1[ itr ].c1.r2
72#define B2 src2[ itr ].c1.r2
73#define C1 src1[ itr ].c1.r3
74#define C2 src2[ itr ].c1.r3
75#define D1 src1[ itr ].c2.r1
76#define D2 src2[ itr ].c2.r1
77#define E1 src1[ itr ].c2.r2
78#define E2 src2[ itr ].c2.r2
79#define F1 src1[ itr ].c2.r3
80#define F2 src2[ itr ].c2.r3
81#define G1 src1[ itr ].c3.r1
82#define G2 src2[ itr ].c3.r1
83#define H1 src1[ itr ].c3.r2
84#define H2 src2[ itr ].c3.r2
85#define I1 src1[ itr ].c3.r3
86#define I2 src2[ itr ].c3.r3
88 NE10_X_OPERATION_FLOAT_C
90 dst[ itr ].c1.r1 = (A1 * A2) + (D1 * B2) + (G1 * C2);
91 dst[ itr ].c1.r2 = (B1 * A2) + (E1 * B2) + (H1 * C2);
92 dst[ itr ].c1.r3 = (C1 * A2) + (F1 * B2) + (I1 * C2);
94 dst[ itr ].c2.r1 = (A1 * D2) + (D1 * E2) + (G1 * F2);
95 dst[ itr ].c2.r2 = (B1 * D2) + (E1 * E2) + (H1 * F2);
96 dst[ itr ].c2.r3 = (C1 * D2) + (F1 * E2) + (I1 * F2);
98 dst[ itr ].c3.r1 = (A1 * G2) + (D1 * H2) + (G1 * I2);
99 dst[ itr ].c3.r2 = (B1 * G2) + (E1 * H2) + (H1 * I2);
100 dst[ itr ].c3.r3 = (C1 * G2) + (F1 * H2) + (I1 * I2);
123ne10_result_t ne10_mulmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count)
125#define A1 src1[ itr ].c1.r1
126#define A2 src2[ itr ].c1.r1
127#define B1 src1[ itr ].c1.r2
128#define B2 src2[ itr ].c1.r2
129#define C1 src1[ itr ].c1.r3
130#define C2 src2[ itr ].c1.r3
131#define D1 src1[ itr ].c1.r4
132#define D2 src2[ itr ].c1.r4
134#define E1 src1[ itr ].c2.r1
135#define E2 src2[ itr ].c2.r1
136#define F1 src1[ itr ].c2.r2
137#define F2 src2[ itr ].c2.r2
138#define G1 src1[ itr ].c2.r3
139#define G2 src2[ itr ].c2.r3
140#define H1 src1[ itr ].c2.r4
141#define H2 src2[ itr ].c2.r4
143#define I1 src1[ itr ].c3.r1
144#define I2 src2[ itr ].c3.r1
145#define J1 src1[ itr ].c3.r2
146#define J2 src2[ itr ].c3.r2
147#define K1 src1[ itr ].c3.r3
148#define K2 src2[ itr ].c3.r3
149#define L1 src1[ itr ].c3.r4
150#define L2 src2[ itr ].c3.r4
152#define M1 src1[ itr ].c4.r1
153#define M2 src2[ itr ].c4.r1
154#define N1 src1[ itr ].c4.r2
155#define N2 src2[ itr ].c4.r2
156#define O1 src1[ itr ].c4.r3
157#define O2 src2[ itr ].c4.r3
158#define P1 src1[ itr ].c4.r4
159#define P2 src2[ itr ].c4.r4
161 NE10_X_OPERATION_FLOAT_C
163 dst[ itr ].c1.r1 = (A1 * A2) + (E1 * B2) + (I1 * C2) + (M1 * D2);
164 dst[ itr ].c1.r2 = (B1 * A2) + (F1 * B2) + (J1 * C2) + (N1 * D2);
165 dst[ itr ].c1.r3 = (C1 * A2) + (G1 * B2) + (K1 * C2) + (O1 * D2);
166 dst[ itr ].c1.r4 = (D1 * A2) + (H1 * B2) + (L1 * C2) + (P1 * D2);
168 dst[ itr ].c2.r1 = (A1 * E2) + (E1 * F2) + (I1 * G2) + (M1 * H2);
169 dst[ itr ].c2.r2 = (B1 * E2) + (F1 * F2) + (J1 * G2) + (N1 * H2);
170 dst[ itr ].c2.r3 = (C1 * E2) + (G1 * F2) + (K1 * G2) + (O1 * H2);
171 dst[ itr ].c2.r4 = (D1 * E2) + (H1 * F2) + (L1 * G2) + (P1 * H2);
173 dst[ itr ].c3.r1 = (A1 * I2) + (E1 * J2) + (I1 * K2) + (M1 * L2);
174 dst[ itr ].c3.r2 = (B1 * I2) + (F1 * J2) + (J1 * K2) + (N1 * L2);
175 dst[ itr ].c3.r3 = (C1 * I2) + (G1 * J2) + (K1 * K2) + (O1 * L2);
176 dst[ itr ].c3.r4 = (D1 * I2) + (H1 * J2) + (L1 * K2) + (P1 * L2);
178 dst[ itr ].c4.r1 = (A1 * M2) + (E1 * N2) + (I1 * O2) + (M1 * P2);
179 dst[ itr ].c4.r2 = (B1 * M2) + (F1 * N2) + (J1 * O2) + (N1 * P2);
180 dst[ itr ].c4.r3 = (C1 * M2) + (G1 * N2) + (K1 * O2) + (O1 * P2);
181 dst[ itr ].c4.r4 = (D1 * M2) + (H1 * N2) + (L1 * O2) + (P1 * P2);