Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_mulcmatvec.c
1/*
2 * Copyright 2011-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : math/NE10_mulcmatvec.c
30 */
31
32#include "NE10_types.h"
33#include "macros.h"
34
35#include <assert.h>
36
37ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_c (ne10_vec2f_t * dst, const ne10_mat2x2f_t * cst, ne10_vec2f_t * src, ne10_uint32_t count)
38{
39#define A1 cst->c1.r1
40#define B1 cst->c1.r2
41#define C1 cst->c2.r1
42#define D1 cst->c2.r2
43
44 NE10_CMATVEC_OPERATION_X_C
45 (
46 dst[ itr ].x = A1 * src[ itr ].x + C1 * src[ itr ].y;
47 dst[ itr ].y = B1 * src[ itr ].x + D1 * src[ itr ].y;
48 );
49
50#undef A1
51#undef B1
52#undef C1
53#undef D1
54}
55
56ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_c (ne10_vec3f_t * dst, const ne10_mat3x3f_t * cst, ne10_vec3f_t * src, ne10_uint32_t count)
57{
58#define A1 cst->c1.r1
59#define B1 cst->c1.r2
60#define C1 cst->c1.r3
61#define D1 cst->c2.r1
62#define E1 cst->c2.r2
63#define F1 cst->c2.r3
64#define G1 cst->c3.r1
65#define H1 cst->c3.r2
66#define I1 cst->c3.r3
67
68 NE10_CMATVEC_OPERATION_X_C
69 (
70 dst[ itr ].x = A1 * src[ itr ].x + D1 * src[ itr ].y + G1 * src[ itr ].z;
71 dst[ itr ].y = B1 * src[ itr ].x + E1 * src[ itr ].y + H1 * src[ itr ].z;
72 dst[ itr ].z = C1 * src[ itr ].x + F1 * src[ itr ].y + I1 * src[ itr ].z;
73 );
74
75#undef A1
76#undef B1
77#undef C1
78#undef D1
79#undef E1
80#undef F1
81#undef G1
82#undef H1
83#undef I1
84}
85
86extern ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_c (ne10_vec4f_t * dst, const ne10_mat4x4f_t * cst, ne10_vec4f_t * src, ne10_uint32_t count)
87{
88#define A1 cst->c1.r1
89#define B1 cst->c1.r2
90#define C1 cst->c1.r3
91#define D1 cst->c1.r4
92#define E1 cst->c2.r1
93#define F1 cst->c2.r2
94#define G1 cst->c2.r3
95#define H1 cst->c2.r4
96#define I1 cst->c3.r1
97#define J1 cst->c3.r2
98#define K1 cst->c3.r3
99#define L1 cst->c3.r4
100#define M1 cst->c4.r1
101#define N1 cst->c4.r2
102#define O1 cst->c4.r3
103#define P1 cst->c4.r4
104
105 NE10_CMATVEC_OPERATION_X_C
106 (
107 dst[ itr ].x = A1 * src[ itr ].x + E1 * src[ itr ].y + I1 * src[ itr ].z + M1 * src[ itr ].w;
108 dst[ itr ].y = B1 * src[ itr ].x + F1 * src[ itr ].y + J1 * src[ itr ].z + N1 * src[ itr ].w;
109 dst[ itr ].z = C1 * src[ itr ].x + G1 * src[ itr ].y + K1 * src[ itr ].z + O1 * src[ itr ].w;
110 dst[ itr ].w = D1 * src[ itr ].x + H1 * src[ itr ].y + L1 * src[ itr ].z + P1 * src[ itr ].w;
111 );
112
113#undef A1
114#undef B1
115#undef C1
116#undef D1
117#undef E1
118#undef F1
119#undef G1
120#undef H1
121#undef I1
122#undef J1
123#undef K1
124#undef L1
125#undef M1
126#undef N1
127#undef O1
128#undef P1
129}
a 2-tuple of ne10_float32_t values.
Definition NE10_types.h:88
a 3-tuple of ne10_float32_t values.
Definition NE10_types.h:97
a 4-tuple of ne10_float32_t values.
Definition NE10_types.h:107