Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_int16.c
/*
 * Copyright 2013-15 ARM Limited and Contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * * Neither the name of ARM Limited nor the
 * names of its contributors may be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27
/* license of Kiss FFT */
/*
Copyright (c) 2003-2010, Mark Borgerding

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
42
/*
 * NE10 Library : dsp/NE10_fft_int16.c
 */
46
#include "NE10_types.h"
#include "NE10_macros.h"
#include "NE10_fft.h"
50
51
52static void ne10_mixed_radix_butterfly_int16_c (ne10_fft_cpx_int16_t * Fout,
54 ne10_int32_t * factors,
55 ne10_fft_cpx_int16_t * twiddles,
56 ne10_fft_cpx_int16_t * buffer,
57 ne10_int32_t scaled_flag)
58{
59 ne10_int32_t fstride, mstride, N;
60 ne10_int32_t fstride1;
61 ne10_int32_t f_count, m_count;
62 ne10_int32_t stage_count;
63
64 ne10_fft_cpx_int16_t scratch_in[8];
65 ne10_fft_cpx_int16_t scratch_out[8];
66 ne10_fft_cpx_int16_t scratch[16];
67 ne10_fft_cpx_int16_t scratch_tw[6];
68
69 ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
70 ne10_fft_cpx_int16_t *Fout_ls = Fout;
72 ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
73 const ne10_int32_t TW_81 = 23169;
74 const ne10_int32_t TW_81N = -23169;
75
76
77
78 // init fstride, mstride, N, tw
79 stage_count = factors[0];
80 fstride = factors[1];
81 mstride = factors[ (stage_count << 1) - 1 ];
82 N = factors[ stage_count << 1 ]; // radix
83 tw = twiddles;
84
85 // the first stage
86 Fin1 = Fin;
87 Fout1 = Fout;
88 if (N == 2) // length of FFT is 2^n (n is odd)
89 {
90 // radix 8
91 N = fstride >> 1; // 1/4 of length of FFT
92 fstride1 = fstride >> 2;
93
94 Fin1 = Fin;
95 for (f_count = 0; f_count < fstride1; f_count ++)
96 {
97 Fout1 = & Fout[ f_count * 8 ];
98 // load
99 if (scaled_flag == 1)
100 {
101 NE10_F2I16_FIXDIV (Fin1[0], 8);
102 NE10_F2I16_FIXDIV (Fin1[0 + fstride], 8);
103 NE10_F2I16_FIXDIV (Fin1[fstride1], 8);
104 NE10_F2I16_FIXDIV (Fin1[fstride1 + fstride], 8);
105 NE10_F2I16_FIXDIV (Fin1[fstride1 * 2], 8);
106 NE10_F2I16_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
107 NE10_F2I16_FIXDIV (Fin1[fstride1 * 3], 8);
108 NE10_F2I16_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
109 }
110 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
111 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
112 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
113 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
114 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
115 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
116 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
117 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
118 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
119 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
120 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
121 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
122 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
123 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
124 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
125 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
126
127 // radix 4 butterfly without twiddles
128 scratch[0] = scratch_in[0];
129 scratch[1] = scratch_in[1];
130
131 scratch[2] = scratch_in[2];
132 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r + scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
133 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i - scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);
134
135 scratch[4] = scratch_in[4];
136 scratch[5].r = scratch_in[5].i;
137 scratch[5].i = -scratch_in[5].r;
138
139 scratch[6].r = scratch_in[6].r;
140 scratch[6].i = scratch_in[6].i;
141 scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r - scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
142 scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i + scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);
143
144 // radix 2 butterfly
145 scratch[8].r = scratch[0].r + scratch[4].r;
146 scratch[8].i = scratch[0].i + scratch[4].i;
147 scratch[9].r = scratch[1].r + scratch[5].r;
148 scratch[9].i = scratch[1].i + scratch[5].i;
149
150 scratch[10].r = scratch[0].r - scratch[4].r;
151 scratch[10].i = scratch[0].i - scratch[4].i;
152 scratch[11].r = scratch[1].r - scratch[5].r;
153 scratch[11].i = scratch[1].i - scratch[5].i;
154
155 // radix 2 butterfly
156 scratch[12].r = scratch[2].r + scratch[6].r;
157 scratch[12].i = scratch[2].i + scratch[6].i;
158 scratch[13].r = scratch[3].r + scratch[7].r;
159 scratch[13].i = scratch[3].i + scratch[7].i;
160
161 scratch[14].r = scratch[2].r - scratch[6].r;
162 scratch[14].i = scratch[2].i - scratch[6].i;
163 scratch[15].r = scratch[3].r - scratch[7].r;
164 scratch[15].i = scratch[3].i - scratch[7].i;
165
166 // third result
167 scratch_out[4].r = scratch[8].r - scratch[12].r;
168 scratch_out[4].i = scratch[8].i - scratch[12].i;
169 scratch_out[5].r = scratch[9].r - scratch[13].r;
170 scratch_out[5].i = scratch[9].i - scratch[13].i;
171
172 // first result
173 scratch_out[0].r = scratch[8].r + scratch[12].r;
174 scratch_out[0].i = scratch[8].i + scratch[12].i;
175 scratch_out[1].r = scratch[9].r + scratch[13].r;
176 scratch_out[1].i = scratch[9].i + scratch[13].i;
177
178 // second result
179 scratch_out[2].r = scratch[10].r + scratch[14].i;
180 scratch_out[2].i = scratch[10].i - scratch[14].r;
181 scratch_out[3].r = scratch[11].r + scratch[15].i;
182 scratch_out[3].i = scratch[11].i - scratch[15].r;
183
184 // forth result
185 scratch_out[6].r = scratch[10].r - scratch[14].i;
186 scratch_out[6].i = scratch[10].i + scratch[14].r;
187 scratch_out[7].r = scratch[11].r - scratch[15].i;
188 scratch_out[7].i = scratch[11].i + scratch[15].r;
189
190 // store
191 Fout1[0] = scratch_out[0];
192 Fout1[1] = scratch_out[1];
193 Fout1[2] = scratch_out[2];
194 Fout1[3] = scratch_out[3];
195 Fout1[4] = scratch_out[4];
196 Fout1[5] = scratch_out[5];
197 Fout1[6] = scratch_out[6];
198 Fout1[7] = scratch_out[7];
199
200 Fin1 += 1;
201 } // f_count
202 tw += 6;
203 mstride <<= 2;
204 fstride >>= 4;
205 stage_count -= 2;
206
207 // swap
208 Ftmp = buffer;
209 buffer = Fout;
210 Fout = Ftmp;
211 }
212 else if (N == 4) // length of FFT is 2^n (n is even)
213 {
214 //fstride is nfft>>2
215 for (f_count = fstride; f_count ; f_count --)
216 {
217 // load
218 scratch_in[0] = *Fin1;
219 Fin2 = Fin1 + fstride;
220 scratch_in[1] = *Fin2;
221 Fin2 = Fin2 + fstride;
222 scratch_in[2] = *Fin2;
223 Fin2 = Fin2 + fstride;
224 scratch_in[3] = *Fin2;
225
226 // radix 4 butterfly without twiddles
227 if (scaled_flag == 1)
228 {
229 NE10_F2I16_FIXDIV (scratch_in[0], 4);
230 NE10_F2I16_FIXDIV (scratch_in[1], 4);
231 NE10_F2I16_FIXDIV (scratch_in[2], 4);
232 NE10_F2I16_FIXDIV (scratch_in[3], 4);
233 }
234
235 // radix 2 butterfly
236 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
237 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
238
239 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
240 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
241
242 // radix 2 butterfly
243 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
244 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
245
246 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
247 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
248
249 // third result
250 scratch_out[2].r = scratch[0].r - scratch[2].r;
251 scratch_out[2].i = scratch[0].i - scratch[2].i;
252
253 // first result
254 scratch_out[0].r = scratch[0].r + scratch[2].r;
255 scratch_out[0].i = scratch[0].i + scratch[2].i;
256
257 // second result
258 scratch_out[1].r = scratch[1].r + scratch[3].i;
259 scratch_out[1].i = scratch[1].i - scratch[3].r;
260
261 // forth result
262 scratch_out[3].r = scratch[1].r - scratch[3].i;
263 scratch_out[3].i = scratch[1].i + scratch[3].r;
264
265 // store
266 * Fout1 ++ = scratch_out[0];
267 * Fout1 ++ = scratch_out[1];
268 * Fout1 ++ = scratch_out[2];
269 * Fout1 ++ = scratch_out[3];
270
271 Fin1++;
272 } // f_count
273
274 N = fstride; // 1/4 of length of FFT
275
276 // swap
277 Ftmp = buffer;
278 buffer = Fout;
279 Fout = Ftmp;
280
281 // update address for other stages
282 stage_count--;
283 fstride >>= 2;
284 // end of first stage
285 }
286
287
288 // others but the last one
289 for (; stage_count > 1 ; stage_count--)
290 {
291 Fin1 = buffer;
292 for (f_count = 0; f_count < fstride; f_count ++)
293 {
294 Fout1 = & Fout[ f_count * mstride << 2 ];
295 tw1 = tw;
296 for (m_count = mstride; m_count ; m_count --)
297 {
298 // load
299 scratch_tw[0] = *tw1;
300 tw2 = tw1 + mstride;
301 scratch_tw[1] = *tw2;
302 tw2 += mstride;
303 scratch_tw[2] = *tw2;
304 scratch_in[0] = * Fin1;
305 Fin2 = Fin1 + N;
306 scratch_in[1] = * Fin2;
307 Fin2 += N;
308 scratch_in[2] = * Fin2;
309 Fin2 += N;
310 scratch_in[3] = * Fin2;
311 if (scaled_flag == 1)
312 {
313 NE10_F2I16_FIXDIV (scratch_in[0], 4);
314 NE10_F2I16_FIXDIV (scratch_in[1], 4);
315 NE10_F2I16_FIXDIV (scratch_in[2], 4);
316 NE10_F2I16_FIXDIV (scratch_in[3], 4);
317 }
318
319 // radix 4 butterfly with twiddles
320
321 scratch[0] = scratch_in[0];
322 scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
323 - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
324 scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
325 + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
326
327 scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
328 - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
329 scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
330 + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
331
332 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
333 - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
334 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
335 + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
336
337 // radix 2 butterfly
338 scratch[4].r = scratch[0].r + scratch[2].r;
339 scratch[4].i = scratch[0].i + scratch[2].i;
340
341 scratch[5].r = scratch[0].r - scratch[2].r;
342 scratch[5].i = scratch[0].i - scratch[2].i;
343
344 // radix 2 butterfly
345 scratch[6].r = scratch[1].r + scratch[3].r;
346 scratch[6].i = scratch[1].i + scratch[3].i;
347
348 scratch[7].r = scratch[1].r - scratch[3].r;
349 scratch[7].i = scratch[1].i - scratch[3].i;
350
351 // third result
352 scratch_out[2].r = scratch[4].r - scratch[6].r;
353 scratch_out[2].i = scratch[4].i - scratch[6].i;
354
355 // first result
356 scratch_out[0].r = scratch[4].r + scratch[6].r;
357 scratch_out[0].i = scratch[4].i + scratch[6].i;
358
359 // second result
360 scratch_out[1].r = scratch[5].r + scratch[7].i;
361 scratch_out[1].i = scratch[5].i - scratch[7].r;
362
363 // forth result
364 scratch_out[3].r = scratch[5].r - scratch[7].i;
365 scratch_out[3].i = scratch[5].i + scratch[7].r;
366
367 // store
368 *Fout1 = scratch_out[0];
369 Fout2 = Fout1 + mstride;
370 *Fout2 = scratch_out[1];
371 Fout2 += mstride;
372 *Fout2 = scratch_out[2];
373 Fout2 += mstride;
374 *Fout2 = scratch_out[3];
375
376 tw1++;
377 Fin1 ++;
378 Fout1 ++;
379 } // m_count
380 } // f_count
381 tw += mstride * 3;
382 mstride <<= 2;
383 // swap
384 Ftmp = buffer;
385 buffer = Fout;
386 Fout = Ftmp;
387 fstride >>= 2;
388 } // stage_count
389
390 // the last one
391 if (stage_count)
392 {
393 Fin1 = buffer;
394 // if stage count is even, output to the input array
395 Fout1 = Fout_ls;
396
397 for (f_count = 0; f_count < fstride; f_count ++)
398 {
399 tw1 = tw;
400 for (m_count = mstride; m_count ; m_count --)
401 {
402 // load
403 scratch_tw[0] = *tw1;
404 tw2 = tw1 + mstride;
405 scratch_tw[1] = *tw2;
406 tw2 += mstride;
407 scratch_tw[2] = *tw2;
408 scratch_in[0] = * Fin1;
409 Fin2 = Fin1 + N;
410 scratch_in[1] = * Fin2;
411 Fin2 += N;
412 scratch_in[2] = * Fin2;
413 Fin2 += N;
414 scratch_in[3] = * Fin2;
415 if (scaled_flag == 1)
416 {
417 NE10_F2I16_FIXDIV (scratch_in[0], 4);
418 NE10_F2I16_FIXDIV (scratch_in[1], 4);
419 NE10_F2I16_FIXDIV (scratch_in[2], 4);
420 NE10_F2I16_FIXDIV (scratch_in[3], 4);
421 }
422
423 // radix 4 butterfly with twiddles
424
425 scratch[0] = scratch_in[0];
426 scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
427 - (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
428 scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
429 + (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
430
431 scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
432 - (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
433 scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
434 + (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
435
436 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
437 - (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
438 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
439 + (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
440
441 // radix 2 butterfly
442 scratch[4].r = scratch[0].r + scratch[2].r;
443 scratch[4].i = scratch[0].i + scratch[2].i;
444
445 scratch[5].r = scratch[0].r - scratch[2].r;
446 scratch[5].i = scratch[0].i - scratch[2].i;
447
448 // radix 2 butterfly
449 scratch[6].r = scratch[1].r + scratch[3].r;
450 scratch[6].i = scratch[1].i + scratch[3].i;
451
452 scratch[7].r = scratch[1].r - scratch[3].r;
453 scratch[7].i = scratch[1].i - scratch[3].i;
454
455 // third result
456 scratch_out[2].r = scratch[4].r - scratch[6].r;
457 scratch_out[2].i = scratch[4].i - scratch[6].i;
458
459 // first result
460 scratch_out[0].r = scratch[4].r + scratch[6].r;
461 scratch_out[0].i = scratch[4].i + scratch[6].i;
462
463 // second result
464 scratch_out[1].r = scratch[5].r + scratch[7].i;
465 scratch_out[1].i = scratch[5].i - scratch[7].r;
466
467 // forth result
468 scratch_out[3].r = scratch[5].r - scratch[7].i;
469 scratch_out[3].i = scratch[5].i + scratch[7].r;
470
471 // store
472 *Fout1 = scratch_out[0];
473 Fout2 = Fout1 + N;
474 *Fout2 = scratch_out[1];
475 Fout2 += N;
476 *Fout2 = scratch_out[2];
477 Fout2 += N;
478 *Fout2 = scratch_out[3];
479
480 tw1 ++;
481 Fin1 ++;
482 Fout1 ++;
483 } // m_count
484 } // f_count
485 } // last stage
486}
487
488static void ne10_mixed_radix_butterfly_inverse_int16_c (ne10_fft_cpx_int16_t * Fout,
490 ne10_int32_t * factors,
491 ne10_fft_cpx_int16_t * twiddles,
492 ne10_fft_cpx_int16_t * buffer,
493 ne10_int32_t scaled_flag)
494{
495 ne10_int32_t fstride, mstride, N;
496 ne10_int32_t fstride1;
497 ne10_int32_t f_count, m_count;
498 ne10_int32_t stage_count;
499
500 ne10_fft_cpx_int16_t scratch_in[8];
501 ne10_fft_cpx_int16_t scratch_out[8];
502 ne10_fft_cpx_int16_t scratch[16];
503 ne10_fft_cpx_int16_t scratch_tw[6];
504
505 ne10_fft_cpx_int16_t *Fin1, *Fin2, *Fout1, *Fout2;
506 ne10_fft_cpx_int16_t *Fout_ls = Fout;
508 ne10_fft_cpx_int16_t *tw, *tw1, *tw2;
509 const ne10_int32_t TW_81 = 23169;
510 const ne10_int32_t TW_81N = -23169;
511
512 // init fstride, mstride, N
513 stage_count = factors[0];
514 fstride = factors[1];
515 mstride = factors[ (stage_count << 1) - 1 ];
516 N = factors[ stage_count << 1 ]; // radix
517 tw = twiddles;
518
519 // the first stage
520 Fin1 = Fin;
521 Fout1 = Fout;
522 if (N == 2) // length of FFT is 2^n (n is odd)
523 {
524 // radix 8
525 N = fstride >> 1; // 1/4 of length of FFT
526 fstride1 = fstride >> 2;
527
528 Fin1 = Fin;
529 for (f_count = 0; f_count < fstride1; f_count ++)
530 {
531 Fout1 = & Fout[ f_count * 8 ];
532 // load
533 if (scaled_flag == 1)
534 {
535 NE10_F2I16_FIXDIV (Fin1[0], 8);
536 NE10_F2I16_FIXDIV (Fin1[0 + fstride], 8);
537 NE10_F2I16_FIXDIV (Fin1[fstride1], 8);
538 NE10_F2I16_FIXDIV (Fin1[fstride1 + fstride], 8);
539 NE10_F2I16_FIXDIV (Fin1[fstride1 * 2], 8);
540 NE10_F2I16_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
541 NE10_F2I16_FIXDIV (Fin1[fstride1 * 3], 8);
542 NE10_F2I16_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
543 }
544
545 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
546 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
547 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
548 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
549 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
550 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
551 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
552 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
553 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
554 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
555 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
556 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
557 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
558 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
559 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
560 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
561
562 // radix 4 butterfly with twiddles
563
564 scratch[0] = scratch_in[0];
565 scratch[1] = scratch_in[1];
566
567 scratch[2] = scratch_in[2];
568 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].r - scratch_in[3].i) * TW_81) >> NE10_F2I16_SHIFT);
569 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[3].i + scratch_in[3].r) * TW_81) >> NE10_F2I16_SHIFT);
570
571 scratch[4] = scratch_in[4];
572 scratch[5].r = -scratch_in[5].i;
573 scratch[5].i = scratch_in[5].r;
574
575 scratch[6].r = scratch_in[6].r;
576 scratch[6].i = scratch_in[6].i;
577 scratch[7].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].r + scratch_in[7].i) * TW_81N) >> NE10_F2I16_SHIFT);
578 scratch[7].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) (scratch_in[7].i - scratch_in[7].r) * TW_81N) >> NE10_F2I16_SHIFT);
579
580 // radix 2 butterfly
581 scratch[8].r = scratch[0].r + scratch[4].r;
582 scratch[8].i = scratch[0].i + scratch[4].i;
583 scratch[9].r = scratch[1].r + scratch[5].r;
584 scratch[9].i = scratch[1].i + scratch[5].i;
585
586 scratch[10].r = scratch[0].r - scratch[4].r;
587 scratch[10].i = scratch[0].i - scratch[4].i;
588 scratch[11].r = scratch[1].r - scratch[5].r;
589 scratch[11].i = scratch[1].i - scratch[5].i;
590
591 // radix 2 butterfly
592 scratch[12].r = scratch[2].r + scratch[6].r;
593 scratch[12].i = scratch[2].i + scratch[6].i;
594 scratch[13].r = scratch[3].r + scratch[7].r;
595 scratch[13].i = scratch[3].i + scratch[7].i;
596
597 scratch[14].r = scratch[2].r - scratch[6].r;
598 scratch[14].i = scratch[2].i - scratch[6].i;
599 scratch[15].r = scratch[3].r - scratch[7].r;
600 scratch[15].i = scratch[3].i - scratch[7].i;
601
602 // third result
603 scratch_out[4].r = scratch[8].r - scratch[12].r;
604 scratch_out[4].i = scratch[8].i - scratch[12].i;
605 scratch_out[5].r = scratch[9].r - scratch[13].r;
606 scratch_out[5].i = scratch[9].i - scratch[13].i;
607
608 // first result
609 scratch_out[0].r = scratch[8].r + scratch[12].r;
610 scratch_out[0].i = scratch[8].i + scratch[12].i;
611 scratch_out[1].r = scratch[9].r + scratch[13].r;
612 scratch_out[1].i = scratch[9].i + scratch[13].i;
613
614 // second result
615 scratch_out[2].r = scratch[10].r - scratch[14].i;
616 scratch_out[2].i = scratch[10].i + scratch[14].r;
617 scratch_out[3].r = scratch[11].r - scratch[15].i;
618 scratch_out[3].i = scratch[11].i + scratch[15].r;
619
620 // forth result
621 scratch_out[6].r = scratch[10].r + scratch[14].i;
622 scratch_out[6].i = scratch[10].i - scratch[14].r;
623 scratch_out[7].r = scratch[11].r + scratch[15].i;
624 scratch_out[7].i = scratch[11].i - scratch[15].r;
625
626 // store
627 Fout1[0] = scratch_out[0];
628 Fout1[1] = scratch_out[1];
629 Fout1[2] = scratch_out[2];
630 Fout1[3] = scratch_out[3];
631 Fout1[4] = scratch_out[4];
632 Fout1[5] = scratch_out[5];
633 Fout1[6] = scratch_out[6];
634 Fout1[7] = scratch_out[7];
635
636 Fin1 += 1;
637 } // f_count
638 tw += 6;
639 mstride <<= 2;
640 fstride >>= 4;
641 stage_count -= 2;
642
643 // swap
644 Ftmp = buffer;
645 buffer = Fout;
646 Fout = Ftmp;
647 }
648 else if (N == 4) // length of FFT is 2^n (n is even)
649 {
650 //fstride is nfft>>2
651 for (f_count = fstride; f_count ; f_count --)
652 {
653 // load
654 scratch_in[0] = *Fin1;
655 Fin2 = Fin1 + fstride;
656 scratch_in[1] = *Fin2;
657 Fin2 = Fin2 + fstride;
658 scratch_in[2] = *Fin2;
659 Fin2 = Fin2 + fstride;
660 scratch_in[3] = *Fin2;
661
662 // radix 4 butterfly without twiddles
663 if (scaled_flag == 1)
664 {
665 NE10_F2I16_FIXDIV (scratch_in[0], 4);
666 NE10_F2I16_FIXDIV (scratch_in[1], 4);
667 NE10_F2I16_FIXDIV (scratch_in[2], 4);
668 NE10_F2I16_FIXDIV (scratch_in[3], 4);
669 }
670
671 // radix 2 butterfly
672 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
673 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
674
675 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
676 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
677
678 // radix 2 butterfly
679 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
680 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
681
682 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
683 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
684
685 // third result
686 scratch_out[2].r = scratch[0].r - scratch[2].r;
687 scratch_out[2].i = scratch[0].i - scratch[2].i;
688
689 // first result
690 scratch_out[0].r = scratch[0].r + scratch[2].r;
691 scratch_out[0].i = scratch[0].i + scratch[2].i;
692
693 // second result
694 scratch_out[1].r = scratch[1].r - scratch[3].i;
695 scratch_out[1].i = scratch[1].i + scratch[3].r;
696
697 // forth result
698 scratch_out[3].r = scratch[1].r + scratch[3].i;
699 scratch_out[3].i = scratch[1].i - scratch[3].r;
700
701 // store
702 * Fout1 ++ = scratch_out[0];
703 * Fout1 ++ = scratch_out[1];
704 * Fout1 ++ = scratch_out[2];
705 * Fout1 ++ = scratch_out[3];
706
707 Fin1++;
708 } // f_count
709
710 N = fstride; // 1/4 of length of FFT
711 // update address for other stages
712 stage_count--;
713 fstride >>= 2;
714
715 // swap
716 Ftmp = buffer;
717 buffer = Fout;
718 Fout = Ftmp;
719
720 // end of first stage
721 }
722
723
724 // others but the last one
725 for (; stage_count > 1 ; stage_count--)
726 {
727 Fin1 = buffer;
728 for (f_count = 0; f_count < fstride; f_count ++)
729 {
730 Fout1 = & Fout[ f_count * mstride << 2 ];
731 tw1 = tw;
732 for (m_count = mstride; m_count ; m_count --)
733 {
734 // load
735 scratch_tw[0] = *tw1;
736 tw2 = tw1 + mstride;
737 scratch_tw[1] = *tw2;
738 tw2 += mstride;
739 scratch_tw[2] = *tw2;
740 scratch_in[0] = * Fin1;
741 Fin2 = Fin1 + N;
742 scratch_in[1] = * Fin2;
743 Fin2 += N;
744 scratch_in[2] = * Fin2;
745 Fin2 += N;
746 scratch_in[3] = * Fin2;
747 if (scaled_flag == 1)
748 {
749 NE10_F2I16_FIXDIV (scratch_in[0], 4);
750 NE10_F2I16_FIXDIV (scratch_in[1], 4);
751 NE10_F2I16_FIXDIV (scratch_in[2], 4);
752 NE10_F2I16_FIXDIV (scratch_in[3], 4);
753 }
754
755 // radix 4 butterfly with twiddles
756
757 scratch[0] = scratch_in[0];
758 scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
759 + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
760 scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
761 - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
762
763 scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
764 + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
765 scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
766 - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
767
768 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
769 + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
770 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
771 - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
772
773 // radix 2 butterfly
774 scratch[4].r = scratch[0].r + scratch[2].r;
775 scratch[4].i = scratch[0].i + scratch[2].i;
776
777 scratch[5].r = scratch[0].r - scratch[2].r;
778 scratch[5].i = scratch[0].i - scratch[2].i;
779
780 // radix 2 butterfly
781 scratch[6].r = scratch[1].r + scratch[3].r;
782 scratch[6].i = scratch[1].i + scratch[3].i;
783
784 scratch[7].r = scratch[1].r - scratch[3].r;
785 scratch[7].i = scratch[1].i - scratch[3].i;
786
787 // third result
788 scratch_out[2].r = scratch[4].r - scratch[6].r;
789 scratch_out[2].i = scratch[4].i - scratch[6].i;
790
791 // first result
792 scratch_out[0].r = scratch[4].r + scratch[6].r;
793 scratch_out[0].i = scratch[4].i + scratch[6].i;
794
795 // second result
796 scratch_out[1].r = scratch[5].r - scratch[7].i;
797 scratch_out[1].i = scratch[5].i + scratch[7].r;
798
799 // forth result
800 scratch_out[3].r = scratch[5].r + scratch[7].i;
801 scratch_out[3].i = scratch[5].i - scratch[7].r;
802
803 // store
804 *Fout1 = scratch_out[0];
805 Fout2 = Fout1 + mstride;
806 *Fout2 = scratch_out[1];
807 Fout2 += mstride;
808 *Fout2 = scratch_out[2];
809 Fout2 += mstride;
810 *Fout2 = scratch_out[3];
811
812 tw1++;
813 Fin1 ++;
814 Fout1 ++;
815 } // m_count
816 } // f_count
817 tw += mstride * 3;
818 mstride <<= 2;
819 // swap
820 Ftmp = buffer;
821 buffer = Fout;
822 Fout = Ftmp;
823 fstride >>= 2;
824 } // stage_count
825
826 // the last one
827 if (stage_count)
828 {
829 Fin1 = buffer;
830 // if stage count is even, output to the input array
831 Fout1 = Fout_ls;
832
833 for (f_count = 0; f_count < fstride; f_count ++)
834 {
835 tw1 = tw;
836 for (m_count = mstride; m_count ; m_count --)
837 {
838 // load
839 scratch_tw[0] = *tw1;
840 tw2 = tw1 + mstride;
841 scratch_tw[1] = *tw2;
842 tw2 += mstride;
843 scratch_tw[2] = *tw2;
844 scratch_in[0] = * Fin1;
845 Fin2 = Fin1 + N;
846 scratch_in[1] = * Fin2;
847 Fin2 += N;
848 scratch_in[2] = * Fin2;
849 Fin2 += N;
850 scratch_in[3] = * Fin2;
851 if (scaled_flag == 1)
852 {
853 NE10_F2I16_FIXDIV (scratch_in[0], 4);
854 NE10_F2I16_FIXDIV (scratch_in[1], 4);
855 NE10_F2I16_FIXDIV (scratch_in[2], 4);
856 NE10_F2I16_FIXDIV (scratch_in[3], 4);
857 }
858
859 // radix 4 butterfly with twiddles
860
861 scratch[0] = scratch_in[0];
862 scratch[1].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
863 + (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
864 scratch[1].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
865 - (NE10_F2I16_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> NE10_F2I16_SHIFT);
866
867 scratch[2].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
868 + (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
869 scratch[2].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
870 - (NE10_F2I16_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> NE10_F2I16_SHIFT);
871
872 scratch[3].r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
873 + (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
874 scratch[3].i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
875 - (NE10_F2I16_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> NE10_F2I16_SHIFT);
876
877 // radix 2 butterfly
878 scratch[4].r = scratch[0].r + scratch[2].r;
879 scratch[4].i = scratch[0].i + scratch[2].i;
880
881 scratch[5].r = scratch[0].r - scratch[2].r;
882 scratch[5].i = scratch[0].i - scratch[2].i;
883
884 // radix 2 butterfly
885 scratch[6].r = scratch[1].r + scratch[3].r;
886 scratch[6].i = scratch[1].i + scratch[3].i;
887
888 scratch[7].r = scratch[1].r - scratch[3].r;
889 scratch[7].i = scratch[1].i - scratch[3].i;
890
891 // third result
892 scratch_out[2].r = scratch[4].r - scratch[6].r;
893 scratch_out[2].i = scratch[4].i - scratch[6].i;
894
895 // first result
896 scratch_out[0].r = scratch[4].r + scratch[6].r;
897 scratch_out[0].i = scratch[4].i + scratch[6].i;
898
899 // second result
900 scratch_out[1].r = scratch[5].r - scratch[7].i;
901 scratch_out[1].i = scratch[5].i + scratch[7].r;
902
903 // forth result
904 scratch_out[3].r = scratch[5].r + scratch[7].i;
905 scratch_out[3].i = scratch[5].i - scratch[7].r;
906
907 // store
908 *Fout1 = scratch_out[0];
909 Fout2 = Fout1 + N;
910 *Fout2 = scratch_out[1];
911 Fout2 += N;
912 *Fout2 = scratch_out[2];
913 Fout2 += N;
914 *Fout2 = scratch_out[3];
915
916 tw1 ++;
917 Fin1 ++;
918 Fout1 ++;
919 } // m_count
920 } // f_count
921 } // last stage
922}
923
924static void ne10_fft_split_r2c_1d_int16 (ne10_fft_cpx_int16_t *dst,
925 const ne10_fft_cpx_int16_t *src,
926 ne10_fft_cpx_int16_t *twiddles,
927 ne10_int32_t ncfft,
928 ne10_int32_t scaled_flag)
929{
930 ne10_int32_t k;
931 ne10_fft_cpx_int16_t fpnk, fpk, f1k, f2k, tw, tdc;
932
933 tdc.r = src[0].r;
934 tdc.i = src[0].i;
935
936 if (scaled_flag)
937 NE10_F2I16_FIXDIV (tdc, 2);
938
939 dst[0].r = tdc.r + tdc.i;
940 dst[ncfft].r = tdc.r - tdc.i;
941 dst[ncfft].i = dst[0].i = 0;
942
943 for (k = 1; k <= ncfft / 2 ; ++k)
944 {
945 fpk = src[k];
946 fpnk.r = src[ncfft - k].r;
947 fpnk.i = - src[ncfft - k].i;
948 if (scaled_flag)
949 {
950 NE10_F2I16_FIXDIV (fpk, 2);
951 NE10_F2I16_FIXDIV (fpnk, 2);
952 }
953
954 f1k.r = fpk.r + fpnk.r;
955 f1k.i = fpk.i + fpnk.i;
956
957 f2k.r = fpk.r - fpnk.r;
958 f2k.i = fpk.i - fpnk.i;
959
960 tw.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).r
961 - (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
962 tw.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) f2k.r * (twiddles[k - 1]).i
963 + (NE10_F2I16_SAMPPROD) f2k.i * (twiddles[k - 1]).r) >> NE10_F2I16_SHIFT);
964
965 dst[k].r = (f1k.r + tw.r) >> 1;
966 dst[k].i = (f1k.i + tw.i) >> 1;
967 dst[ncfft - k].r = (f1k.r - tw.r) >> 1;
968 dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
969 }
970}
971
972static void ne10_fft_split_c2r_1d_int16 (ne10_fft_cpx_int16_t *dst,
973 const ne10_fft_cpx_int16_t *src,
974 ne10_fft_cpx_int16_t *twiddles,
975 ne10_int32_t ncfft,
976 ne10_int32_t scaled_flag)
977{
978
979 ne10_int32_t k;
980 ne10_fft_cpx_int16_t fk, fnkc, fek, fok, tmp;
981
982
983 dst[0].r = src[0].r + src[ncfft].r;
984 dst[0].i = src[0].r - src[ncfft].r;
985
986 if (scaled_flag)
987 NE10_F2I16_FIXDIV (dst[0], 2);
988
989 for (k = 1; k <= ncfft / 2; k++)
990 {
991 fk = src[k];
992 fnkc.r = src[ncfft - k].r;
993 fnkc.i = -src[ncfft - k].i;
994 if (scaled_flag)
995 {
996 NE10_F2I16_FIXDIV (fk, 2);
997 NE10_F2I16_FIXDIV (fnkc, 2);
998 }
999
1000 fek.r = fk.r + fnkc.r;
1001 fek.i = fk.i + fnkc.i;
1002
1003 tmp.r = fk.r - fnkc.r;
1004 tmp.i = fk.i - fnkc.i;
1005
1006 fok.r = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).r
1007 + (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1008 fok.i = (ne10_int16_t) ( ( (NE10_F2I16_SAMPPROD) tmp.i * (twiddles[k - 1]).r
1009 - (NE10_F2I16_SAMPPROD) tmp.r * (twiddles[k - 1]).i) >> NE10_F2I16_SHIFT);
1010
1011 dst[k].r = fek.r + fok.r;
1012 dst[k].i = fek.i + fok.i;
1013
1014 dst[ncfft - k].r = fek.r - fok.r;
1015 dst[ncfft - k].i = fok.i - fek.i;
1016 }
1017}
1018
// NOTE(review): body of ne10_fft_alloc_c2c_int16(nfft) — the signature and the
// declaration of `tw` (used below) are on lines hidden by the doc generator; verify.
1031{ // allocates state, factors nfft, then precomputes Q15 twiddles per radix-4 stage
1032 ne10_fft_cfg_int16_t st = NULL;
1033 ne10_uint32_t memneeded = sizeof (ne10_fft_state_int16_t)
1034 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1035 + sizeof (ne10_fft_cpx_int16_t) * (nfft) /* twiddle */
1036 + sizeof (ne10_fft_cpx_int16_t) * nfft /* buffer */
1037 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1038
1039 st = (ne10_fft_cfg_int16_t) NE10_MALLOC (memneeded); // single allocation carved up below
1040
1041 if (st)
1042 {
1043 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int16_t);
1044 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT); // round address up to the alignment boundary
1045 st->factors = (ne10_int32_t*) address;
1046 st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1047 st->buffer = st->twiddles + nfft;
1048 st->nfft = nfft;
1049
1050 ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
1051 if (result == NE10_ERR)
1052 {
1053 NE10_FREE (st); // presumably NE10_FREE also nulls st, so NULL is returned — verify macro
1054 return st;
1055 }
1056
1057 ne10_int32_t i, j;
1058 ne10_int32_t *factors = st->factors;
1059 ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1061 ne10_int32_t stage_count = factors[0];
1062 ne10_int32_t fstride1 = factors[1];
1063 ne10_int32_t fstride2 = fstride1 * 2; // strides for the 2nd and 3rd twiddle of each radix-4 butterfly
1064 ne10_int32_t fstride3 = fstride1 * 3;
1065 ne10_int32_t m;
1066
1067 const ne10_float32_t pi = NE10_PI;
1068 ne10_float32_t phase1;
1069 ne10_float32_t phase2;
1070 ne10_float32_t phase3;
1071
1072 for (i = stage_count - 1; i > 0; i--) // per stage, skipping the first stage (no twiddles needed)
1073 {
1074 fstride1 >>= 2;
1075 fstride2 >>= 2;
1076 fstride3 >>= 2;
1077 m = factors[2 * i + 1];
1078 tw = twiddles; // `tw` declared on a hidden line — TODO confirm type ne10_fft_cpx_int16_t*
1079 for (j = 0; j < m; j++)
1080 {
1081 phase1 = -2 * pi * fstride1 * j / nfft;
1082 phase2 = -2 * pi * fstride2 * j / nfft;
1083 phase3 = -2 * pi * fstride3 * j / nfft;
1084 tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1)); // round-to-nearest Q15 quantisation
1085 tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1086 (tw + m)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase2));
1087 (tw + m)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase2));
1088 (tw + m * 2)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase3));
1089 (tw + m * 2)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase3));
1090 tw++;
1091 }
1092 twiddles += m * 3; // three twiddles per butterfly column consumed by this stage
1093 }
1094
1095 }
1096 return st;
1097}
1098
// NOTE(review): tail of ne10_fft_c2c_1d_int16_c(fout, fin, cfg, ...) — the first
// signature lines are hidden by the doc generator; verify parameter list.
1113 ne10_int32_t inverse_fft,
1114 ne10_int32_t scaled_flag)
1115{
1116 if (inverse_fft) // dispatch to the inverse or forward mixed-radix kernel
1117 ne10_mixed_radix_butterfly_inverse_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1118 else
1119 ne10_mixed_radix_butterfly_int16_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
1120}
1121
1122
1123
1124
//end of C2C_FFT_IFFT group
1128
// NOTE(review): body of ne10_fft_alloc_r2c_int16(nfft) — the signature and the
// declaration of `tw` (used below) are on lines hidden by the doc generator; verify.
1141{ // allocates r2c state: factors + stage twiddles + split ("super") twiddles + work buffer
1142 ne10_fft_r2c_cfg_int16_t st = NULL;
1143 ne10_int32_t ncfft = nfft >> 1; // real transform runs as a half-length complex FFT
1144
1145 ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int16_t)
1146 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1147 + sizeof (ne10_fft_cpx_int16_t) * ncfft /* twiddle*/
1148 + sizeof (ne10_fft_cpx_int16_t) * ncfft / 2 /* super twiddles*/
1149 + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer*/
1150 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1151
1152 st = (ne10_fft_r2c_cfg_int16_t) NE10_MALLOC (memneeded);
1153
1154 if (st)
1155 {
1156 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_int16_t);
1157 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT); // round up to the alignment boundary
1158 st->factors = (ne10_int32_t*) address;
1159 st->twiddles = (ne10_fft_cpx_int16_t*) (st->factors + (NE10_MAXFACTORS * 2));
1160 st->super_twiddles = st->twiddles + ncfft;
1161 st->buffer = st->super_twiddles + (ncfft / 2);
1162 st->ncfft = ncfft;
1163
1164 ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
1165 if (result == NE10_ERR)
1166 {
1167 NE10_FREE (st); // presumably NE10_FREE also nulls st, so NULL is returned — verify macro
1168 return st;
1169 }
1170
1171 ne10_int32_t i, j;
1172 ne10_int32_t *factors = st->factors;
1173 ne10_fft_cpx_int16_t *twiddles = st->twiddles;
1175 ne10_int32_t stage_count = factors[0];
1176 ne10_int32_t fstride1 = factors[1];
1177 ne10_int32_t fstride2 = fstride1 * 2; // strides for the 2nd and 3rd twiddle of each radix-4 butterfly
1178 ne10_int32_t fstride3 = fstride1 * 3;
1179 ne10_int32_t m;
1180
1181 const ne10_float32_t pi = NE10_PI;
1182 ne10_float32_t phase1;
1183 ne10_float32_t phase2;
1184 ne10_float32_t phase3;
1185
1186 for (i = stage_count - 1; i > 0; i--) // per stage, skipping the first stage
1187 {
1188 fstride1 >>= 2;
1189 fstride2 >>= 2;
1190 fstride3 >>= 2;
1191 m = factors[2 * i + 1];
1192 tw = twiddles; // `tw` declared on a hidden line — TODO confirm type ne10_fft_cpx_int16_t*
1193 for (j = 0; j < m; j++)
1194 {
1195 phase1 = -2 * pi * fstride1 * j / ncfft;
1196 phase2 = -2 * pi * fstride2 * j / ncfft;
1197 phase3 = -2 * pi * fstride3 * j / ncfft;
1198 tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1)); // round-to-nearest Q15 quantisation
1199 tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1200 (tw + m)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase2));
1201 (tw + m)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase2));
1202 (tw + m * 2)->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase3));
1203 (tw + m * 2)->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase3));
1204 tw++;
1205 }
1206 twiddles += m * 3; // three twiddles per butterfly column consumed by this stage
1207 }
1208
1209 tw = st->super_twiddles; // twiddles for the r2c/c2r split step
1210 for (i = 0; i < ncfft / 2; i++)
1211 {
1212 phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
1213 tw->r = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * cos (phase1));
1214 tw->i = (ne10_int16_t) floor (0.5f + NE10_F2I16_MAX * sin (phase1));
1215 tw++;
1216 }
1217
1218 }
1219 return st;
1220}
1221
// NOTE(review): tail of ne10_fft_r2c_1d_int16_c(fout, fin, cfg, scaled_flag) — the
// fout and cfg parameter lines are hidden by the doc generator; verify signature.
1234 ne10_int16_t *fin,
1236 ne10_int32_t scaled_flag)
1237{
1238 ne10_fft_cpx_int16_t * tmpbuf = cfg->buffer;
1239
1240 ne10_mixed_radix_butterfly_int16_c (tmpbuf, (ne10_fft_cpx_int16_t*) fin, cfg->factors, cfg->twiddles, fout, scaled_flag); // half-length complex FFT; fout doubles as scratch here
1241 ne10_fft_split_r2c_1d_int16 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft, scaled_flag); // split step produces the final real-signal spectrum
1242}
1243
// NOTE(review): the fin and cfg parameter lines of this signature are hidden by
// the doc generator; verify against the header declaration.
1255void ne10_fft_c2r_1d_int16_c (ne10_int16_t *fout,
1258 ne10_int32_t scaled_flag)
1259{
1260 ne10_fft_cpx_int16_t * tmpbuf1 = cfg->buffer; // split-step output / IFFT input
1261 ne10_fft_cpx_int16_t * tmpbuf2 = cfg->buffer + cfg->ncfft; // scratch for the inverse butterfly
1262
1263 ne10_fft_split_c2r_1d_int16 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag); // fold spectrum into half-length complex form
1264 ne10_mixed_radix_butterfly_inverse_int16_c ( (ne10_fft_cpx_int16_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2, scaled_flag); // half-length complex IFFT yields the real samples
1265}
1266
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 16-bit fixed point data.
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int16 data.
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int16 data.
structure for the 16 bits fixed point FFT function.
Definition NE10_types.h:298
structure for the 32 bits fixed point FFT function.
Definition NE10_types.h:329