NFFT 3.5.3alpha
nfsft.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2002, 2019 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
25#include "config.h"
26
27/* Include standard C headers. */
28#include <math.h>
29#include <stdlib.h>
30#include <string.h>
31#ifdef HAVE_COMPLEX_H
32#include <complex.h>
33#endif
34
35#ifdef _OPENMP
36#include <omp.h>
37#endif
38
39/* Include NFFT3 utilities header. */
40
41/* Include NFFT3 library header. */
42#include "nfft3.h"
43
44#include "infft.h"
45
46/* Include private associated Legendre functions header. */
47#include "legendre.h"
48
49/* Include private API header. */
50#include "api.h"
51
52#ifdef _OPENMP
53#include "../fpt/fpt.h"
54#endif
55
65#define NFSFT_DEFAULT_NFFT_CUTOFF 6
66
72#define NFSFT_DEFAULT_THRESHOLD 1000
73
79#define NFSFT_BREAK_EVEN 5
80
87#ifdef _OPENMP
88 static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0,0};
89#else
90 static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0};
91#endif
92
115static inline void c2e(nfsft_plan *plan)
116{
117 int k;
118 int n;
119 double _Complex last;
120 double _Complex act;
121 double _Complex *xp;
122 double _Complex *xm;
123 int low;
124 int up;
125 int lowe;
126 int upe;
128 /* Set the first row to order to zero since it is unused. */
129 memset(plan->f_hat_intern,0U,(2*plan->N+2)*sizeof(double _Complex));
130
131 /* Determine lower and upper bounds for loop processing even terms. */
132 lowe = -plan->N + (plan->N%2);
133 upe = -lowe;
134
135 /* Process even terms. */
136 for (n = lowe; n <= upe; n += 2)
137 {
138 /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
139 * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M}$. */
140 xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
141 xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
142 for(k = 1; k <= plan->N; k++)
143 {
144 *xp *= 0.5;
145 *xm-- = *xp++;
146 }
147 /* Set the first coefficient in the array corresponding to this order to
148 * zero since it is unused. */
149 *xm = 0.0;
150 }
151
152 /* Determine lower and upper bounds for loop processing odd terms. */
153 low = -plan->N + (1-plan->N%2);
154 up = -low;
155
156 /* Process odd terms incorporating the additional sine term
157 * \f$\sin \vartheta\f$. */
158 for (n = low; n <= up; n += 2)
159 {
160 /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
161 * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M-1}$ incorporating
162 * the additional term \f$\sin \vartheta\f$. */
163 plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
164 xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
165 /* Set the first coefficient in the array corresponding to this order to zero
166 * since it is unused. */
167 *xp++ = 0.0;
168 xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
169 last = *xm;
170 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
171 *xp++ = -(*xm--);
172 for (k = plan->N-1; k > 0; k--)
173 {
174 act = *xm;
175 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
176 *xp++ = -(*xm--);
177 last = act;
178 }
179 *xm = 0.0;
180 }
181}
182
193static inline void c2e_transposed(nfsft_plan *plan)
194{
195 int k;
196 int n;
197 double _Complex last;
198 double _Complex act;
199 double _Complex *xp;
200 double _Complex *xm;
201 int low;
202 int up;
203 int lowe;
204 int upe;
206 /* Determine lower and upper bounds for loop processing even terms. */
207 lowe = -plan->N + (plan->N%2);
208 upe = -lowe;
209
210 /* Process even terms. */
211 for (n = lowe; n <= upe; n += 2)
212 {
213 /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M}\f$ from
214 * old coefficients $\left(c_k^n\right)_{k=-M,\ldots,M}$. */
215 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
216 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
217 for(k = 1; k <= plan->N; k++)
218 {
219 *xp += *xm--;
220 *xp++ *= 0.5;;
221 }
222 }
223
224 /* Determine lower and upper bounds for loop processing odd terms. */
225 low = -plan->N + (1-plan->N%2);
226 up = -low;
227
228 /* Process odd terms. */
229 for (n = low; n <= up; n += 2)
230 {
231 /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M-1}\f$ from
232 * old coefficients $\left(c_k^n\right)_{k=0,\ldots,M-1}$. */
233 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
234 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
235 for(k = 1; k <= plan->N; k++)
236 {
237 *xp++ -= *xm--;
238 }
239
240 plan->f_hat[NFSFT_INDEX(0,n,plan)] =
241 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
242 last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
243 plan->f_hat[NFSFT_INDEX(1,n,plan)] =
244 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
245
246 xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
247 for (k = 2; k < plan->N; k++)
248 {
249 act = *xp;
250 *xp = -0.25 * _Complex_I * (xp[1] - last);
251 xp++;
252 last = act;
253 }
254 *xp = 0.25 * _Complex_I * last;
255
256 plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
257 }
258}
259
260void nfsft_init(nfsft_plan *plan, int N, int M)
261{
262 /* Call nfsft_init_advanced with default flags. */
265}
266
267void nfsft_init_advanced(nfsft_plan* plan, int N, int M,
268 unsigned int flags)
269{
270 /* Call nfsft_init_guru with the flags and default NFFT cut-off. */
271 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT,
273}
274
275void nfsft_init_guru(nfsft_plan *plan, int N, int M, unsigned int flags,
276 unsigned int nfft_flags, int nfft_cutoff)
277{
278 int *nfft_size; /*< NFFT size */
279 int *fftw_size; /*< FFTW size */
280
281 /* Save the flags in the plan. */
282 plan->flags = flags;
283
284 /* Save the bandwidth N and the number of samples M in the plan. */
285 plan->N = N;
286 plan->M_total = M;
287
288 /* M is fixed for FSFT algorithm */
289 if (plan->flags & NFSFT_EQUISPACED)
290 plan->M_total = (2*plan->N+2)*(plan->N+2);
291
292 /* Calculate the next greater power of two with respect to the bandwidth N
293 * and the corresponding exponent. */
294 //next_power_of_2_exp_int(plan->N,&plan->NPT,&plan->t);
295
296 /* Save length of array of Fourier coefficients. Owing to the data layout the
297 * length is (2N+2)(2N+2) */
298 plan->N_total = (2*plan->N+2)*(2*plan->N+2);
299
300 /* Allocate memory for auxiliary array of spherical Fourier coefficients,
301 * if necessary. */
302 if (plan->flags & NFSFT_PRESERVE_F_HAT)
303 {
304 plan->f_hat_intern = (double _Complex*) nfft_malloc(plan->N_total*
305 sizeof(double _Complex));
306 }
307
308 /* Allocate memory for spherical Fourier coefficients, if necessary. */
309 if (plan->flags & NFSFT_MALLOC_F_HAT)
310 {
311 plan->f_hat = (double _Complex*) nfft_malloc(plan->N_total*
312 sizeof(double _Complex));
313 }
314
315 /* Allocate memory for samples, if necessary. */
316 if (plan->flags & NFSFT_MALLOC_F)
317 {
318 plan->f = (double _Complex*) nfft_malloc(plan->M_total*sizeof(double _Complex));
319 }
320
321 /* Allocate memory for nodes, if necessary. */
322 if (plan->flags & NFSFT_MALLOC_X)
323 {
324 plan->x = (double*) nfft_malloc(plan->M_total*2*sizeof(double));
325 if (plan->flags & NFSFT_EQUISPACED)
326 /* Set equispaced nodes. This way also trafo_direct works correctly. */
327 for (int i=0; i<2*plan->N+2; i++)
328 for (int j=0; j<plan->N+2; j++)
329 {
330 plan->x[2*(i*(plan->N+1) + j)] = ((double)i-plan->N-1)/(2.0*plan->N+2);
331 plan->x[2*(i*(plan->N+1) + j) + 1] = ((double)j)/(2.0*plan->N+2);
332 }
333 }
334
335 /* Check if fast algorithm is activated. */
336 if ((plan->flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_EQUISPACED))
337 {
338 }
339 else
340 {
341 nfft_size = (int*)nfft_malloc(2*sizeof(int));
342 fftw_size = (int*)nfft_malloc(2*sizeof(int));
343
345 nfft_size[0] = 2*plan->N+2;
346 nfft_size[1] = 2*plan->N+2;
347 fftw_size[0] = 4*plan->N;
348 fftw_size[1] = 4*plan->N;
349
351 nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
352 nfft_cutoff, nfft_flags,
353 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
354
355 /* Assign angle array. */
356 plan->plan_nfft.x = plan->x;
357 /* Assign result array. */
358 plan->plan_nfft.f = plan->f;
359 /* Assign Fourier coefficients array. */
360 plan->plan_nfft.f_hat = plan->f_hat;
361
364 /* Precompute. */
365 //nfft_precompute_one_psi(&plan->plan_nfft);
366
367 /* Free auxilliary arrays. */
368 nfft_free(nfft_size);
369 nfft_free(fftw_size);
370 }
371
372 plan->mv_trafo = (void (*) (void* ))nfsft_trafo;
373 plan->mv_adjoint = (void (*) (void* ))nfsft_adjoint;
374}
375
376void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags,
377 unsigned int fpt_flags)
378{
379 int n; /*< The order n */
380
381 /* Check if already initialized. */
382 if (wisdom.initialized == true)
383 {
384 return;
385 }
386
387#ifdef _OPENMP
388 #pragma omp parallel default(shared)
389 {
390 int nthreads = omp_get_num_threads();
391 #pragma omp single
392 {
393 wisdom.nthreads = nthreads;
394 }
395 }
396#endif
397
398 /* Save the precomputation flags. */
399 wisdom.flags = nfsft_flags;
400
401 /* Compute and save N_max = 2^{\ceil{log_2 N}} as next greater
402 * power of two with respect to N. */
403 X(next_power_of_2_exp_int)(N,&wisdom.N_MAX,&wisdom.T_MAX);
404
405 /* Check, if precomputation for direct algorithms needs to be performed. */
407 {
408 wisdom.alpha = NULL;
409 wisdom.beta = NULL;
410 wisdom.gamma = NULL;
411 }
412 else
413 {
414 /* Allocate memory for three-term recursion coefficients. */
415 wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
416 sizeof(double));
417 wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
418 sizeof(double));
419 wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
420 sizeof(double));
422 /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
423 * gamma_k^n. */
427 }
428
429 /* Check, if precomputation for fast algorithms needs to be performed. */
431 {
432 }
433 else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
434 {
435 /* Precompute data for DPT/FPT. */
436
437 /* Check, if recursion coefficients have already been calculated. */
438 if (wisdom.alpha != NULL)
439 {
440#ifdef _OPENMP
441 #pragma omp parallel default(shared) private(n)
442 {
443 int nthreads = omp_get_num_threads();
444 int threadid = omp_get_thread_num();
445 #pragma omp single
446 {
447 wisdom.nthreads = nthreads;
448 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
449 }
450
451 if (threadid == 0)
452 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
454 else
455 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
456 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA | FPT_NO_INIT_FPT_DATA);
457
458 #pragma omp barrier
459
460 if (threadid != 0)
461 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
462
463 /* Perform the first part of precomputation which contains most allocations in one thread */
464 #pragma omp master
465 {
466 for (int n = 0; n <= wisdom.N_MAX; n++)
467 fpt_precompute_1(wisdom.set_threads[0],n,n);
468 }
469 #pragma omp barrier
470
471 #pragma omp for private(n) schedule(dynamic)
472 for (n = 0; n <= wisdom.N_MAX; n++)
473 fpt_precompute_2(wisdom.set_threads[threadid],n,&wisdom.alpha[ROW(n)],
474 &wisdom.beta[ROW(n)], &wisdom.gamma[ROW(n)],n,kappa);
475 }
476#else
477 /* Use the recursion coefficients to precompute FPT data using persistent
478 * arrays. */
479 wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
481 for (n = 0; n <= wisdom.N_MAX; n++)
482 {
483 /*fprintf(stderr,"%d\n",n);
484 fflush(stderr);*/
485 /* Precompute data for FPT transformation for order n. */
486 fpt_precompute(wisdom.set,n,&wisdom.alpha[ROW(n)],&wisdom.beta[ROW(n)],
487 &wisdom.gamma[ROW(n)],n,kappa);
488 }
489#endif
490 }
491 else
492 {
493#ifdef _OPENMP
494 #pragma omp parallel default(shared) private(n)
495 {
496 double *alpha, *beta, *gamma;
497 int nthreads = omp_get_num_threads();
498 int threadid = omp_get_thread_num();
499 #pragma omp single
500 {
501 wisdom.nthreads = nthreads;
502 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
503 }
504
505 alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
506 beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
507 gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
508
509 if (threadid == 0)
510 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
511 fpt_flags | FPT_AL_SYMMETRY);
512 else
513 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
514 fpt_flags | FPT_AL_SYMMETRY | FPT_NO_INIT_FPT_DATA);
515
516 #pragma omp barrier
517
518 if (threadid != 0)
519 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
520
521 /* Perform the first part of precomputation which contains most allocations in one thread */
522 #pragma omp master
523 {
524 for (int n = 0; n <= wisdom.N_MAX; n++)
525 fpt_precompute_1(wisdom.set_threads[0],n,n);
526 }
527 #pragma omp barrier
528
529 #pragma omp for private(n) schedule(dynamic)
530 for (n = 0; n <= wisdom.N_MAX; n++)
531 {
532 alpha_al_row(alpha,wisdom.N_MAX,n);
533 beta_al_row(beta,wisdom.N_MAX,n);
534 gamma_al_row(gamma,wisdom.N_MAX,n);
535
536 /* Precompute data for FPT transformation for order n. */
537 fpt_precompute_2(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
538 kappa);
539 }
540 /* Free auxilliary arrays. */
544 }
545#else
546 /* Allocate memory for three-term recursion coefficients. */
547 double *alpha, *beta, *gamma;
548 alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
549 beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
550 gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
551 wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
552 fpt_flags | FPT_AL_SYMMETRY);
553 for (n = 0; n <= wisdom.N_MAX; n++)
554 {
555 /*fprintf(stderr,"%d NO_DIRECT\n",n);
556 fflush(stderr);*/
557 /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
558 * gamma_k^n. */
559 alpha_al_row(alpha,wisdom.N_MAX,n);
560 beta_al_row(beta,wisdom.N_MAX,n);
561 gamma_al_row(gamma,wisdom.N_MAX,n);
562
563 /* Precompute data for FPT transformation for order n. */
564 fpt_precompute(wisdom.set,n,alpha,beta,gamma,n,
565 kappa);
566 }
567 /* Free auxilliary arrays. */
571#endif
572 }
573 }
574
575 /* Wisdom has been initialised. */
576 wisdom.initialized = true;
577}
578
579void nfsft_forget(void)
580{
581 /* Check if wisdom has been initialised. */
582 if (wisdom.initialized == false)
583 {
584 /* Nothing to do. */
585 return;
586 }
587
588 /* Check, if precomputation for direct algorithms has been performed. */
590 {
591 }
592 else
593 {
594 /* Free arrays holding three-term recurrence coefficients. */
598 wisdom.alpha = NULL;
599 wisdom.beta = NULL;
600 wisdom.gamma = NULL;
601 }
602
603 /* Check, if precomputation for fast algorithms has been performed. */
605 {
606 }
607 else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
608 {
609#ifdef _OPENMP
610 int k;
611 for (k = 0; k < wisdom.nthreads; k++)
612 fpt_finalize(wisdom.set_threads[k]);
613 nfft_free(wisdom.set_threads);
614#else
615 /* Free precomputed data for FPT transformation. */
616 fpt_finalize(wisdom.set);
617#endif
618 }
619
620 /* Wisdom is now uninitialised. */
621 wisdom.initialized = false;
622}
623
624
626{
627 if (!plan)
628 return;
629
630 if (!(plan->flags & NFSFT_NO_FAST_ALGORITHM) && !(plan->flags & NFSFT_EQUISPACED))
631 {
632 /* Finalise the nfft plan. */
633 nfft_finalize(&plan->plan_nfft);
634 }
635
636 /* De-allocate memory for auxilliary array of spherical Fourier coefficients,
637 * if neccesary. */
638 if (plan->flags & NFSFT_PRESERVE_F_HAT)
639 {
640 nfft_free(plan->f_hat_intern);
641 }
642
643 /* De-allocate memory for spherical Fourier coefficients, if necessary. */
644 if (plan->flags & NFSFT_MALLOC_F_HAT)
645 {
646 //fprintf(stderr,"deallocating f_hat\n");
647 nfft_free(plan->f_hat);
648 }
649
650 /* De-allocate memory for samples, if necessary. */
651 if (plan->flags & NFSFT_MALLOC_F)
652 {
653 //fprintf(stderr,"deallocating f\n");
654 nfft_free(plan->f);
655 }
656
657 /* De-allocate memory for nodes, if necessary. */
658 if (plan->flags & NFSFT_MALLOC_X)
659 {
660 //fprintf(stderr,"deallocating x\n");
661 nfft_free(plan->x);
662 }
663}
664
665static void nfsft_set_f_nan(nfsft_plan *plan)
666{
667 int m;
668 double nan_value = nan("");
669 for (m = 0; m < plan->M_total; m++)
670 plan->f[m] = nan_value;
671}
672
673void nfsft_trafo_direct(nfsft_plan *plan)
674{
675 int m; /*< The node index */
676 int k; /*< The degree k */
677 int n; /*< The order n */
678 int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
679 double *alpha; /*< Pointer to current three-term recurrence
680 coefficient alpha_k^n for associated Legendre
681 functions P_k^n */
682 double *gamma; /*< Pointer to current three-term recurrence
683 coefficient beta_k^n for associated Legendre
684 functions P_k^n */
685 double _Complex *a; /*< Pointer to auxiliary array for Clenshaw algor. */
686 double stheta; /*< Current angle theta for Clenshaw algorithm */
687 double sphi; /*< Current angle phi for Clenshaw algorithm */
688
689#ifdef MEASURE_TIME
690 plan->MEASURE_TIME_t[0] = 0.0;
691 plan->MEASURE_TIME_t[1] = 0.0;
692 plan->MEASURE_TIME_t[2] = 0.0;
693#endif
694
696 {
697 nfsft_set_f_nan(plan);
698 return;
699 }
700
701 /* Copy spherical Fourier coefficients, if necessary. */
702 if (plan->flags & NFSFT_PRESERVE_F_HAT)
703 {
704 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
705 sizeof(double _Complex));
706 }
707 else
708 {
709 plan->f_hat_intern = plan->f_hat;
710 }
711
712 /* Check, if we compute with L^2-normalized spherical harmonics. If so,
713 * multiply spherical Fourier coefficients with corresponding normalization
714 * weight. */
715 if (plan->flags & NFSFT_NORMALIZED)
716 {
717 /* Traverse Fourier coefficients array. */
718#ifdef _OPENMP
719 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
720#endif
721 for (k = 0; k <= plan->N; k++)
722 {
723 for (n = -k; n <= k; n++)
724 {
725 /* Multiply with normalization weight. */
726 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
727 sqrt((2*k+1)/(4.0*KPI));
728 }
729 }
730 }
731
732 /* Distinguish by bandwidth M. */
733 if (plan->N == 0)
734 {
735 /* N = 0 */
736
737 /* Constant function */
738 for (m = 0; m < plan->M_total; m++)
739 {
740 plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
741 }
742 }
743 else
744 {
745 /* N > 0 */
746
747 /* Evaluate
748 * \sum_{k=0}^N \sum_{n=-k}^k a_k^n P_k^{|n|}(cos theta_m) e^{i n phi_m}
749 * = \sum_{n=-N}^N \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
750 * e^{i n phi_m}.
751 */
752#ifdef _OPENMP
753 #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k)
754#endif
755 for (m = 0; m < plan->M_total; m++)
756 {
757 /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
758 stheta = cos(2.0*KPI*plan->x[2*m+1]);
759 /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
760 sphi = 2.0*KPI*plan->x[2*m];
761
762 /* Initialize result for current node. */
763 plan->f[m] = 0.0;
764
765 /* For n = -N,...,N, evaluate
766 * b_n := \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
767 * using Clenshaw's algorithm.
768 */
769 for (n = -plan->N; n <= plan->N; n++)
770 {
771 /* Get pointer to Fourier coefficients vector for current order n. */
772 a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
773
774 /* Take absolute value of n. */
775 n_abs = abs(n);
776
777 /* Get pointers to three-term recurrence coefficients arrays. */
778 alpha = &(wisdom.alpha[ROW(n_abs)]);
779 gamma = &(wisdom.gamma[ROW(n_abs)]);
780
781 if (plan->N > 1024)
782 {
783 /* Clenshaw's algorithm */
784 long double _Complex it2 = a[plan->N];
785 long double _Complex it1 = a[plan->N-1];
786 for (k = plan->N; k > n_abs + 1; k--)
787 {
788 long double _Complex temp = a[k-2] + it2 * gamma[k];
789 it2 = it1 + it2 * alpha[k] * stheta;
790 it1 = temp;
791 }
792
793 /* Compute final step if neccesary. */
794 if (n_abs < plan->N)
795 {
796 it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
797 }
798
799 /* Compute final result by multiplying the fixed part
800 * gamma_|n| (1-cos^2(theta))^{|n|/2}
801 * for order n and the exponential part
802 * e^{i n phi}
803 * and add the result to f_m.
804 */
805 long double _Complex result = it2 * wisdom.gamma[ROW(n_abs)] *
806 powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
807
808 plan->f[m] += result;
809 }
810 else
811 {
812 /* Clenshaw's algorithm */
813 double _Complex it2 = a[plan->N];
814 double _Complex it1 = a[plan->N-1];
815 for (k = plan->N; k > n_abs + 1; k--)
816 {
817 double _Complex temp = a[k-2] + it2 * gamma[k];
818 it2 = it1 + it2 * alpha[k] * stheta;
819 it1 = temp;
820 }
821
822 /* Compute final step if neccesary. */
823 if (n_abs < plan->N)
824 {
825 it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
826 }
827
828 /* Compute final result by multiplying the fixed part
829 * gamma_|n| (1-cos^2(theta))^{|n|/2}
830 * for order n and the exponential part
831 * e^{i n phi}
832 * and add the result to f_m.
833 */
834 plan->f[m] += it2 * wisdom.gamma[ROW(n_abs)] *
835 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
836 }
837 }
838
839 /* Write result f_m for current node to array f. */
840// plan->f[m] = f_m;
841 }
842 }
843}
844
845static void nfsft_set_f_hat_nan(nfsft_plan *plan)
846{
847 int k, n;
848 double nan_value = nan("");
849 for (k = 0; k <= plan->N; k++)
850 for (n = -k; n <= k; n++)
851 plan->f_hat[NFSFT_INDEX(k,n,plan)] = nan_value;
852}
853
854void nfsft_adjoint_direct(nfsft_plan *plan)
855{
856 int m; /*< The node index */
857 int k; /*< The degree k */
858 int n; /*< The order n */
859 int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
860 double *alpha; /*< Pointer to current three-term recurrence
861 coefficient alpha_k^n for associated Legendre
862 functions P_k^n */
863 double *gamma; /*< Pointer to current three-term recurrence
864 coefficient beta_k^n for associated Legendre
865 functions P_k^n */
866
867#ifdef MEASURE_TIME
868 plan->MEASURE_TIME_t[0] = 0.0;
869 plan->MEASURE_TIME_t[1] = 0.0;
870 plan->MEASURE_TIME_t[2] = 0.0;
871#endif
872
874 {
875 nfsft_set_f_hat_nan(plan);
876 return;
877 }
878
879 /* Initialise spherical Fourier coefficients array with zeros. */
880 memset(plan->f_hat,0U,plan->N_total*sizeof(double _Complex));
881
882 /* Distinguish by bandwidth N. */
883 if (plan->N == 0)
884 {
885 /* N == 0 */
886
887 /* Constant function */
888 for (m = 0; m < plan->M_total; m++)
889 {
890 plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
891 }
892 }
893 else
894 {
895 /* N > 0 */
896
897#ifdef _OPENMP
898 /* Traverse all orders n. */
899 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic)
900 for (n = -plan->N; n <= plan->N; n++)
901 {
902 /* Take absolute value of n. */
903 n_abs = abs(n);
904
905 /* Get pointers to three-term recurrence coefficients arrays. */
906 alpha = &(wisdom.alpha[ROW(n_abs)]);
907 gamma = &(wisdom.gamma[ROW(n_abs)]);
908
909 /* Traverse all nodes. */
910 for (m = 0; m < plan->M_total; m++)
911 {
912 /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
913 double stheta = cos(2.0*KPI*plan->x[2*m+1]);
914 /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
915 double sphi = 2.0*KPI*plan->x[2*m];
916
917 /* Transposed Clenshaw algorithm */
918 if (plan->N > 1024)
919 {
920 /* Initial step */
921 long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
922 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
923 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
924 long double _Complex it2 = 0.0;
925
926 if (n_abs < plan->N)
927 {
928 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
929 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
930 }
931
932 /* Loop for transposed Clenshaw algorithm */
933 for (k = n_abs+2; k <= plan->N; k++)
934 {
935 long double _Complex temp = it2;
936 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
937 it1 = temp;
938 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
939 }
940 }
941 else
942 {
943 /* Initial step */
944 double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
945 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
946 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
947 double _Complex it2 = 0.0;
948
949 if (n_abs < plan->N)
950 {
951 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
952 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
953 }
954
955 /* Loop for transposed Clenshaw algorithm */
956 for (k = n_abs+2; k <= plan->N; k++)
957 {
958 double _Complex temp = it2;
959 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
960 it1 = temp;
961 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
962 }
963 }
964 }
965 }
966#else
967 /* Traverse all nodes. */
968 for (m = 0; m < plan->M_total; m++)
969 {
970 /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
971 double stheta = cos(2.0*KPI*plan->x[2*m+1]);
972 /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
973 double sphi = 2.0*KPI*plan->x[2*m];
974
975 /* Traverse all orders n. */
976 for (n = -plan->N; n <= plan->N; n++)
977 {
978 /* Take absolute value of n. */
979 n_abs = abs(n);
980
981 /* Get pointers to three-term recurrence coefficients arrays. */
982 alpha = &(wisdom.alpha[ROW(n_abs)]);
983 gamma = &(wisdom.gamma[ROW(n_abs)]);
984
985 /* Transposed Clenshaw algorithm */
986 if (plan->N > 1024)
987 {
988 /* Initial step */
989 long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
990 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
991 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
992 long double _Complex it2 = 0.0;
993
994 if (n_abs < plan->N)
995 {
996 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
997 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
998 }
999
1000 /* Loop for transposed Clenshaw algorithm */
1001 for (k = n_abs+2; k <= plan->N; k++)
1002 {
1003 long double _Complex temp = it2;
1004 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1005 it1 = temp;
1006 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
1007 }
1008 }
1009 else
1010 {
1011 /* Initial step */
1012 double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
1013 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1014 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
1015 double _Complex it2 = 0.0;
1016
1017 if (n_abs < plan->N)
1018 {
1019 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
1020 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
1021 }
1022
1023 /* Loop for transposed Clenshaw algorithm */
1024 for (k = n_abs+2; k <= plan->N; k++)
1025 {
1026 double _Complex temp = it2;
1027 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1028 it1 = temp;
1029 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
1030 }
1031 }
1032 }
1033 }
1034#endif
1035 }
1036
1037 /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1038 * multiply spherical Fourier coefficients with corresponding normalization
1039 * weight. */
1040 if (plan->flags & NFSFT_NORMALIZED)
1041 {
1042 /* Traverse Fourier coefficients array. */
1043#ifdef _OPENMP
1044 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1045#endif
1046 for (k = 0; k <= plan->N; k++)
1047 {
1048 for (n = -k; n <= k; n++)
1049 {
1050 /* Multiply with normalization weight. */
1051 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1052 sqrt((2*k+1)/(4.0*KPI));
1053 }
1054 }
1055 }
1056
1057 /* Set unused coefficients to zero. */
1058 if (plan->flags & NFSFT_ZERO_F_HAT)
1059 {
1060 for (n = -plan->N; n <= plan->N+1; n++)
1061 {
1062 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1063 (plan->N+1+abs(n))*sizeof(double _Complex));
1064 }
1065 }
1066}
1067
1069{
1070 int k; /*< The degree k */
1071 int n; /*< The order n */
1072#ifdef MEASURE_TIME
1073 ticks t0, t1;
1074#endif
1075 #ifdef DEBUG
1076 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1077 t_pre = 0.0;
1078 t_norm = 0.0;
1079 t_fpt = 0.0;
1080 t_c2e = 0.0;
1081 t_nfft = 0.0;
1082 #endif
1083
1084#ifdef MEASURE_TIME
1085 plan->MEASURE_TIME_t[0] = 0.0;
1086 plan->MEASURE_TIME_t[1] = 0.0;
1087 plan->MEASURE_TIME_t[2] = 0.0;
1088#endif
1089
1091 {
1092 nfsft_set_f_nan(plan);
1093 return;
1094 }
1095
1096 /* Check, if precomputation was done and that the bandwidth N is not too
1097 * big.
1098 */
1099 if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1100 {
1101 nfsft_set_f_nan(plan);
1102 return;
1103 }
1104
1105 /* Check, if slow transformation should be used due to small bandwidth. */
1106 if (plan->N < NFSFT_BREAK_EVEN)
1107 {
1108 /* Use NDSFT. */
1109 nfsft_trafo_direct(plan);
1110 }
1111
1112 /* Check for correct value of the bandwidth N. */
1113 else if (plan->N <= wisdom.N_MAX)
1114 {
1115 /* Copy spherical Fourier coefficients, if necessary. */
1116 if (plan->flags & NFSFT_PRESERVE_F_HAT)
1117 {
1118 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
1119 sizeof(double _Complex));
1120 }
1121 else
1122 {
1123 plan->f_hat_intern = plan->f_hat;
1124 }
1125
1126 /* Propagate pointer values to the internal NFFT plan to assure
1127 * consistency. Pointers may have been modified externally.
1128 */
1129 if (!(plan->flags & NFSFT_EQUISPACED))
1130 {
1131 plan->plan_nfft.x = plan->x;
1132 plan->plan_nfft.f = plan->f;
1133 plan->plan_nfft.f_hat = plan->f_hat_intern;
1134 }
1135
1136 /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1137 * multiply spherical Fourier coefficients with corresponding normalization
1138 * weight. */
1139 if (plan->flags & NFSFT_NORMALIZED)
1140 {
1141 /* Traverse Fourier coefficients array. */
1142#ifdef _OPENMP
1143 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1144#endif
1145 for (k = 0; k <= plan->N; k++)
1146 {
1147 for (n = -k; n <= k; n++)
1148 {
1149 /* Multiply with normalization weight. */
1150 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
1151 sqrt((2*k+1)/(4.0*KPI));
1152 }
1153 }
1154 }
1155
1156#ifdef MEASURE_TIME
1157 t0 = getticks();
1158#endif
1159 /* Check, which polynomial transform algorithm should be used. */
1160 if (plan->flags & NFSFT_USE_DPT)
1161 {
1162#ifdef _OPENMP
1163 n = 0;
1164 fpt_trafo_direct(wisdom.set_threads[0],abs(n),
1165 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1166 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1167 plan->N,0U);
1168
1169 int n_abs;
1170 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1171 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1172 {
1173 int threadid = omp_get_thread_num();
1174 n = -n_abs;
1175 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1176 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1177 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1178 plan->N,0U);
1179 n = n_abs;
1180 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1181 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1182 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1183 plan->N,0U);
1184 }
1185#else
1186 /* Use direct discrete polynomial transform DPT. */
1187 for (n = -plan->N; n <= plan->N; n++)
1188 {
1189 //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1190 //fflush(stderr);
1191 fpt_trafo_direct(wisdom.set,abs(n),
1192 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1193 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1194 plan->N,0U);
1195 }
1196#endif
1197 }
1198 else
1199 {
1200#ifdef _OPENMP
1201 n = 0;
1202 fpt_trafo(wisdom.set_threads[0],abs(n),
1203 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1204 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1205 plan->N,0U);
1206
1207 int n_abs;
1208 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1209 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1210 {
1211 int threadid = omp_get_thread_num();
1212 n = -n_abs;
1213 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1214 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1215 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1216 plan->N,0U);
1217 n = n_abs;
1218 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1219 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1220 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1221 plan->N,0U);
1222 }
1223#else
1224 /* Use fast polynomial transform FPT. */
1225 for (n = -plan->N; n <= plan->N; n++)
1226 {
1227 //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1228 //fflush(stderr);
1229 fpt_trafo(wisdom.set,abs(n),
1230 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1231 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1232 plan->N,0U);
1233 }
1234#endif
1235 }
1236#ifdef MEASURE_TIME
1237 t1 = getticks();
1238 plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1239#endif
1240
1241#ifdef MEASURE_TIME
1242 t0 = getticks();
1243#endif
1244 /* Convert Chebyshev coefficients to Fourier coefficients. */
1245 c2e(plan);
1246#ifdef MEASURE_TIME
1247 t1 = getticks();
1248 plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1249#endif
1250
1251#ifdef MEASURE_TIME
1252 t0 = getticks();
1253#endif
1254 if (plan->flags & NFSFT_EQUISPACED)
1255 {
1256 /* Algorithm for equispaced nodes.
1257 * plan_nfft is not initialized if NFSFT_EQUISPACED is set. */
1258
1259#ifdef _OPENMP
1260 INT nthreads = Y(get_num_threads)();
1261#endif
1262
1263 int N[2];
1264 N[0] = 2*plan->N+2;
1265 N[1] = 2*plan->N+2;
1266 fftw_plan plan_fftw;
1267
1268 for (int j=0; j<N[0]; j++)
1269 for (int k=0; k<N[1]; k++)
1270 if ((j+k)%2)
1271 plan->f_hat_intern[j*N[1]+k] *= -1;
1272// f_hat[j*N[1]+k] = plan->f_hat_intern[j*N[1]+k] * CEXP(II*KPI*(j+k));
1273
1274#ifdef _OPENMP
1275#pragma omp critical (nfft_omp_critical_fftw_plan)
1276 {
1277 FFTW(plan_with_nthreads)(nthreads);
1278#endif
1279 plan_fftw = fftw_plan_dft(2, N, plan->f_hat_intern, plan->f_hat_intern, FFTW_FORWARD, FFTW_ESTIMATE);
1280#ifdef _OPENMP
1281 }
1282#endif
1283 fftw_execute(plan_fftw);
1284 for (int j=0; j<N[0]; j++)
1285// for (int k=j%2-1; k<N[1]; k+=2)
1286// plan->f[j*N[1]+k] *= -1;
1287 for (int k=N[1]/2; k<N[1]+1; k++)
1288 plan->f[j*(N[1]/2+1)+(k-N[1]/2)] = plan->f_hat_intern[j*N[1]+k%N[1]] * ((j+k)%2 ? -1 : 1);
1289// plan->f[j*N[1]+k] *= CEXP(II*KPI*(j-N[0]/2 + k-N[1]/2));
1290#ifdef _OPENMP
1291#pragma omp critical (nfft_omp_critical_fftw_plan)
1292#endif
1293 fftw_destroy_plan(plan_fftw);
1294 }
1295 /* Check, which nonequispaced discrete Fourier transform algorithm should
1296 * be used.
1297 */
1298 else if (plan->flags & NFSFT_USE_NDFT)
1299 {
1300 /* Use NDFT. */
1301 nfft_trafo_direct(&plan->plan_nfft);
1302 }
1303 else
1304 {
1305 /* Use NFFT. */
1306 //fprintf(stderr,"nfsft_adjoint: nfft_trafo\n");
1307 nfft_trafo_2d(&plan->plan_nfft);
1308 }
1309#ifdef MEASURE_TIME
1310 t1 = getticks();
1311 plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1312#endif
1313 }
1314}
1315
1317{
1318 int k; /*< The degree k */
1319 int n; /*< The order n */
1320#ifdef MEASURE_TIME
1321 ticks t0, t1;
1322#endif
1323
1324#ifdef MEASURE_TIME
1325 plan->MEASURE_TIME_t[0] = 0.0;
1326 plan->MEASURE_TIME_t[1] = 0.0;
1327 plan->MEASURE_TIME_t[2] = 0.0;
1328#endif
1329
1331 {
1332 nfsft_set_f_hat_nan(plan);
1333 return;
1334 }
1335
1336 /* Check, if precomputation was done and that the bandwidth N is not too
1337 * big.
1338 */
1339 if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1340 {
1341 nfsft_set_f_hat_nan(plan);
1342 return;
1343 }
1344
1345 /* Check, if slow transformation should be used due to small bandwidth. */
1346 if (plan->N < NFSFT_BREAK_EVEN)
1347 {
1348 /* Use adjoint NDSFT. */
1349 nfsft_adjoint_direct(plan);
1350 }
1351 /* Check for correct value of the bandwidth N. */
1352 else if (plan->N <= wisdom.N_MAX)
1353 {
1354 //fprintf(stderr,"nfsft_adjoint: Starting\n");
1355 //fflush(stderr);
1356 /* Propagate pointer values to the internal NFFT plan to assure
1357 * consistency. Pointers may have been modified externally.
1358 */
1359 if (!(plan->flags & NFSFT_EQUISPACED))
1360 {
1361 plan->plan_nfft.x = plan->x;
1362 plan->plan_nfft.f = plan->f;
1363 plan->plan_nfft.f_hat = plan->f_hat;
1364 }
1365
1366#ifdef MEASURE_TIME
1367 t0 = getticks();
1368#endif
1369 if (plan->flags & NFSFT_EQUISPACED)
1370 {
1371 /* Algorithm for equispaced nodes.
1372 * plan_nfft is not initialized if NFSFT_EQUISPACED is set. */
1373 int N[2];
1374 N[0] = 2*plan->N+2;
1375 N[1] = 2*plan->N+2;
1376
1377 for (int j=0; j<N[0]; j++)
1378 {
1379 for (int k=0; k<N[1]/2+1; k++)
1380 plan->f_hat[j*N[1]+k] = 0;
1381 for (int k=N[1]/2; k<N[1]+1; k++)
1382 plan->f_hat[j*N[1]+k%N[1]] = plan->f[j*(N[1]/2+1)+k-N[1]/2] * ((j+k)%2 ? -1 : 1);
1383 }
1384 fftw_plan plan_fftw = FFTW(plan_dft)(2, N, plan->f_hat, plan->f_hat, FFTW_BACKWARD, FFTW_ESTIMATE);
1385 fftw_execute(plan_fftw);
1386 for (int j=0; j<N[0]; j++)
1387 for (int k=0; k<N[1]; k++)
1388 if ((j+k)%2)
1389 plan->f_hat[j*N[1]+k] *= -1;
1390 fftw_destroy_plan(plan_fftw);
1391 }
1392 /* Check, which adjoint nonequispaced discrete Fourier transform algorithm
1393 * should be used.
1394 */
1395 else if (plan->flags & NFSFT_USE_NDFT)
1396 {
1397 //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint_direct\n");
1398 //fflush(stderr);
1399 /* Use adjoint NDFT. */
1400 nfft_adjoint_direct(&plan->plan_nfft);
1401 }
1402 else
1403 {
1404 //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint\n");
1405 //fflush(stderr);
1406 //fprintf(stderr,"nfsft_adjoint: nfft_adjoint\n");
1407 /* Use adjoint NFFT. */
1408 nfft_adjoint_2d(&plan->plan_nfft);
1409 }
1410#ifdef MEASURE_TIME
1411 t1 = getticks();
1412 plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1413#endif
1414
1415 //fprintf(stderr,"nfsft_adjoint: Executing c2e_transposed\n");
1416 //fflush(stderr);
1417#ifdef MEASURE_TIME
1418 t0 = getticks();
1419#endif
1420 /* Convert Fourier coefficients to Chebyshev coefficients. */
1421 c2e_transposed(plan);
1422#ifdef MEASURE_TIME
1423 t1 = getticks();
1424 plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1425#endif
1426
1427#ifdef MEASURE_TIME
1428 t0 = getticks();
1429#endif
1430 /* Check, which transposed polynomial transform algorithm should be used */
1431 if (plan->flags & NFSFT_USE_DPT)
1432 {
1433#ifdef _OPENMP
1434 n = 0;
1435 fpt_transposed_direct(wisdom.set_threads[0],abs(n),
1436 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1437 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1438 plan->N,0U);
1439
1440 int n_abs;
1441 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1442 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1443 {
1444 int threadid = omp_get_thread_num();
1445 n = -n_abs;
1446 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1447 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1448 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1449 plan->N,0U);
1450 n = n_abs;
1451 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1452 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1453 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1454 plan->N,0U);
1455 }
1456#else
1457 /* Use transposed DPT. */
1458 for (n = -plan->N; n <= plan->N; n++)
1459 {
1460 //fprintf(stderr,"nfsft_adjoint: Executing dpt_transposed\n");
1461 //fflush(stderr);
1462 fpt_transposed_direct(wisdom.set,abs(n),
1463 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1464 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1465 plan->N,0U);
1466 }
1467#endif
1468 }
1469 else
1470 {
1471#ifdef _OPENMP
1472 n = 0;
1473 fpt_transposed(wisdom.set_threads[0],abs(n),
1474 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1475 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1476 plan->N,0U);
1477
1478 int n_abs;
1479 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1480 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1481 {
1482 int threadid = omp_get_thread_num();
1483 n = -n_abs;
1484 fpt_transposed(wisdom.set_threads[threadid],abs(n),
1485 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1486 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1487 plan->N,0U);
1488 n = n_abs;
1489 fpt_transposed(wisdom.set_threads[threadid],abs(n),
1490 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1491 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1492 plan->N,0U);
1493 }
1494#else
1495 //fprintf(stderr,"nfsft_adjoint: fpt_transposed\n");
1496 /* Use transposed FPT. */
1497 for (n = -plan->N; n <= plan->N; n++)
1498 {
1499 //fprintf(stderr,"nfsft_adjoint: Executing fpt_transposed\n");
1500 //fflush(stderr);
1501 fpt_transposed(wisdom.set,abs(n),
1502 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1503 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1504 plan->N,0U);
1505 }
1506#endif
1507 }
1508#ifdef MEASURE_TIME
1509 t1 = getticks();
1510 plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1511#endif
1512
1513 /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1514 * multiply spherical Fourier coefficients with corresponding normalization
1515 * weight. */
1516 if (plan->flags & NFSFT_NORMALIZED)
1517 {
1518 //fprintf(stderr,"nfsft_adjoint: Normalizing\n");
1519 //fflush(stderr);
1520 /* Traverse Fourier coefficients array. */
1521#ifdef _OPENMP
1522 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1523#endif
1524 for (k = 0; k <= plan->N; k++)
1525 {
1526 for (n = -k; n <= k; n++)
1527 {
1528 /* Multiply with normalization weight. */
1529 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1530 sqrt((2*k+1)/(4.0*KPI));
1531 }
1532 }
1533 }
1534
1535 /* Set unused coefficients to zero. */
1536 if (plan->flags & NFSFT_ZERO_F_HAT)
1537 {
1538 //fprintf(stderr,"nfsft_adjoint: Setting to zero\n");
1539 //fflush(stderr);
1540 for (n = -plan->N; n <= plan->N+1; n++)
1541 {
1542 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1543 (plan->N+1+abs(n))*sizeof(double _Complex));
1544 }
1545 }
1546 //fprintf(stderr,"nfsft_adjoint: Finished\n");
1547 //fflush(stderr);
1548 }
1549}
1550
1551void nfsft_precompute_x(nfsft_plan *plan)
1552{
1553 if ((plan->flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_EQUISPACED))
1554 return;
1555
1556 /* Pass angle array to NFFT plan. */
1557 plan->plan_nfft.x = plan->x;
1558
1559 /* Precompute. */
1560 if (plan->plan_nfft.flags & PRE_ONE_PSI)
1561 nfft_precompute_one_psi(&plan->plan_nfft);
1562}
1563/* \} */
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
Definition nfft3.h:646
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
Definition nfft3.h:651
#define PRE_ONE_PSI
Definition nfft3.h:200
#define PRE_PSI
Definition nfft3.h:191
#define FFTW_INIT
Definition nfft3.h:197
#define PRE_PHI_HUT
Definition nfft3.h:187
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
#define NFSFT_MALLOC_X
Definition nfft3.h:588
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
Definition nfsft.c:90
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
Definition nfsft.c:193
double * gamma
Precomputed recursion coefficients /f$\gamma_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
Definition nfsft.c:65
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
Definition nfsft.c:267
void nfsft_forget(void)
Definition nfsft.c:579
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials.
Definition nfsft.c:115
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
Definition nfsft.c:79
void nfsft_trafo(nfsft_plan *plan)
Definition nfsft.c:1068
#define NFSFT_NORMALIZED
Definition nfft3.h:585
void nfsft_init(nfsft_plan *plan, int N, int M)
Definition nfsft.c:260
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition legendre.c:89
#define NFSFT_USE_DPT
Definition nfft3.h:587
double * beta
Precomputed recursion coefficients /f$\beta_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
double * alpha
Precomputed recursion coefficients /f$\alpha_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
#define NFSFT_ZERO_F_HAT
Definition nfft3.h:602
void nfsft_adjoint(nfsft_plan *plan)
Definition nfsft.c:1316
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition legendre.c:107
#define NFSFT_INDEX(k, n, plan)
Definition nfft3.h:605
#define NFSFT_NO_DIRECT_ALGORITHM
Definition nfft3.h:600
#define NFSFT_NO_FAST_ALGORITHM
Definition nfft3.h:601
void nfsft_finalize(nfsft_plan *plan)
Definition nfsft.c:625
#define NFSFT_MALLOC_F_HAT
Definition nfft3.h:589
#define NFSFT_USE_NDFT
Definition nfft3.h:586
bool initialized
Indicates wether the structure has been initialized.
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition nfsft.c:376
int N_MAX
Stores precomputation flags.
#define NFSFT_EQUISPACED
Definition nfft3.h:597
#define NFSFT_PRESERVE_F_HAT
Definition nfft3.h:591
int T_MAX
The logarithm /f$t = \log_2 N_{\text{max}}/f$ of the maximum bandwidth.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition legendre.c:98
#define NFSFT_MALLOC_F
Definition nfft3.h:590
Internal header file for auxiliary definitions and functions.
Header file with internal API of the NFSFT module.
Header file for the nfft3 library.
void * nfft_malloc(size_t n)
void nfft_free(void *p)
Holds data for a set of cascade summations.
Definition fpt.h:66
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition nfft3.h:581
NFFT_INT N_total
Total number of Fourier coefficients.
Definition nfft3.h:581
void(* mv_adjoint)(void *)
Adjoint transform.
Definition nfft3.h:581
NFFT_INT M_total
Total number of samples.
Definition nfft3.h:581
unsigned int flags
the planner flags
Definition nfft3.h:581
void(* mv_trafo)(void *)
Transform.
Definition nfft3.h:581
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
Definition nfft3.h:581
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
Definition nfft3.h:581
int N
the bandwidth
Definition nfft3.h:581
fftw_complex * f
Samples.
Definition nfft3.h:581
nfft_plan plan_nfft
the internal NFFT plan
Definition nfft3.h:581
double * x
the nodes for ,
Definition nfft3.h:581
fftw_complex * f_hat
Fourier coefficients.
Definition nfft3.h:581
Wisdom structure.