stensor.h
1 /*#############################################################################
2  *
3  * Copyright 2011 by Henrik Skibbe and Marco Reisert
4  *
5  *
6  * This file is part of the STA-ImageAnalysisToolbox
7  *
8  * STA-ImageAnalysisToolbox is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * STA-ImageAnalysisToolbox is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with STA-ImageAnalysisToolbox.
20  * If not, see <http://www.gnu.org/licenses/>.
21  *
22  *
23 *#############################################################################*/
24 
25 #ifndef STA_STENSOR_H
26 #define STA_STENSOR_H
27 
28 
29 
30 #ifndef _STA_NOGSL
31 #include "gsl/gsl_sf_coupling.h"
32 #include "gsl/gsl_sf_laguerre.h"
33 #include "gsl/gsl_sf_gamma.h"
34 #include "gsl/gsl_sf_legendre.h"
35 #include "gsl/gsl_sf_bessel.h"
36 #else
37 #include "no_gsl.h"
38 #endif
39 
40 
41 
42 
43 #include <cstddef>
44 #include <complex>
45 #include <cmath>
46 #include <sstream>
47 #include <cstddef>
48 #include <vector>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <cstdlib>
52 #include <string>
53 #include <limits>
54 
55 #ifdef __linux__
56 #include <unistd.h>
57 #else
58 #ifndef M_PI
59 #define M_PI 3.14159265358979323846
60 #endif
61 #endif
62 
63 
64 
65 #include "sta_omp_threads.h"
66 
67 
68 
69 
70 #ifdef _STA_LINK_FFTW
71 #ifdef __linux__
72 #include "fftw3.h"
73 #else
74 //#pragma comment (lib,"C:/Program Files/MATLAB/R2011b/bin/win32/libfftw3.dll");
75 //#pragma comment (lib,"C:/Program Files/MATLAB/R2011b/bin/win32/libfftw3f.dll");
76 
77 //#pragma comment (lib,"C:/files/libfftw3-3.lib");
78 //#pragma comment (lib,"C:/files/libfftw3f-3.lib");
79 
80 //#pragma comment (lib,"C:/files/fft/libfftw3.lib");
81 //#pragma comment (lib,"C:/files/fft/libfftw3f.lib");
82 
83 //#pragma comment (lib,"C:/files/gsl/lib/libgsl.lib");
84 //#pragma comment (lib,"C:/files/gsl/lib/libgslcblas.lib");
85 #include "fftw3.h";
86 #endif
87 // http://www.fftw.org/install/windows.html
88 //"c:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat"
89 //lib /def:libfftw3-3.def
90 //lib /def:libfftw3f-3.def
91 //lib /machine:x64 /def:libfftw3l-3.def
92 // !set path=%path%:C:\files
93 //mex -D_STA_LINK_FFTW -D_STA_NOGSL sta_fft3fb.cc -I'C:/Documents and Settings/reisertm/Desktop/STAtoolbox/sta_toolbox' -I'C:/Documents and Settings/reisertm/Desktop/STAtoolbox/sta_toolbox_matlab' -I'C:/Documents and Settings/reisertm/Desktop/STAoolbox/fftw/'
94 //#include "fftw3.h"
95 
96 
97 //mex -D_STA_LINK_FFTW -D_STA_NOGSL sta_fft3fb.cc -L'C:/files/fft/' -lfftw3 -lfftw3f -I'C:/Documents and Settings/reisertm/Desktop/STAtoolbox/sta_toolbox' -I'C:/Documents and Settings/reisertm/Desktop/STAtoolbox/sta_toolbox_matlab' -I'C:/files/fft/'
98 #endif
99 
100 
101 
102 //#define _STA_SINGLE_THREAD
103 
117 namespace hanalysis
119 {
120 
121 
122 
125  STA_RESULT_SUCCESS=0,
126  STA_RESULT_FAILED=-1,
127  STA_RESULT_SHAPE_MISMATCH=-2,
128  STA_RESULT_INVALID_PRODUCT=-3,
129  STA_RESULT_STORAGE_MISMATCH=-4,
130  STA_RESULT_INVALID_TENSOR_RANK=-5,
131  STA_RESULT_OFIELD_TYPE_MISMATCH=-6,
132  STA_RESULT_SAME_ADDRESS=-7,
133  STA_RESULT_NOT_IMPLEMENTED=-8,
134 };
135 
136 
137 
138 static int verbose=0;
139 
140 template<typename T>
141 T sigmoid(T x, T s, T o)
142 {
143  return (T)1/((T)1+std::exp(-s*(x-o)));
144 }
145 
146 //radius on sphere
147 inline
148 void getFourierFreqAndNorm(
149  double a,
150  unsigned int l,
151  unsigned int n,
152  double & kln,
153  double & Nln)
154 {
155  double zero=gsl_sf_bessel_zero_Jnu (l+0.5, n);
156  Nln=std::pow(a,3.0)*std::pow(gsl_sf_bessel_jl (l+1,zero),2.0);
157  Nln=1/(std::sqrt(Nln)+std::numeric_limits<float>::epsilon());
158  kln=zero/a;
159 }
160 
161 
162 
163 
164 /*
165  computes Clebsch Gordan coupling coefficients
166  \f$ \langle ja~ma, jb~mb \hspace{1.5pt}|\hspace{1.5pt} J M \rangle \f$. \n
167  \returns \f$
168  \left\{
169  \begin{array}{ll}
170  \langle ja~ma, jb~mb \hspace{1.5pt}|\hspace{1.5pt} J M \rangle & \mbox{if }
171  M=ma+mb \text{ and } \lvert ja-jb \rvert \leq J \text{ and } ja+jb \geq J \\
172  0 & \mbox{else }
173  \end{array}
174  \right.
175  \f$
176  */
177 inline
178 double clebschGordan ( const int ja,
179  const int ma,
180  const int jb,
181  const int mb,
182  const int J,
183  const int M )
184 {
185 #ifdef _STA_NOGSL
186 
187  return nogsl_clebschGordan<double>(ja,ma,jb,mb,J,M);
188 
189 
190 // int J2=2*J;
191 // double _result=0;
192 // int status=0;
193 // status=gsl_sf_coupling_3j ( 2*ja, 2*jb, J2,
194 // 2*ma, 2*mb, -2*M, _result );
195 //
196 // if(status!=0)
197 // {
198 // printf("error computing the Wigner 3j symbols\n");
199 // }
200 // double norm = sqrt ( ( double ) ( J2+1.0 ) );
201 // int phase = ( ja-jb+M );
202 // double sign = ( phase & 1 ) ? -1.0 : 1.0;
203 // return _result*sign*norm;
204 #else
205  int J2=2*J;
206  gsl_sf_result _result;
207  gsl_sf_coupling_3j_e ( 2*ja, 2*jb, J2,
208  2*ma, 2*mb, -2*M, &_result );
209 
210  //printf("YE_GSL: %f\n",_result.val);
211  double norm = sqrt ( ( double ) ( J2+1.0 ) );
212  int phase = ( ja-jb+M );
213  double sign = ( phase & 1 ) ? -1.0 : 1.0;
214 
215 
216  //printf("%d %d %d %d %d %d | org: %f , mine %f\n",ja,ma,jb,mb,J,M,_result.val*sign*norm,clebschGordan2<double>(ja,ma,jb,mb,J,M));
217  return _result.val*sign*norm;
218 #endif
219 }
220 
221 
222 /*
223  orthonormal spherical harmonic basis functions
224 */
225 inline
226 std::complex<double> basis_SphericalHarmonics(int l, int m, double theta,double phi)
227 {
228  bool sym=false;
229  if (m<0) {
230  sym=true;
231  m*=-1;
232  }
233  double legendre=gsl_sf_legendre_sphPlm (l, m, std::cos(theta)); //already normalized
234  std::complex<double> tmp;
235  tmp=legendre*std::exp(std::complex<double>(0,1)*(double)m*phi);
236  if (sym)
237  {
238  if (m%2==0) return std::conj(tmp);
239  else return -std::conj(tmp);
240  }
241  return tmp;
242 }
243 
244 
245 
246 /*
247  orthogonal spherical harmonic basis functions (semi schmidt)
248 */
249 inline
250 std::complex<double> basis_SphericalHarmonicsSemiSchmidt(int l, int m, double theta,double phi)
251 {
252 
253  double norm=std::sqrt(4.0*M_PI/(2.0*l+1.0));;
254  return norm*basis_SphericalHarmonics(l,m,theta,phi);
255 }
256 
257 
258 
259 
260 
261 
262 
263 
264 /*
265  precomputes the weights for the spherical tensor product (see \ref sta_product) \n
266  \param J1 \f$ J_1 \in \mathbb N \f$ tensor rank of the first field
267  \param J2 \f$ J_2 \in \mathbb N \f$ tensor rank of the second field
268  \param J \f$ J \in \mathbb N \f$ tensor rank of the resulting field
 \param normalized normalized tensor products?: true=\f$ \bullet_{J}\f$ , false=\f$ \circ_{J}\f$
270  \param alpha \f$ \alpha \in \mathbb C \f$ additional weighting factor
271  \returns pointer to allocated memory with weights
272 */
273 template<typename T>
274 T * sta_product_precomputeCGcoefficients_C ( int J1,int J2, int J, bool normalized=false, T alpha=1 )
275 {
276  T norm= ( T ) 1;
277  if ( normalized )
278  {
279  //assert((J1+J2+J)%2==0);
280  norm= ( T ) 1/ ( T ) hanalysis::clebschGordan ( J1,0,J2,0,J,0 );
281  }
282  norm*=alpha;
283  std::size_t count=0;
284  for ( int m=-J; m<=J; m++ )
285  {
286  for ( int m1=-J1; m1<=J1; m1++ )
287  {
288  int m2=m-m1;
289  if ( abs ( m2 ) <=J2 )
290  {
291  count++;
292  }
293  }
294  }
295  T * cg= new T[count];
296  count=0;
297  for ( int m=-J; m<=J; m++ )
298  {
299  for ( int m1=-J1; m1<=J1; m1++ )
300  {
301  int m2=m-m1;
302  if ( abs ( m2 ) <=J2 )
303  {
304  cg[count++]=norm* ( T ) hanalysis::clebschGordan ( J1,m1,J2,m2,J,m );
305  }
306  }
307  }
308  return cg;
309 }
310 
311 
312 
313 
314 template<typename T>
315 bool sta_isnan (
316  const std::complex<T> * stIn,
317  const std::size_t shape[],
318  int components=0,
319  int stride = -1)
320 {
321  if ( stride == -1 )
322  stride = components;
323 
324  std::size_t numvoxel=shape[0]*shape[1]*shape[2];
325 
326 
327 
328 
329  stride*=2;
330 
331 
332 
333  const T * stIn_r=(const T*) stIn;
334 
335 
336  printf("isnan: stride: %d components: %d data ptr: %d",stride,components,stIn_r);
337 
338  for (std::size_t i=0;i<numvoxel;i++)
339  {
340  for (std::size_t j=0;j<components;j++)
341  {
342  if (std::isnan(stIn_r[j]))
343  return true;
344  }
345  stIn_r+=stride;
346  }
347  return false;
348 }
349 
350 template<typename T>
351 bool sta_isinf (
352  const std::complex<T> * stIn,
353  const std::size_t shape[],
354  int components=0,
355  int stride = -1)
356 {
357  if ( stride == -1 )
358  stride = components;
359 
360  std::size_t numvoxel=shape[0]*shape[1]*shape[2];
361 
362  stride*=2;
363 
364  const T * stIn_r=(const T*) stIn;
365 
366  for (std::size_t i=0;i<numvoxel;i++)
367  {
368  for (std::size_t j=0;j<components;j++)
369  {
370  if (std::isinf(stIn_r[j]))
371  return true;
372  }
373  stIn_r+=stride;
374  }
375  return false;
376 }
377 
378 
379 template<typename T,typename S>
380 STA_RESULT sta_product_C (
381  const std::complex<T> * stIn1,
382  const std::complex<T> * stIn2,
383  std::complex<T> * stOut ,
384  const std::size_t shape[],
385  int J1,
386  int J2,
387  int J,
388  S alpha,
389  bool normalize = false,
390  int stride_in1 = -1,
391  int stride_in2 = -1,
392  int stride_out = -1,
393  bool clear_field = false )
394 {
395 
396 
397  if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
398  return STA_RESULT_INVALID_PRODUCT;
399 //printf("%d %d -> %d (%d)\n",J1,J2,J,normalize);
400  if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
401  return STA_RESULT_INVALID_PRODUCT;
402 
403 
404  S * cg= hanalysis::sta_product_precomputeCGcoefficients_C<S> ( J1,J2, J,normalize,alpha );
405 
406  std::size_t vectorLengthJ1=J1*2+1;
407  std::size_t vectorLengthJ2=J2*2+1;
408  std::size_t vectorLengthJ=J*2+1;
409 
410 
411  if ( stride_in1 == -1 )
412  stride_in1 = vectorLengthJ1;
413  if ( stride_in2 == -1 )
414  stride_in2 = vectorLengthJ2;
415  if ( stride_out == -1 )
416  stride_out = vectorLengthJ;
417 
418  std::size_t jumpz=shape[1]*shape[2];
419 
420  #pragma omp parallel for num_threads(get_numCPUs())
421  for ( std::size_t z=0; z<shape[0]; z++ )
422  {
423  std::size_t Z=z;
424  Z*=jumpz;
425  const std::complex<T> * current_J1=&stIn1[Z*stride_in1];
426  const std::complex<T> * current_J2=&stIn2[Z*stride_in2];
427  std::complex<T> * current_J=&stOut[Z*stride_out];
428  current_J1+=J1;
429  current_J2+=J2;
430  current_J+=J;
431 
432  for ( std::size_t i=0; i<jumpz; i++ )
433  {
434  std::size_t count=0;
435  for ( int m=-J; m<=J; m++ )
436  {
437  std::complex<T> & current=current_J[m];
438  if ( clear_field ) current=T ( 0 );
439 
440  for ( int m1=-J1; m1<=J1; m1++ )
441  {
442  int m2=m-m1;
443  if ( std::abs ( m2 ) <=J2 )
444  {
445  current+= ( current_J1[m1] ) * ( current_J2[m2] ) *cg[count++];
446  }
447  }
448  }
449  current_J1+=stride_in1;
450  current_J2+=stride_in2;
451  current_J+=stride_out;
452  }
453  }
454  delete [] cg;
455  return STA_RESULT_SUCCESS;
456 }
457 
458 
459 
460 
461 
462 
463 
464 
465 
466 /*
467  computes the spherical tensor derivative of \f$ \mathbf{stIn} \in \mathcal T_{J}\f$ \n
468  \param stIn \f$ \mathbf{stIn} \in \mathcal T_{J}\f$
469  \param stOut \f$ \mathbf{stOut} \in \mathcal T_{(J+Jupdown)}\f$, the spherical tensor derivative of \f$ \mathbf{stIn} \f$
470  \param shape
471  \param J \f$ J \in \mathbb N \f$ tensor rank of the input field \f$ \mathbf{stIn} \f$
472  \param Jupdown
473  \f$
474  \left\{
475  \begin{array}{ll}
476  \mathbf{stOut}=\alpha({\nabla} \bullet_{(J+1)} \mathbf{stIn}), & \mbox{ if } Jupdown=1\\
477  \mathbf{stOut}=\alpha({\nabla} \circ_{J} \mathbf{stIn}), & \mbox{ if } Jupdown=0\\
478  \mathbf{stOut}=\alpha({\nabla} \bullet_{(J-1)} \mathbf{stIn}), & \mbox{ if } Jupdown=-1
479  \end{array}
480  \right.
481  \f$
482  \param conjugate if \b conjugate=true the conjugate operator \f$ \overline{{\nabla}} \f$ is used
483  \param alpha \f$ \alpha \in \mathbb C \f$ additional weighting factor
484  \returns \f$
485  \left\{
486  \begin{array}{ll}
487  J+Jupdown & \mbox{if derivative exists}\\
488  -1 & \mbox{ else }
489  \end{array}
490  \right.
491  \f$
492  \warning ensure that stIn, stOut and shape exist
493  and have been \b allocated properly!
494  */
495 // template<typename T>
496 // int sta_derivatives_C (
497 // const std::complex<T> * stIn,
498 // std::complex<T> * stOut ,
499 // const std::size_t shape[],
500 // int J,
501 // int Jupdown, // either -1 0 or 1
502 // bool conjugate=false,
503 // std::complex<T> alpha= ( T ) 1.0,
504 // const T v_size[]=NULL,
505 // int stride_in = -1,
506 // int stride_out = -1,
507 // bool clear_field = false )
508 // {
509 // bool alpha_real=(alpha.imag()==0);
510 //
511 // alpha/=T ( 2 );
512 // if ( abs ( Jupdown ) >1 ) return -1;
513 // if ( abs ( J+Jupdown ) <0 ) return -1;
514 //
515 //
516 // T sign=-1;
517 // if (conjugate) sign*=-1;
518 //
519 // T voxel_size[3];
520 // voxel_size[0]=voxel_size[1]=voxel_size[2]=T ( 1 );
521 // if ( v_size!=NULL )
522 // {
523 // voxel_size[0]/=v_size[0]; // Zdir
524 // voxel_size[1]/=v_size[1]; // Ydir
525 // voxel_size[2]/=v_size[2]; // Xdir
526 // }
527 //
528 // sign*=voxel_size[1];
529 //
530 // int J1=J+Jupdown;
531 //
532 // std::size_t vectorLengthJ=2*J+1;
533 // std::size_t vectorLengthJ1=2* ( J1 ) +1;
534 //
535 // if ( stride_in == -1 )
536 // stride_in = vectorLengthJ;
537 // if ( stride_out == -1 )
538 // stride_out = vectorLengthJ1;
539 //
540 //
541 //
542 // std::size_t jumpz=shape[1]*shape[2];
543 // std::size_t jumpy=shape[2];
544 //
545 //
546 // T * CGTable=new T[3*vectorLengthJ1];
547 // T shnorm=hanalysis::clebschGordan ( 1,0,J,0,J1,0 );
548 // if ( Jupdown==0 ) shnorm=1;
549 // for ( int M=- ( J1 );M<= ( J1 );M++ )
550 // {
551 // CGTable[M+ ( J1 ) ] =T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,-1,J,M+1,J1,M ) /shnorm;;
552 // CGTable[M+ ( J1 ) +vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan ( 1,0,J,M,J1,M ) /shnorm;
553 // CGTable[M+ ( J1 ) +2*vectorLengthJ1]=T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,1,J,M-1,J1,M ) /shnorm;
554 // }
555 // T * CGTable0=&CGTable[0];
556 // CGTable0+= ( J1 );
557 // T * CGTable1=&CGTable[vectorLengthJ1];
558 // CGTable1+= ( J1 );
559 // T * CGTable2=&CGTable[2*vectorLengthJ1];
560 // CGTable2+= ( J1 );
561 //
562 //
563 // stride_in*=2;
564 // stride_out*=2;
565 // int J_times2=2*J;
566 // int J1_times2=2*J1;
567 //
568 // const T * stIn_R=(const T *)stIn;
569 // T * stOut_R=(T *)stOut;
570 //
571 // #pragma omp parallel for num_threads(get_numCPUs())
572 // for ( std::size_t z=0;z<shape[0];z++ )
573 // {
574 // std::size_t Z[3];
575 // Z[1]=z+shape[0];
576 // Z[0]=Z[1]-1;
577 // Z[2]=Z[1]+1;
578 // Z[0]%=shape[0];
579 // Z[1]%=shape[0];
580 // Z[2]%=shape[0];
581 //
582 // Z[0]*=jumpz;
583 // Z[1]*=jumpz;
584 // Z[2]*=jumpz;
585 //
586 // const T * derivX1;
587 // const T * derivX0;
588 //
589 // const T* derivY1;
590 // const T * derivY0;
591 //
592 // const T* derivZ1;
593 // const T * derivZ0;
594 //
595 // for ( std::size_t y=0;y<shape[1];y++ )
596 // {
597 // std::size_t Y[3];
598 // Y[1]=y+shape[1];
599 // Y[0]=Y[1]-1;
600 // Y[2]=Y[1]+1;
601 // Y[0]%=shape[1];
602 // Y[1]%=shape[1];
603 // Y[2]%=shape[1];
604 //
605 // Y[0]*=jumpy;
606 // Y[1]*=jumpy;
607 // Y[2]*=jumpy;
608 //
609 // for ( std::size_t x=0;x<shape[2];x++ )
610 // {
611 // std::size_t X[3];
612 // X[1]=x+shape[2];
613 // X[0]=X[1]-1;
614 // X[2]=X[1]+1;
615 // X[0]%=shape[2];
616 // X[1]%=shape[2];
617 // X[2]%=shape[2];
618 //
619 // derivX1=stIn_R+ ( Z[1]+Y[1]+X[0] ) *stride_in+J_times2;
620 // derivX0=stIn_R+ ( Z[1]+Y[1]+X[2] ) *stride_in+J_times2;
621 //
622 // derivY1=stIn_R+ ( Z[1]+Y[0]+X[1] ) *stride_in+J_times2;
623 // derivY0=stIn_R+ ( Z[1]+Y[2]+X[1] ) *stride_in+J_times2;
624 //
625 // derivZ1=stIn_R+ ( Z[0]+Y[1]+X[1] ) *stride_in+J_times2;
626 // derivZ0=stIn_R+ ( Z[2]+Y[1]+X[1] ) *stride_in+J_times2;
627 //
628 // std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+J1_times2;
629 //
630 // // T ctmp_r;
631 // // T ctmp_i;
632 //
633 // for ( int M=- ( J1 );M<= ( J1 );M++ )
634 // {
635 //
636 // T tmp_r=T ( 0 );
637 // T tmp_i=T ( 0 );
638 //
639 // if ( abs ( M+1 ) <=J )
640 // {
641 // int m2=2*(M+1);
642 // tmp_r+=CGTable0[M]*( voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) -sign* ( derivY0[m2+1]-derivY1[m2+1] ) );
643 // tmp_i+=CGTable0[M]*( voxel_size[2]* ( derivX0[m2+1]-derivX1[m2+1] ) +sign* ( derivY0[m2]-derivY1[m2] ) );
644 // }
645 // if ( abs ( M ) <=J )
646 // {
647 // tmp_r+=CGTable1[M]* ( derivZ0[2*M]-derivZ1[2*M] );
648 // tmp_i+=CGTable1[M]* ( derivZ0[2*M+1]-derivZ1[2*M+1] );
649 // }
650 // if ( abs ( M-1 ) <=J )
651 // {
652 // int m2=2*(M-1);
653 // tmp_r+=CGTable2[M]* ( -voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) -sign* ( derivY0[m2+1]-derivY1[m2+1] ) );
654 // tmp_i+=CGTable2[M]* ( -voxel_size[2]* ( derivX0[m2+1]-derivX1[m2+1] ) +sign* ( derivY0[m2]-derivY1[m2] ) );
655 // }
656 //
657 // T * current=stOut_R+offset+2*M;
658 // if (alpha_real)
659 // {
660 // if ( clear_field )
661 // {
662 // (*current++)=alpha.real()*tmp_r;
663 // (*current)=alpha.real()*tmp_i;
664 // }else
665 // {
666 // (*current++)+=alpha.real()*tmp_r;
667 // (*current)+=alpha.real()*tmp_i;
668 // }
669 // }else
670 // {
671 // if ( clear_field )
672 // {
673 // (*current++)=alpha.real()*tmp_r-alpha.imag()*tmp_i;
674 // (*current)=alpha.real()*tmp_i+alpha.imag()*tmp_r;
675 // }else
676 // {
677 // (*current++)+=alpha.real()*tmp_r-alpha.imag()*tmp_i;
678 // (*current)+=alpha.real()*tmp_i+alpha.imag()*tmp_r;
679 // }
680 // }
681 // }
682 // }
683 // }
684 // }
685 // delete [] CGTable;
686 // return ( J1 );
687 // }
688 
689 
690 template<typename T,typename S>
691 STA_RESULT sta_derivatives_C (
692  const S * stIn,
693  std::complex<T> * stOut ,
694  const std::size_t shape[],
695  int J,
696  int Jupdown, // either -1 0 or 1
697  bool conjugate=false,
698  std::complex<T> alpha= ( T ) 1.0,
699  const T v_size[]=NULL,
700  int stride_in = -1,
701  int stride_out = -1,
702  bool clear_field = false )
703 {
704  alpha/=T ( 2 );
705  if ( abs ( Jupdown ) >1 ) return STA_RESULT_INVALID_TENSOR_RANK;
706  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
707 
708 
709  std::complex<T> imag=-std::complex<T> ( 0,1 );
710  if (conjugate) imag*=T( -1 );
711 
712  T voxel_size[3];
713  voxel_size[0]=voxel_size[1]=voxel_size[2]=T ( 1 );
714  if ( v_size!=NULL )
715  {
716  voxel_size[0]/=v_size[0]; // Zdir
717  voxel_size[1]/=v_size[1]; // Ydir
718  voxel_size[2]/=v_size[2]; // Xdir
719  }
720 
721  imag*=voxel_size[1];
722 
723  int J1=J+Jupdown;
724 
725  std::size_t vectorLengthJ=2*J+1;
726  std::size_t vectorLengthJ1=2* ( J1 ) +1;
727 
728  if ( stride_in == -1 )
729  stride_in = vectorLengthJ;
730  if ( stride_out == -1 )
731  stride_out = vectorLengthJ1;
732 
733 
734  std::size_t jumpz=shape[1]*shape[2];
735  std::size_t jumpy=shape[2];
736 
737 
738  T * CGTable=new T[3*vectorLengthJ1];
739  T shnorm=hanalysis::clebschGordan ( 1,0,J,0,J1,0 );
740  if ( Jupdown==0 ) shnorm=1;
741  for ( int M=- ( J1 ); M<= ( J1 ); M++ )
742  {
743  CGTable[M+ ( J1 ) ] =T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,-1,J,M+1,J1,M ) /shnorm;;
744  CGTable[M+ ( J1 ) +vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan ( 1,0,J,M,J1,M ) /shnorm;
745  CGTable[M+ ( J1 ) +2*vectorLengthJ1]=T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,1,J,M-1,J1,M ) /shnorm;
746  }
747  T * CGTable0=&CGTable[0];
748  CGTable0+= ( J1 );
749  T * CGTable1=&CGTable[vectorLengthJ1];
750  CGTable1+= ( J1 );
751  T * CGTable2=&CGTable[2*vectorLengthJ1];
752  CGTable2+= ( J1 );
753 
754  #pragma omp parallel for num_threads(get_numCPUs())
755  for ( std::size_t z=0; z<shape[0]; z++ )
756  {
757  std::size_t Z[3];
758  Z[1]=z+shape[0];
759  Z[0]=Z[1]-1;
760  Z[2]=Z[1]+1;
761  Z[0]%=shape[0];
762  Z[1]%=shape[0];
763  Z[2]%=shape[0];
764 
765  Z[0]*=jumpz;
766  Z[1]*=jumpz;
767  Z[2]*=jumpz;
768 
769  const S * derivX1;
770  const S * derivX0;
771 
772  const S * derivY1;
773  const S * derivY0;
774 
775  const S * derivZ1;
776  const S * derivZ0;
777 
778  for ( std::size_t y=0; y<shape[1]; y++ )
779  {
780  std::size_t Y[3];
781  Y[1]=y+shape[1];
782  Y[0]=Y[1]-1;
783  Y[2]=Y[1]+1;
784  Y[0]%=shape[1];
785  Y[1]%=shape[1];
786  Y[2]%=shape[1];
787 
788  Y[0]*=jumpy;
789  Y[1]*=jumpy;
790  Y[2]*=jumpy;
791 
792  for ( std::size_t x=0; x<shape[2]; x++ )
793  {
794  std::size_t X[3];
795  X[1]=x+shape[2];
796  X[0]=X[1]-1;
797  X[2]=X[1]+1;
798  X[0]%=shape[2];
799  X[1]%=shape[2];
800  X[2]%=shape[2];
801 
802  derivX1=&stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in]+J;
803  derivX0=&stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in]+J;
804 
805  derivY1=&stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in]+J;
806  derivY0=&stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in]+J;
807 
808  derivZ1=&stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in]+J;
809  derivZ0=&stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in]+J;
810 
811  std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+J1;
812 
813  for ( int M=- ( J1 ); M<= ( J1 ); M++ )
814  {
815  std::complex<T> & current=stOut[offset+M];
816  if ( clear_field ) current=T ( 0 );
817  std::complex<T> tmp=T ( 0 );
818 
819  if ( abs ( M+1 ) <=J )
820  {
821  int m2=M+1;
822  tmp+=CGTable0[M]* ( voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) +imag* ( derivY0[m2]-derivY1[m2] ) );
823  }
824  if ( abs ( M ) <=J )
825  {
826  tmp+=CGTable1[M]* ( derivZ0[M]-derivZ1[M] );
827  }
828  if ( abs ( M-1 ) <=J )
829  {
830  int m2=M-1;
831  tmp+=CGTable2[M]* ( -voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) +imag* ( derivY0[m2]-derivY1[m2] ) );
832  }
833  current+=tmp*alpha;
834  }
835  }
836  }
837  }
838  delete [] CGTable;
839  return ( STA_RESULT_SUCCESS );
840 }
841 
842 
843 
844 
845 
846 /*
847  computes the spherical tensor double-derivative of \f$ \mathbf{stIn} \in \mathcal T_{J}\f$ \n
848  \param stIn \f$ \mathbf{stIn} \in \mathcal T_{J}\f$
849  \param stOut \f$ \mathbf{stOut} \in \mathcal T_{(J+Jupdown)}\f$, the spherical tensor double-derivative of \f$ \mathbf{stIn} \f$
850  \param shape
851  \param J \f$ J \in \mathbb N \f$ tensor rank of the input field \f$ \mathbf{stIn} \f$
852  \param Jupdown
853  \f$
854  \left\{
855  \begin{array}{ll}
856  \mathbf{stOut}=\alpha({\nabla} \bullet_{(J+2)} \mathbf{stIn}), & \mbox{ if } Jupdown=2\\
857  \mathbf{stOut}=\alpha({\nabla} \bullet_{J} \mathbf{stIn}), & \mbox{ if } Jupdown=0\\
858  \mathbf{stOut}=\alpha({\nabla} \bullet_{(J-2)} \mathbf{stIn}), & \mbox{ if } Jupdown=-2
859  \end{array}
860  \right.
861  \f$
862  \param conjugate if \b conjugate=true the conjugate operator \f$ \overline{{\nabla}} \f$ is used
863  \param alpha \f$ \alpha \in \mathbb C \f$ additional weighting factor
 \returns STA_RESULT_SUCCESS if the derivative exists
 (i.e. \f$ Jupdown \in \{-2,0,2\} \f$ and \f$ J+Jupdown \geq 0 \f$),
 otherwise STA_RESULT_INVALID_TENSOR_RANK
872  \warning ensure that stIn, stOut and shape exist
873  and have been \b allocated properly!
874  */
875 template<typename T,typename S>
876 STA_RESULT sta_derivatives2_C (
877  const S * stIn,
878  std::complex<T> * stOut ,
879  const std::size_t shape[],
880  int J,
881  int Jupdown, // either +2 or -2 or 0
882  bool conjugate=false,
883  std::complex<T> alpha= ( T ) 1.0,
884  const T v_size[]=NULL,
885  int stride_in = -1,
886  int stride_out = -1,
887  bool clear_field = false )
888 {
889  if ( abs ( Jupdown ) >2 ) return STA_RESULT_INVALID_TENSOR_RANK;
890  if ( abs ( Jupdown ) ==1 ) return STA_RESULT_INVALID_TENSOR_RANK;
891  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
892 
893 
894 
895  T voxel_size[3];
896  voxel_size[0]=voxel_size[1]=voxel_size[2]=T ( 1 );
897  if ( v_size!=NULL )
898  {
899  voxel_size[0]/=v_size[0];
900  voxel_size[1]/=v_size[1];
901  voxel_size[2]/=v_size[2];
902  if (hanalysis::verbose>0)
903  printf("element size not considered yet: sta_derivatives2 \n");
904  }
905 
906  std::complex<T> imag=-std::complex<T> ( 0,1 );
907  if (conjugate) imag*=T( -1 );
908 
909  alpha*=T ( sqrt ( 3.0/2.0 ) );
910 
911  int J1=J+Jupdown;
912 
913  int vectorLengthJ=J*2+1;
914  int vectorLengthJ1= ( J1 ) *2+1;
915 
916  if ( stride_in == -1 )
917  stride_in = vectorLengthJ;
918  if ( stride_out == -1 )
919  stride_out = vectorLengthJ1;
920 
921  std::size_t jumpz=shape[1]*shape[2];
922  std::size_t jumpy=shape[2];
923 
924 
925  T * CGTable=new T[5*vectorLengthJ1];
926  T shnorm=hanalysis::clebschGordan ( 2,0,J,0,J1,0 );
927  for ( int M=- ( J1 ); M<= ( J1 ); M++ )
928  {
929  CGTable[M+ ( J1 ) ] =hanalysis::clebschGordan ( 2,-2,J,M+2,J1,M ) /shnorm;
930  CGTable[M+ ( J1 ) +vectorLengthJ1] =hanalysis::clebschGordan ( 2,-1,J,M+1,J1,M ) /shnorm;;
931  CGTable[M+ ( J1 ) +2*vectorLengthJ1]=hanalysis::clebschGordan ( 2,0,J,M,J1,M ) /shnorm;
932  CGTable[M+ ( J1 ) +3*vectorLengthJ1]=hanalysis::clebschGordan ( 2,1,J,M-1,J1,M ) /shnorm;
933  CGTable[M+ ( J1 ) +4*vectorLengthJ1]=hanalysis::clebschGordan ( 2,2,J,M-2,J1,M ) /shnorm;
934  }
935  T * CGTable0=&CGTable[0];
936  CGTable0+= ( J1 );
937  T * CGTable1=&CGTable[vectorLengthJ1];
938  CGTable1+= ( J1 );
939  T * CGTable2=&CGTable[2*vectorLengthJ1];
940  CGTable2+= ( J1 );
941  T * CGTable3=&CGTable[3*vectorLengthJ1];
942  CGTable3+= ( J1 );
943  T * CGTable4=&CGTable[4*vectorLengthJ1];
944  CGTable4+= ( J1 );
945 
946  #pragma omp parallel for num_threads(get_numCPUs())
947  for ( std::size_t z=0; z<shape[0]; z++ )
948  {
949  std::size_t Z[5];
950  Z[2]=z+shape[0];
951  Z[0]=Z[2]-2;
952  Z[1]=Z[2]-1;
953  Z[3]=Z[2]+1;
954  Z[4]=Z[2]+2;
955  Z[0]%=shape[0];
956  Z[1]%=shape[0];
957  Z[2]%=shape[0];
958  Z[3]%=shape[0];
959  Z[4]%=shape[0];
960 
961  Z[0]*=jumpz;
962  Z[1]*=jumpz;
963  Z[2]*=jumpz;
964  Z[3]*=jumpz;
965  Z[4]*=jumpz;
966 
967 
968 
969  //const S * X1Y1Z1;
970  const S * X1Y1Z2;
971  //const S * X1Y1Z3;
972  const S * X1Y2Z1;
973  const S * X1Y2Z2;
974  const S * X1Y2Z3;
975  //const S * X1Y3Z1;
976  const S * X1Y3Z2;
977  //const S * X1Y3Z3;
978 
979 
980  const S * X2Y1Z1;
981  const S * X2Y1Z2;
982  const S * X2Y1Z3;
983  const S * X2Y2Z1;
984  const S * X2Y2Z2;
985  const S * X2Y2Z3;
986  const S * X2Y3Z1;
987  const S * X2Y3Z2;
988  const S * X2Y3Z3;
989 
990 
991  //const S * X3Y1Z1;
992  const S * X3Y1Z2;
993  //const S * X3Y1Z3;
994  const S * X3Y2Z1;
995  const S * X3Y2Z2;
996  const S * X3Y2Z3;
997  //const S * X3Y3Z1;
998  const S * X3Y3Z2;
999  //const S * X3Y3Z3;
1000 
1001 
1002  std::complex<T> current;
1003 
1004 
1005  for ( std::size_t y=0; y<shape[1]; y++ )
1006  {
1007  std::size_t Y[5];
1008  Y[2]=y+shape[1];
1009  Y[0]=Y[2]-2;
1010  Y[1]=Y[2]-1;
1011  Y[3]=Y[2]+1;
1012  Y[4]=Y[2]+2;
1013  Y[0]%=shape[1];
1014  Y[1]%=shape[1];
1015  Y[2]%=shape[1];
1016  Y[3]%=shape[1];
1017  Y[4]%=shape[1];
1018 
1019  Y[0]*=jumpy;
1020  Y[1]*=jumpy;
1021  Y[2]*=jumpy;
1022  Y[3]*=jumpy;
1023  Y[4]*=jumpy;
1024 
1025 
1026  for ( std::size_t x=0; x<shape[2]; x++ )
1027  {
1028  std::size_t X[5];
1029  X[2]=x+shape[0];
1030  X[0]=X[2]-2;
1031  X[1]=X[2]-1;
1032  X[3]=X[2]+1;
1033  X[4]=X[2]+2;
1034  X[0]%=shape[2];
1035  X[1]%=shape[2];
1036  X[2]%=shape[2];
1037  X[3]%=shape[2];
1038  X[4]%=shape[2];
1039 
1040 
1041  //X1Y1Z1=&stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in]+J;
1042  X1Y1Z2=&stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in]+J;
1043  //X1Y1Z3=&stIn[ ( Z[3]+Y[1]+X[1] ) *stride_in]+J;
1044  X1Y2Z1=&stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in]+J;
1045  X1Y2Z2=&stIn[ ( Z[2]+Y[2]+X[1] ) *stride_in]+J;
1046  X1Y2Z3=&stIn[ ( Z[3]+Y[2]+X[1] ) *stride_in]+J;
1047  //X1Y3Z1=&stIn[ ( Z[1]+Y[3]+X[1] ) *stride_in]+J;
1048  X1Y3Z2=&stIn[ ( Z[2]+Y[3]+X[1] ) *stride_in]+J;
1049  //X1Y3Z3=&stIn[ ( Z[3]+Y[3]+X[1] ) *stride_in]+J;
1050 
1051 
1052  X2Y1Z1=&stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in]+J;
1053  X2Y1Z2=&stIn[ ( Z[2]+Y[1]+X[2] ) *stride_in]+J;
1054  X2Y1Z3=&stIn[ ( Z[3]+Y[1]+X[2] ) *stride_in]+J;
1055  X2Y2Z1=&stIn[ ( Z[1]+Y[2]+X[2] ) *stride_in]+J;
1056  X2Y2Z2=&stIn[ ( Z[2]+Y[2]+X[2] ) *stride_in]+J;
1057  X2Y2Z3=&stIn[ ( Z[3]+Y[2]+X[2] ) *stride_in]+J;
1058  X2Y3Z1=&stIn[ ( Z[1]+Y[3]+X[2] ) *stride_in]+J;
1059  X2Y3Z2=&stIn[ ( Z[2]+Y[3]+X[2] ) *stride_in]+J;
1060  X2Y3Z3=&stIn[ ( Z[3]+Y[3]+X[2] ) *stride_in]+J;
1061 
1062 
1063  //X3Y1Z1=&stIn[ ( Z[1]+Y[1]+X[3] ) *stride_in]+J;
1064  X3Y1Z2=&stIn[ ( Z[2]+Y[1]+X[3] ) *stride_in]+J;
1065  //X3Y1Z3=&stIn[ ( Z[3]+Y[1]+X[3] ) *stride_in]+J;
1066  X3Y2Z1=&stIn[ ( Z[1]+Y[2]+X[3] ) *stride_in]+J;
1067  X3Y2Z2=&stIn[ ( Z[2]+Y[2]+X[3] ) *stride_in]+J;
1068  X3Y2Z3=&stIn[ ( Z[3]+Y[2]+X[3] ) *stride_in]+J;
1069  //X3Y3Z1=&stIn[ ( Z[1]+Y[3]+X[3] ) *stride_in]+J;
1070  X3Y3Z2=&stIn[ ( Z[2]+Y[3]+X[3] ) *stride_in]+J;
1071  //X3Y3Z3=&stIn[ ( Z[3]+Y[3]+X[3] ) *stride_in]+J;
1072 
1073 
1074  std::size_t offset= ( Z[2]+Y[2]+X[2] ) *stride_out+J1;
1075 
1076 
1077  for ( int M=- ( J1 ); M<= ( J1 ); M++ )
1078  {
1079  std::complex<T> & current=stOut[offset+M];
1080  if ( clear_field ) current=T ( 0 );
1081  std::complex<T> tmp=T ( 0 );
1082 
1083 
1084  if ( abs ( M+2 ) <=J ) // m1=-1 m2=M+1 M
1085  {
1086  int m2=M+2;
1087  std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1088  std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1089  std::complex<T> Dxy=- ( T ) 0.25* ( X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2] );
1090  tmp+= ( T ) 0.5*CGTable0[M]* ( ( Dxx-Dyy )-imag* ( ( T ) 2.0*Dxy ) );
1091  }
1092 
1093 
1094  if ( abs ( M+1 ) <=J ) // m1=-1 m2=M+1 M
1095  {
1096  int m2=M+1;
1097  std::complex<T> Dxz= ( T ) 0.25* ( X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2] );
1098  std::complex<T> Dyz=- ( T ) 0.25* ( X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2] );
1099  tmp+=CGTable1[M]* ( ( Dxz )-imag* ( Dyz ) );
1100  }
1101 
1102 
1103 
1104  if ( abs ( M ) <=J ) // m1=-1 m2=M+1 M
1105  {
1106  int m2=M;
1107  std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1108  std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1109  std::complex<T> Dzz= ( X2Y2Z1[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y2Z3[m2] );
1110  const T SQRT6= ( T ) ( -1.0/std::sqrt ( 6.0 ) );
1111  tmp+=CGTable2[M]* ( ( Dxx+Dyy- ( T ) 2.0*Dzz ) * ( SQRT6 ) );
1112  }
1113 
1114  if ( abs ( M-1 ) <=J ) // m1=-1 m2=M+1 M
1115  {
1116  int m2=M-1;
1117  std::complex<T> Dxz= ( T ) 0.25* ( X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2] );
1118  std::complex<T> Dyz=- ( T ) 0.25* ( X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2] );
1119  tmp-=CGTable3[M]* ( ( Dxz ) +imag* ( Dyz ) );
1120  }
1121 
1122 
1123  if ( abs ( M-2 ) <=J ) // m1=-1 m2=M+1 M
1124  {
1125  int m2=M-2;
1126  std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1127  std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1128  std::complex<T> Dxy=- ( T ) 0.25* ( X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2] );
1129  tmp+= ( T ) 0.5*CGTable4[M]* ( ( Dxx-Dyy ) +imag* ( ( T ) 2.0*Dxy ) );
1130  }
1131 
1132  current+=tmp*alpha;
1133 
1134  }
1135 
1136  }
1137  }
1138  }
1139  delete [] CGTable;
1140  return STA_RESULT_SUCCESS;
1141 }
1142 
1143 
1144 
1145 
1146 
1147 
1148 
1149 
1150 
1151 
1152 template<typename T,typename S>
1153 STA_RESULT sta_laplace_1component (
1154  const S * stIn,std::complex<T> * stOut ,
1155  const std::size_t shape[],
1156  int type=1,
1157  std::complex<T> alpha= ( T ) 1.0,
1158  const T v_size[]=NULL,
1159  bool clear_field=false)
1160 {
1161 
1162 // if ( v_size!=NULL )
1163 // {
1164 // if (hanalysis::verbose>0)
1165 // printf ( "WARNING! element size is not considered yet!\n" );
1166 // }
1167 
1168 
1169 // printf("..");
1170 // return STA_RESULT_SUCCESS;
1171 
1172  T voxel_weights[4];
1173  voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;
1174 
1175 
1176  if ( v_size!=NULL )// && ((v_size[0]!=1)||(v_size[1]!=1)||(v_size[2]!=1)))
1177  {
1178  if ( type!=1 )
1179  {
1180  if (hanalysis::verbose>0)
1181  printf ( "WARNING! element size is not considered yet!\n" );
1182  } else
1183  {
1184  type=2;
1185  voxel_weights[0]/=v_size[0]*v_size[0]; // Zdir
1186  voxel_weights[1]/=v_size[1]*v_size[1]; // Ydir
1187  voxel_weights[2]/=v_size[2]*v_size[2]; // Xdir
1188 
1189  }
1190  if (hanalysis::verbose>0)
1191  printf ( "v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
1192  }
1193 
1194  voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]); // Center
1195 
1196 
1197  if (hanalysis::verbose>0)
1198  printf ( "laplace_1: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );
1199 
1200  std::size_t jumpz=shape[1]*shape[2];
1201  std::size_t jumpy=shape[2];
1202 
1203  switch ( type )
1204  {
1205  case 0:
1206  {
1207  alpha*= ( T ) 0.2;
1208  #pragma omp parallel for num_threads(get_numCPUs())
1209  for ( std::size_t z=0; z<shape[0]; z++ )
1210  {
1211  std::size_t Z[3];
1212  Z[1]=z+shape[0];
1213  Z[0]=Z[1]-1;
1214  Z[2]=Z[1]+1;
1215  Z[0]%=shape[0];
1216  Z[1]%=shape[0];
1217  Z[2]%=shape[0];
1218 
1219  Z[0]*=jumpz;
1220  Z[1]*=jumpz;
1221  Z[2]*=jumpz;
1222 
1223  for ( std::size_t y=0; y<shape[1]; y++ )
1224  {
1225  std::size_t Y[3];
1226  Y[1]=y+shape[1];
1227  Y[0]=Y[1]-1;
1228  Y[2]=Y[1]+1;
1229  Y[0]%=shape[1];
1230  Y[1]%=shape[1];
1231  Y[2]%=shape[1];
1232 
1233  Y[0]*=jumpy;
1234  Y[1]*=jumpy;
1235  Y[2]*=jumpy;
1236 
1237  for ( std::size_t x=0; x<shape[2]; x++ )
1238  {
1239  std::size_t X[3];
1240  X[1]=x+shape[2];
1241  X[0]=X[1]-1;
1242  X[2]=X[1]+1;
1243  X[0]%=shape[2];
1244  X[1]%=shape[2];
1245  X[2]%=shape[2];
1246 
1247  std::size_t offset= ( Z[1]+Y[1]+X[1] );
1248  std::complex<T> & current=stOut[offset];
1249  if ( clear_field ) current=T ( 0 );
1250 
1251  current+=alpha* (
1252  stIn[ ( Z[0]+Y[0]+X[1] ) ] +
1253  stIn[ ( Z[0]+Y[1]+X[0] ) ] +
1254  stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1255  stIn[ ( Z[0]+Y[1]+X[2] ) ] +
1256  stIn[ ( Z[0]+Y[2]+X[1] ) ] +
1257  stIn[ ( Z[1]+Y[0]+X[0] ) ] +
1258  stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1259  stIn[ ( Z[1]+Y[0]+X[2] ) ] +
1260  stIn[ ( Z[1]+Y[1]+X[0] ) ]-
1261  T ( 18 ) *stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1262  stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1263  stIn[ ( Z[1]+Y[2]+X[0] ) ] +
1264  stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1265  stIn[ ( Z[1]+Y[2]+X[2] ) ] +
1266  stIn[ ( Z[2]+Y[0]+X[1] ) ] +
1267  stIn[ ( Z[2]+Y[1]+X[0] ) ] +
1268  stIn[ ( Z[2]+Y[1]+X[1] ) ] +
1269  stIn[ ( Z[2]+Y[1]+X[2] ) ] +
1270  stIn[ ( Z[2]+Y[2]+X[1] ) ]
1271  );
1272 
1273  }
1274 
1275  }
1276  }
1277  }
1278  break;
1279  case 1:
1280  {
1281  alpha*=1;
1282  #pragma omp parallel for num_threads(get_numCPUs())
1283  for ( std::size_t z=0; z<shape[0]; z++ )
1284  {
1285  std::size_t Z[3];
1286  Z[1]=z+shape[0];
1287  Z[0]=Z[1]-1;
1288  Z[2]=Z[1]+1;
1289  Z[0]%=shape[0];
1290  Z[1]%=shape[0];
1291  Z[2]%=shape[0];
1292 
1293  Z[0]*=jumpz;
1294  Z[1]*=jumpz;
1295  Z[2]*=jumpz;
1296 
1297  for ( std::size_t y=0; y<shape[1]; y++ )
1298  {
1299  std::size_t Y[3];
1300  Y[1]=y+shape[1];
1301  Y[0]=Y[1]-1;
1302  Y[2]=Y[1]+1;
1303  Y[0]%=shape[1];
1304  Y[1]%=shape[1];
1305  Y[2]%=shape[1];
1306 
1307  Y[0]*=jumpy;
1308  Y[1]*=jumpy;
1309  Y[2]*=jumpy;
1310 
1311  for ( std::size_t x=0; x<shape[2]; x++ )
1312  {
1313  std::size_t X[3];
1314  X[1]=x+shape[2];
1315  X[0]=X[1]-1;
1316  X[2]=X[1]+1;
1317  X[0]%=shape[2];
1318  X[1]%=shape[2];
1319  X[2]%=shape[2];
1320 
1321 
1322  std::complex<T> & current=stOut[Z[1]+Y[1]+X[1]];
1323 
1324  if ( clear_field ) current=T ( 0 );
1325 
1326  current+=alpha* (
1327  stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1328  stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1329  stIn[ ( Z[1]+Y[1]+X[0] ) ] -
1330  T ( 6 ) *stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1331  stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1332  stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1333  stIn[ ( Z[2]+Y[1]+X[1] ) ]
1334  );
1335 
1336  }
1337 
1338  }
1339  }
1340  }
1341  break;
1342 
1343 
1344  case 2:
1345  {
1346  alpha*=1;
1347  #pragma omp parallel for num_threads(get_numCPUs())
1348  for ( std::size_t z=0; z<shape[0]; z++ )
1349  {
1350  std::size_t Z[3];
1351  Z[1]=z+shape[0];
1352  Z[0]=Z[1]-1;
1353  Z[2]=Z[1]+1;
1354  Z[0]%=shape[0];
1355  Z[1]%=shape[0];
1356  Z[2]%=shape[0];
1357 
1358  Z[0]*=jumpz;
1359  Z[1]*=jumpz;
1360  Z[2]*=jumpz;
1361 
1362  for ( std::size_t y=0; y<shape[1]; y++ )
1363  {
1364  std::size_t Y[3];
1365  Y[1]=y+shape[1];
1366  Y[0]=Y[1]-1;
1367  Y[2]=Y[1]+1;
1368  Y[0]%=shape[1];
1369  Y[1]%=shape[1];
1370  Y[2]%=shape[1];
1371 
1372  Y[0]*=jumpy;
1373  Y[1]*=jumpy;
1374  Y[2]*=jumpy;
1375 
1376  for ( std::size_t x=0; x<shape[2]; x++ )
1377  {
1378  std::size_t X[3];
1379  X[1]=x+shape[2];
1380  X[0]=X[1]-1;
1381  X[2]=X[1]+1;
1382  X[0]%=shape[2];
1383  X[1]%=shape[2];
1384  X[2]%=shape[2];
1385 
1386 
1387  std::complex<T> & current=stOut[Z[1]+Y[1]+X[1]];
1388 
1389  if ( clear_field ) current=T ( 0 );
1390 
1391  current+=alpha* (
1392  voxel_weights[0]*stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1393  voxel_weights[1]*stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1394  voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[0] ) ] -
1395  voxel_weights[3]*stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1396  voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1397  voxel_weights[1]*stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1398  voxel_weights[0]*stIn[ ( Z[2]+Y[1]+X[1] ) ]
1399  );
1400 
1401  }
1402 
1403  }
1404  }
1405  }
1406  break;
1407 
1408  default:
1409  printf ( "unsoported operator\n" );
1410  }
1411  return STA_RESULT_SUCCESS;
1412 }
1413 
1414 
1415 
1416 
1417 
1418 template<typename T,typename S>
1419 STA_RESULT sta_laplace_Ncomponents_C (
1420  const S * stIn,std::complex<T> * stOut ,
1421  const std::size_t shape[],
1422  int dim=0, // number of components
1423  int type=1,
1424  std::complex<T> alpha=1,
1425  const T v_size[]=NULL,
1426  int stride_in = -1,
1427  int stride_out = -1,
1428  bool clear_field=false)
1429 
1430 {
1431 
1432  if ( v_size!=NULL )
1433  {
1434  if (hanalysis::verbose>0)
1435  printf ( "WARNING! element size is not considered yet!\n" );
1436  }
1437 
1438 
1439 
1440 // /*T voxel_size[3];
1441 // voxel_size[0]=voxel_size[1]=voxel_size[2]=1;*/
1442  T voxel_weights[4];
1443  voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;
1444 
1445 // if ( v_size!=NULL )
1446 // {
1447 // voxel_size[0]/=v_size[0]; // Zdir
1448 // voxel_size[1]/=v_size[1]; // Ydir
1449 // voxel_size[2]/=v_size[2]; // Xdir
1450 // }
1451 
1452  if ( v_size!=NULL )// && ((v_size[0]!=1)||(v_size[1]!=1)||(v_size[2]!=1)))
1453  {
1454  if ( type!=1 )
1455  {
1456  if (hanalysis::verbose>0)
1457  printf ( "WARNING! element size is not considered yet!\n" );
1458  } else
1459  {
1460 
1461  voxel_weights[0]/=v_size[0]*v_size[0]; // Zdir
1462  voxel_weights[1]/=v_size[1]*v_size[1]; // Ydir
1463  voxel_weights[2]/=v_size[2]*v_size[2]; // Xdir
1464 
1465  }
1466  if (hanalysis::verbose>0)
1467  printf ( "v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
1468  }
1469 
1470  voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]); // Center
1471 
1472 
1473  if (hanalysis::verbose>0)
1474  printf ( "laplace_C: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );
1475 
1476 
1477 
1478  if ( stride_in == -1 )
1479  stride_in = dim;
1480  if ( stride_out == -1 )
1481  stride_out = dim;
1482 
1483 
1484  std::size_t jumpz=shape[1]*shape[2];
1485  std::size_t jumpy=shape[2];
1486 
1487  switch ( type )
1488  {
1489  case 0:
1490  {
1491  alpha*= ( T ) 0.2;
1492  #pragma omp parallel for num_threads(get_numCPUs())
1493  for ( std::size_t z=0; z<shape[0]; z++ )
1494  {
1495  std::size_t Z[3];
1496  Z[1]=z+shape[0];
1497  Z[0]=Z[1]-1;
1498  Z[2]=Z[1]+1;
1499  Z[0]%=shape[0];
1500  Z[1]%=shape[0];
1501  Z[2]%=shape[0];
1502 
1503  Z[0]*=jumpz;
1504  Z[1]*=jumpz;
1505  Z[2]*=jumpz;
1506 
1507  for ( std::size_t y=0; y<shape[1]; y++ )
1508  {
1509  std::size_t Y[3];
1510  Y[1]=y+shape[1];
1511  Y[0]=Y[1]-1;
1512  Y[2]=Y[1]+1;
1513  Y[0]%=shape[1];
1514  Y[1]%=shape[1];
1515  Y[2]%=shape[1];
1516 
1517  Y[0]*=jumpy;
1518  Y[1]*=jumpy;
1519  Y[2]*=jumpy;
1520 
1521  for ( std::size_t x=0; x<shape[2]; x++ )
1522  {
1523  std::size_t X[3];
1524  X[1]=x+shape[2];
1525  X[0]=X[1]-1;
1526  X[2]=X[1]+1;
1527  X[0]%=shape[2];
1528  X[1]%=shape[2];
1529  X[2]%=shape[2];
1530 
1531  for ( int j=0; j<dim; j++ )
1532  {
1533  std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+j;
1534  std::complex<T> & current=stOut[offset];
1535  if ( clear_field ) current=T ( 0 );
1536 
1537  current+=alpha* (
1538  stIn[ ( Z[0]+Y[0]+X[1] ) *stride_in+j] +
1539  stIn[ ( Z[0]+Y[1]+X[0] ) *stride_in+j] +
1540  stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in+j] +
1541  stIn[ ( Z[0]+Y[1]+X[2] ) *stride_in+j] +
1542  stIn[ ( Z[0]+Y[2]+X[1] ) *stride_in+j] +
1543  stIn[ ( Z[1]+Y[0]+X[0] ) *stride_in+j] +
1544  stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in+j] +
1545  stIn[ ( Z[1]+Y[0]+X[2] ) *stride_in+j] +
1546  stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in+j]-
1547  T ( 18 ) *stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in+j] +
1548  stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in+j] +
1549  stIn[ ( Z[1]+Y[2]+X[0] ) *stride_in+j] +
1550  stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in+j] +
1551  stIn[ ( Z[1]+Y[2]+X[2] ) *stride_in+j]+
1552  stIn[ ( Z[2]+Y[0]+X[1] ) *stride_in+j] +
1553  stIn[ ( Z[2]+Y[1]+X[0] ) *stride_in+j] +
1554  stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in+j] +
1555  stIn[ ( Z[2]+Y[1]+X[2] ) *stride_in+j] +
1556  stIn[ ( Z[2]+Y[2]+X[1] ) *stride_in+j]
1557  );
1558  }
1559  }
1560 
1561  }
1562  }
1563  }
1564  break;
1565  case 1:
1566  {
1567  alpha*= ( T ) 1.0;
1568  #pragma omp parallel for num_threads(get_numCPUs())
1569  for ( std::size_t z=0; z<shape[0]; z++ )
1570  {
1571  std::size_t Z[3];
1572  Z[1]=z+shape[0];
1573  Z[0]=Z[1]-1;
1574  Z[2]=Z[1]+1;
1575  Z[0]%=shape[0];
1576  Z[1]%=shape[0];
1577  Z[2]%=shape[0];
1578 
1579  Z[0]*=jumpz;
1580  Z[1]*=jumpz;
1581  Z[2]*=jumpz;
1582 
1583 
1584 
1585  for ( std::size_t y=0; y<shape[1]; y++ )
1586  {
1587  std::size_t Y[3];
1588  Y[1]=y+shape[1];
1589  Y[0]=Y[1]-1;
1590  Y[2]=Y[1]+1;
1591  Y[0]%=shape[1];
1592  Y[1]%=shape[1];
1593  Y[2]%=shape[1];
1594 
1595  Y[0]*=jumpy;
1596  Y[1]*=jumpy;
1597  Y[2]*=jumpy;
1598 
1599  for ( std::size_t x=0; x<shape[2]; x++ )
1600  {
1601  std::size_t X[3];
1602  X[1]=x+shape[2];
1603  X[0]=X[1]-1;
1604  X[2]=X[1]+1;
1605  X[0]%=shape[2];
1606  X[1]%=shape[2];
1607  X[2]%=shape[2];
1608 
1609  for ( int j=0; j<dim; j++ )
1610  {
1611 
1612  std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+j;
1613  std::complex<T> & current=stOut[offset];
1614  if ( clear_field ) current=T ( 0 );
1615 
1616 /* current+=alpha* (
1617  stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in+j] +
1618  stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in+j] +
1619  stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in+j] -
1620  T ( 6 ) *stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in+j] +
1621  stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in+j] +
1622  stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in+j] +
1623  stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in+j]
1624  ); */
1625 
1626  current+=alpha* (
1627  voxel_weights[0]*stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in+j] +
1628  voxel_weights[1]*stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in+j] +
1629  voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in+j] -
1630  voxel_weights[3]*stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in+j] +
1631  voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in+j] +
1632  voxel_weights[1]*stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in+j] +
1633  voxel_weights[0]*stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in+j]
1634  );
1635 
1636 
1637 
1638 
1639 
1640  }
1641  }
1642 
1643  }
1644  }
1645  }
1646  break;
1647  default:
1648  printf ( "unsupported operator\n" );
1649  }
1650 
1651 
1652 
1653 
1654  return STA_RESULT_SUCCESS;
1655 }
1656 
1657 
1658 
1659 
/// Accumulates alpha * Laplace(stIn) into stOut, applied independently to each
/// of the dim components per voxel. Unlike sta_laplace_Ncomponents_C this
/// variant walks the complex fields as interleaved REAL scalars (strides and
/// dim are doubled below), which lets the inner loop run with plain pointer
/// increments. Volumes are 3D with shape {Z,Y,X} and periodic boundaries.
///
/// \param stIn        input field; dim components per voxel, stride_in apart
/// \param stOut       output field; the stencil response is ADDED to it
/// \param shape       volume dimensions {Z,Y,X}
/// \param dim         number of components per voxel
/// \param type        0: 19-point stencil (faces+edges, scaled by 1/5);
///                    1: 7-point stencil with per-axis weights
/// \param alpha       scaling factor applied to the stencil response
/// \param v_size      optional physical element size per axis {Z,Y,X}; may be
///                    NULL. Only honored for type 1.
/// \param stride_in   component stride of stIn; -1 means densely packed (dim)
/// \param stride_out  component stride of stOut; -1 means densely packed (dim)
/// \param clear_field if true, each output value is zeroed before accumulation
/// \return STA_RESULT_SUCCESS; an unsupported type only prints a warning
template<typename T>
STA_RESULT sta_laplace_Ncomponents_R (
    const std::complex<T> * stIn,
    std::complex<T> * stOut ,
    const std::size_t shape[],
    int dim=0, // number of components
    int type=1,
    T alpha=1,
    const T v_size[]=NULL,
    int stride_in = -1,
    int stride_out = -1,
    bool clear_field=false)

{
    // voxel_weights[0..2]: per-axis weights (Z,Y,X); voxel_weights[3]: center weight
    T voxel_weights[4];
    voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;

    if ( v_size!=NULL )
    {
        if ( type!=1 )
        {
            if (hanalysis::verbose>0)
                printf ( "WARNING! element size is not considered yet!\n" );
        } else
        {
            voxel_weights[0]/=v_size[0]*v_size[0]; // Zdir
            voxel_weights[1]/=v_size[1]*v_size[1]; // Ydir
            voxel_weights[2]/=v_size[2]*v_size[2]; // Xdir
        }
        if (hanalysis::verbose>0)
            printf ( "v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
    }

    // center weight = 2 * sum of axis weights (6 for unit spacing)
    voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]);

    if (hanalysis::verbose>0)
        printf ( "laplace_R: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );

    if ( stride_in == -1 )
        stride_in = dim;
    if ( stride_out == -1 )
        stride_out = dim;

    std::size_t jumpz=shape[1]*shape[2]; // voxels per Z-slice
    std::size_t jumpy=shape[2];          // voxels per Y-row

    // from here on the complex data is treated as interleaved real scalars:
    // doubling strides and dim makes the j-loop cover re/im parts alike
    stride_in*=2;
    stride_out*=2;
    dim*=2;

    const T * stIn_r=(const T*) stIn;
    T * stOut_r=( T*) stOut;

    switch ( type )
    {
    case 0:
    {
        // 19-point stencil: 18 face/edge neighbours minus 18x the center, /5
        alpha*= ( T ) 0.2;
        #pragma omp parallel for num_threads(get_numCPUs())
        for ( std::size_t z=0; z<shape[0]; z++ )
        {
            // pXYZ: cursor into the neighbour at (z+X-1, y+Y-1, x+Z-1);
            // declared at thread scope, re-aimed for every voxel below
            const T * p001;
            const T * p010;
            const T * p011;
            const T * p012;
            const T * p021;
            const T * p100;
            const T * p101;
            const T * p102;
            const T * p110;

            const T * p111;

            const T * p112;
            const T * p120;
            const T * p121;
            const T * p122;
            const T * p201;
            const T * p210;
            const T * p211;
            const T * p212;
            const T * p221;

            // Z[0],Z[1],Z[2]: linear offsets of slices z-1, z, z+1 (periodic)
            std::size_t Z[3];
            Z[1]=z+shape[0];
            Z[0]=Z[1]-1;
            Z[2]=Z[1]+1;
            Z[0]%=shape[0];
            Z[1]%=shape[0];
            Z[2]%=shape[0];

            Z[0]*=jumpz;
            Z[1]*=jumpz;
            Z[2]*=jumpz;

            for ( std::size_t y=0; y<shape[1]; y++ )
            {
                std::size_t Y[3];
                Y[1]=y+shape[1];
                Y[0]=Y[1]-1;
                Y[2]=Y[1]+1;
                Y[0]%=shape[1];
                Y[1]%=shape[1];
                Y[2]%=shape[1];

                Y[0]*=jumpy;
                Y[1]*=jumpy;
                Y[2]*=jumpy;

                for ( std::size_t x=0; x<shape[2]; x++ )
                {
                    std::size_t X[3];
                    X[1]=x+shape[2];
                    X[0]=X[1]-1;
                    X[2]=X[1]+1;
                    X[0]%=shape[2];
                    X[1]%=shape[2];
                    X[2]%=shape[2];

                    // aim all 19 cursors at this voxel's neighbours
                    p001=stIn_r+ ( Z[0]+Y[0]+X[1] ) *stride_in;
                    p010=stIn_r+ ( Z[0]+Y[1]+X[0] ) *stride_in;
                    p011=stIn_r+ ( Z[0]+Y[1]+X[1] ) *stride_in;
                    p012=stIn_r+ ( Z[0]+Y[1]+X[2] ) *stride_in;
                    p021=stIn_r+ ( Z[0]+Y[2]+X[1] ) *stride_in;
                    p100=stIn_r+ ( Z[1]+Y[0]+X[0] ) *stride_in;
                    p101=stIn_r+ ( Z[1]+Y[0]+X[1] ) *stride_in;
                    p102=stIn_r+ ( Z[1]+Y[0]+X[2] ) *stride_in;
                    p110=stIn_r+ ( Z[1]+Y[1]+X[0] ) *stride_in;

                    p111=stIn_r+ ( Z[1]+Y[1]+X[1] ) *stride_in;

                    p112=stIn_r+ ( Z[1]+Y[1]+X[2] ) *stride_in;
                    p120=stIn_r+ ( Z[1]+Y[2]+X[0] ) *stride_in;
                    p121=stIn_r+ ( Z[1]+Y[2]+X[1] ) *stride_in;
                    p122=stIn_r+ ( Z[1]+Y[2]+X[2] ) *stride_in;
                    p201=stIn_r+ ( Z[2]+Y[0]+X[1] ) *stride_in;
                    p210=stIn_r+ ( Z[2]+Y[1]+X[0] ) *stride_in;
                    p211=stIn_r+ ( Z[2]+Y[1]+X[1] ) *stride_in;
                    p212=stIn_r+ ( Z[2]+Y[1]+X[2] ) *stride_in;
                    p221=stIn_r+ ( Z[2]+Y[2]+X[1] ) *stride_in;

                    T * current=stOut_r+( Z[1]+Y[1]+X[1] ) *stride_out;

                    // all cursors advance one real scalar per iteration
                    for ( int j=0; j<dim; j++ )
                    {

                        if ( clear_field ) (*current)=T ( 0 );

                        (*current++)+=alpha* (
                                          (*p001++) +
                                          (*p010++) +
                                          (*p011++) +
                                          (*p012++) +
                                          (*p021++) +
                                          (*p100++) +
                                          (*p101++) +
                                          (*p102++) +
                                          (*p110++) -
                                          T ( 18 ) * (*p111++) +
                                          (*p112++) +
                                          (*p120++) +
                                          (*p121++) +
                                          (*p122++) +
                                          (*p201++) +
                                          (*p210++) +
                                          (*p211++) +
                                          (*p212++) +
                                          (*p221++)
                                      );
                    }
                }
            }
        }
    }
    break;
    case 1:
    {
        // 7-point stencil with per-axis element-size weights
        alpha*= ( T ) 1.0;
        #pragma omp parallel for num_threads(get_numCPUs())
        for ( std::size_t z=0; z<shape[0]; z++ )
        {
            // cursors into the 6 face neighbours and the center
            const T * p011;
            const T * p101;
            const T * p110;
            const T * p111;
            const T * p112;
            const T * p121;
            const T * p211;

            std::size_t Z[3];
            Z[1]=z+shape[0];
            Z[0]=Z[1]-1;
            Z[2]=Z[1]+1;
            Z[0]%=shape[0];
            Z[1]%=shape[0];
            Z[2]%=shape[0];

            Z[0]*=jumpz;
            Z[1]*=jumpz;
            Z[2]*=jumpz;

            for ( std::size_t y=0; y<shape[1]; y++ )
            {
                std::size_t Y[3];
                Y[1]=y+shape[1];
                Y[0]=Y[1]-1;
                Y[2]=Y[1]+1;
                Y[0]%=shape[1];
                Y[1]%=shape[1];
                Y[2]%=shape[1];

                Y[0]*=jumpy;
                Y[1]*=jumpy;
                Y[2]*=jumpy;

                for ( std::size_t x=0; x<shape[2]; x++ )
                {
                    std::size_t X[3];
                    X[1]=x+shape[2];
                    X[0]=X[1]-1;
                    X[2]=X[1]+1;
                    X[0]%=shape[2];
                    X[1]%=shape[2];
                    X[2]%=shape[2];

                    p011=stIn_r+ ( Z[0]+Y[1]+X[1] ) *stride_in;
                    p101=stIn_r+ ( Z[1]+Y[0]+X[1] ) *stride_in;
                    p110=stIn_r+ ( Z[1]+Y[1]+X[0] ) *stride_in;
                    p111=stIn_r+ ( Z[1]+Y[1]+X[1] ) *stride_in;
                    p112=stIn_r+ ( Z[1]+Y[1]+X[2] ) *stride_in;
                    p121=stIn_r+ ( Z[1]+Y[2]+X[1] ) *stride_in;
                    p211=stIn_r+ ( Z[2]+Y[1]+X[1] ) *stride_in;

                    T * current=stOut_r+( Z[1]+Y[1]+X[1] ) *stride_out;

                    for ( int j=0; j<dim; j++ )
                    {
                        if ( clear_field ) (*current)=T ( 0 );

                        (*current++)+=alpha* (
                                          voxel_weights[0]*(*p011++) +
                                          voxel_weights[1]*(*p101++) +
                                          voxel_weights[2]*(*p110++) -
                                          voxel_weights[3]*(*p111++) +
                                          voxel_weights[2]*(*p112++) +
                                          voxel_weights[1]*(*p121++) +
                                          voxel_weights[0]*(*p211++)
                                      );
                    }
                }
            }
        }
    }
    break;
    default:
        printf ( "unsupported operator\n" );
    }

    return STA_RESULT_SUCCESS;
}
2050 
2051 
2052 
2053 
2054 
2055 
2056 
2057 
2058 
2059 
2060 
2061 
2062 
2063 #ifdef _STA_LINK_FFTW
2064 
2065 
2066 
2067 
2068  STA_RESULT fft ( const std::complex<double> * IN,
2069  std::complex<double> * OUT,
2070  int shape[],int numComponents,
2071  bool forward,
2072  int flag=FFTW_ESTIMATE )
2073  {
2074  #ifdef _STA_FFT_MULTI_THREAD
2075  fftw_init_threads();
2076  fftw_plan_with_nthreads ( get_numCPUs() );
2077  if ( verbose>0 ) printf ( "FFTW with %d threads \n",get_numCPUs() );
2078  #else
2079  if ( verbose>0 ) printf ( "FFTW is single threaded\n" );
2080  #endif
2081 
2082 
2083 
2084 
2085  int rank=3;
2086  int * n=shape;
2087  int howmany=numComponents;
2088  fftw_complex * in = ( fftw_complex * ) IN;
2089  int * inembed=NULL;
2090  int istride=numComponents;
2091  int idist=1;
2092  fftw_complex * out = ( fftw_complex * ) OUT;
2093  int * onembed=inembed;
2094  int odist=idist;
2095  int ostride=istride;
2096  int sign=FFTW_FORWARD;
2097  if ( !forward ) sign=FFTW_BACKWARD;
2098  unsigned flags=flag | FFTW_PRESERVE_INPUT;//FFTW_ESTIMATE;//FFTW_MEASURE | FFTW_PRESERVE_INPUT;//FFTW_ESTIMATE; //FFTW_MEASURE
2099 
2100  // switch (flags)
2101  // {
2102  // case FFTW_ESTIMATE:
2103  // printf("FFTW_ESTIMATE");
2104  // break;
2105  // case FFTW_MEASURE:
2106  // printf("FFTW_MEASURE");
2107  // break;
2108  // }
2109 
2110 
2111 
2112  #if defined (__linux__) && ! defined (_STA_MWFFTW)
2113  //#ifdef __linux__
2114  char buffer[255];
2115  gethostname ( buffer,255 );
2116  std::string s;
2117  s=std::string ( getenv ( "HOME" ) ) +std::string ( "/.stensor_mywisdom_" ) +std::string ( buffer ) +".wisdom";
2118 
2119  // if (fftw_import_wisdom_from_filename(s.c_str())==0)
2120  // printf ( "Error reading wisdom file: %s!\n",s.c_str() );
2121 
2122  FILE *ifp;
2123  ifp = fopen ( s.c_str(),"r" );
2124  if ( ifp!=NULL )
2125  {
2126  if ( 0==fftw_import_wisdom_from_file ( ifp ) )
2127  printf ( "Error reading wisdom file: %s!\n",s.c_str() );
2128  fclose ( ifp );
2129  }
2130  else printf ( "Wisdom file does not exist!\n" );
2131 
2132  #endif
2133 
2134  fftw_plan plan=fftw_plan_many_dft ( rank,n,howmany,in,inembed,istride, idist,out,onembed, ostride, odist, sign, flags | FFTW_PRESERVE_INPUT );
2135  if ( plan==NULL )
2136  {
2137  printf ( "no plan\n" );
2138  return STA_RESULT_FAILED;
2139  }
2140 
2141  fftw_execute_dft ( plan,in, out );
2142 
2143  //#ifdef __linux__
2144  #if defined (__linux__) && ! defined (_STA_MWFFTW)
2145  // fftw_export_wisdom_to_file(s.c_str());
2146  ifp = fopen ( s.c_str(),"w" );
2147  if ( ifp!=NULL )
2148  {
2149  fftw_export_wisdom_to_file ( ifp );
2150  fclose ( ifp );
2151  }
2152  else printf ( "Error creating file!\n" );
2153 
2154  #endif
2155 
2156  fftw_destroy_plan ( plan );
2157 
2158  return STA_RESULT_SUCCESS;
2159  //fftw_cleanup()
2160  //fftw_cleanup_threads();
2161 
2162  }
2163 
2164 
2165 
2166  STA_RESULT fft ( const std::complex<float> * IN,
2167  std::complex<float> * OUT,
2168  int shape[],
2169  int numComponents,
2170  bool forward,
2171  int flag=FFTW_ESTIMATE )
2172  {
2173  #ifdef _STA_FFT_MULTI_THREAD
2174  fftwf_init_threads();
2175  fftwf_plan_with_nthreads ( get_numCPUs() );
2176  if ( verbose>0 ) printf ( "FFTW with %d threads \n",get_numCPUs() );
2177  #else
2178  if ( verbose>0 ) printf ( "FFTW is single threaded\n" );
2179  #endif
2180 
2181  int rank=3;
2182  int * n=shape;
2183  int howmany=numComponents;
2184  fftwf_complex * in = ( fftwf_complex * ) IN;
2185  int * inembed=NULL;
2186  int istride=numComponents;
2187  int idist=1;
2188  fftwf_complex * out = ( fftwf_complex * ) OUT;
2189  int * onembed=inembed;
2190  int odist=idist;
2191  int ostride=istride;
2192  int sign=FFTW_FORWARD;
2193  if ( !forward ) sign=FFTW_BACKWARD;
2194  unsigned flags=flag | FFTW_PRESERVE_INPUT;//FFTW_ESTIMATE;//FFTW_MEASURE | FFTW_PRESERVE_INPUT;//FFTW_ESTIMATE; //FFTW_MEASURE
2195 
2196 
2197  //#ifdef __linux__
2198  #if defined (__linux__) && ! defined (_STA_MWFFTW)
2199  char buffer[255];
2200  gethostname ( buffer,255 );
2201  std::string s;
2202  s=std::string ( getenv ( "HOME" ) ) +std::string ( "/.stensor_mywisdom_" ) +std::string ( buffer ) +"_single.wisdom";
2203 
2204 
2205  // if (fftwf_import_wisdom_from_filename(s.c_str())==0)
2206  // printf ( "Error reading wisdom file: %s!\n",s.c_str() );
2207 
2208  FILE *ifp;
2209  ifp = fopen ( s.c_str(),"r" );
2210  if ( ifp!=NULL )
2211  {
2212  if ( 0==fftwf_import_wisdom_from_file ( ifp ) )
2213  printf ( "Error reading wisdom file: %s!\n",s.c_str() );
2214  fclose ( ifp );
2215  }
2216  else printf ( "Wisdom file does not exist!\n" );
2217 
2218  #endif
2219 
2220  fftwf_plan plan=fftwf_plan_many_dft ( rank,n,howmany,in,inembed,istride, idist,out,onembed, ostride, odist, sign, flags | FFTW_PRESERVE_INPUT );
2221  if ( plan==NULL )
2222  {
2223  printf ( "no plan\n" );
2224  return STA_RESULT_FAILED;
2225  }
2226 
2227  fftwf_execute_dft ( plan,in, out );
2228 
2229 
2230  //#ifdef __linux__
2231  #if defined (__linux__) && ! defined (_STA_MWFFTW)
2232  // fftwf_export_wisdom_to_file(s.c_str());
2233  ifp = fopen ( s.c_str(),"w" );
2234  if ( ifp!=NULL )
2235  {
2236  fftwf_export_wisdom_to_file ( ifp );
2237  fclose ( ifp );
2238  }
2239  else printf ( "Error creating file!\n" );
2240 
2241  #endif
2242 
2243  fftwf_destroy_plan ( plan );
2244  return STA_RESULT_SUCCESS;
2245  }
2246 
2247 
2248 
2249 #else
2250  #if ! defined (_STA_CUDA)
2251 
2252  #include "matlab_fft.icc"
2253 
2254  #endif
2255 #endif
2256 
2257 
2258 
2259 
2260 
2261 
2262 
2263 
2264 
2265 
2266 
2267 
2268 
2269 
2270 
2271 
2272 
2273 
2274 
2275 
2276 
2277 
2278 
2279 
2280 /*#####################################################
2281 
2282 
2283  FIELD in V^l
2284 
2285 
2286 #######################################################*/
2287 
2288 template<typename T>
2289 T * sta_th_precomputeCGcoefficients_R(int J,int L, int j, T norm)
2290 {
2291  std::size_t count=0;
2292 
2293  int rank=j+L;
2294  for (int m=-rank; m<=0; m++)
2295  {
2296  for (int M=-J; M<=J; M++)
2297  {
2298  //if ((rank+L>=J)&&(std::abs(rank-L)<=J))
2299  {
2300  int n=M-m;
2301  if (std::abs(n)<=L)
2302  {
2303  count++;
2304  }
2305  }
2306  }
2307  }
2308 
2309  T * cg= new T[count];
2310  count=0;
2311  for (int m=-rank; m<=0; m++)
2312  {
2313  for (int M=-J; M<=J; M++)
2314  {
2315  //if ((rank+L>=J)&&(std::abs(rank-L)<=J))
2316  {
2317  int n=M-m;
2318  if (std::abs(n)<=L)
2319  {
2320  double sign=((L+n) & 1) ? -1.0 : 1.0;
2321  cg[count++]=sign*norm*(T)hanalysis::clebschGordan(L,-n,J,M,rank,m);
2322 
2323  //double cg2=std::pow(-1.0f,(float)(L+n))*hanalysis::clebschGordan(L,-n,J,M,rank,m);
2324  //printf("%f %f\n",cg[count-1],cg2);
2325 
2326 
2327  }
2328  }
2329  }
2330  }
2331  return cg;
2332 }
2333 
2334 
2335 
2336 
/// Applies a spherical-tensor coupling (weighted by precomputed CG
/// coefficients) to a family of J+1 input fields, accumulating the result
/// into stOut. Input fields have rank L (2L+1 components each, only the
/// non-positive orders stored explicitly); the output has rank L+j.
/// Fields are walked as interleaved real scalars; negative array indices
/// below are valid because the per-voxel cursors are pre-offset to order 0.
///
/// \param stIn        array of J+1 input fields
/// \param stOut       output field; results are ADDED unless clear_field
/// \param shape       volume dimensions {Z,Y,X}
/// \param J,L,j       tensor ranks: inputs have rank L, output rank L+j
/// \param alpha       scale folded into the precomputed CG table
/// \param stride_in   component stride of the inputs; -1 -> 2L+1 (packed)
/// \param stride_out  component stride of the output; -1 -> j+L+1 (packed)
/// \param clear_field if true, output components are zeroed before writing
/// \return STA_RESULT_SUCCESS, STA_RESULT_INVALID_TENSOR_RANK on bad ranks,
///         or STA_RESULT_FAILED if the internal consistency check trips
template<typename T>
STA_RESULT sta_th_R (
    const std::complex<T> ** stIn,
    std::complex<T> * stOut ,
    const std::size_t shape[],
    int J,
    int L,
    int j,
    T alpha,
    int stride_in = -1,
    int stride_out = -1,
    bool clear_field=false)
{
    if (J<0)
        return STA_RESULT_INVALID_TENSOR_RANK;
    if (L<0)
        return STA_RESULT_INVALID_TENSOR_RANK;
    if (std::abs(j)>J)
        return STA_RESULT_INVALID_TENSOR_RANK;

    // odd J+j means the result lives in the "inv" subspace; the extra
    // factor of I for that case is still a TODO (see below)
    bool inIV=false;
    if ((J+j)%2!=0)
    {
        printf("result in inv\n");
        inIV=true;
    }

    // CG table already includes the sign convention and the alpha scale
    T * Cg = sta_th_precomputeCGcoefficients_R<T> ( J,L,j,alpha);

    // number of explicitly stored (non-positive order) components
    std::size_t vectorLengthIn= (2* L+1 );
    std::size_t vectorLengthOut= ( j+L+1 );

    if ( stride_in == -1 )
        stride_in = vectorLengthIn;
    if ( stride_out == -1 )
        stride_out = vectorLengthOut;

    // complex data is walked as interleaved real scalars
    stride_in*=2;
    stride_out*=2;

    int rankIn=L;
    int rankOut=L+j;

    // consistency check on the rank combination; should never fire after
    // the guards above
    if (!((rankOut>=0)&&(std::abs(j)<=J)&&(J<=j+2*L)&&(L>=0)))
    {
        printf("trotz test bis hierher??");
        return STA_RESULT_FAILED;
    }

    // real-scalar offset of the order-0 component within a voxel
    int rankIn_times_2=rankIn*2;
    int rankOut_times_2=rankOut*2;

    std::size_t jumpz=shape[1]*shape[2]; // voxels per Z-slice

    T * stOutR= ( T * ) stOut;

    #pragma omp parallel for num_threads(get_numCPUs())
    for ( std::size_t z=0; z<shape[0]; z++ )
    {
        std::size_t Z=z;
        Z*=jumpz;

        // per-field cursors, pre-offset so index 0 is the order-0 component
        const T ** current_In = new const T *[J+1];
        for (int a=0; a<=J; a++)
            current_In[a]=(const T * )stIn[a]+ ( Z*stride_in+rankIn_times_2 );

        T * current_Out=stOutR+ ( Z*stride_out+rankOut_times_2 );

        T tmp0R; // one input component (real/imag), conjugated as needed
        T tmp0I;

        T tmp1R; // CG-weighted accumulator for the current output order m
        T tmp1I;
        for ( std::size_t i=0; i<jumpz; i++ )
        {
            std::size_t count=0; // walks the CG table in creation order
            for (int m=-rankOut; m<=0; m++)
            {
                if ( clear_field )
                {
                    current_Out[m*2]=T ( 0 );
                    current_Out[m*2+1]=T ( 0 );
                }
                tmp1R=0;
                tmp1I=0;

                for (int M=-J; M<=J; M++)
                {
                    int n=M-m;
                    if (std::abs(n)<=L)
                    {
                        tmp0R=0;
                        tmp0I=0;

                        if (M>0)
                        {
                            // positive orders are not stored; reconstruct them
                            // from the symmetry f_M(n) = (-1)^(n+M) conj(f_{-M}(-n))
                            if ( n>0 )
                            {
                                if ( (n+M)%2==0 )
                                {
                                    tmp0R=current_In[-M+J][-n*2];
                                    tmp0I=-current_In[-M+J][-n*2+1];
                                }
                                else
                                {
                                    tmp0R=-current_In[-M+J][-n*2];
                                    tmp0I=current_In[-M+J][-n*2+1];
                                }
                            }
                        } else
                        {
                            // non-positive order: read the stored component
                            tmp0R=current_In[M+J][n*2];
                            tmp0I=current_In[M+J][n*2+1];
                        }
                        tmp1R+=Cg[count]*tmp0R;
                        tmp1I+=Cg[count++]*tmp0I;
                    }
                }

                if (inIV)
                {
                    //TODO
                    // times I
                }

                // accumulate (conjugated) into the output component of order m
                current_Out[m*2]+=tmp1R;
                current_Out[m*2+1]-=tmp1I;

            }
            // advance all cursors to the next voxel
            for (int a=0; a<=J; a++)
                current_In[a]+=stride_in;
            current_Out+=stride_out;

        }

        delete [] current_In;
    }
    delete [] Cg;
    return STA_RESULT_SUCCESS;
}
2513 
2514 
2515 
2516 
2517 
2518 
2519 
2520 
2521 
2522 
2523 
2524 
2525 
2526 template<typename T>
2527 T * sta_product_precomputeCGcoefficients_R( int J1,int J2, int J, bool normalized, T fact)
2528 {
2529  T norm=(T)1;
2530  if (normalized)
2531  {
2532  //assert((J1+J2+J)%2==0);
2533  norm=(T)1/(T)hanalysis::clebschGordan(J1,0,J2,0,J,0);
2534  }
2535 
2536 
2537  norm*=fact;
2538  std::size_t count=0;
2539  for (int m=-J; m<=0; m++)
2540  {
2541  for (int m1=-J1; m1<=J1; m1++)
2542  {
2543  int m2=m-m1;
2544  if (abs(m2)<=J2)
2545  {
2546  count++;
2547  }
2548  }
2549  }
2550  T * cg= new T[count];
2551  count=0;
2552  for (int m=-J; m<=0; m++)
2553  {
2554  for (int m1=-J1; m1<=J1; m1++)
2555  {
2556  int m2=m-m1;
2557  if (abs(m2)<=J2)
2558  {
2559  cg[count++]=norm*(T)hanalysis::clebschGordan(J1,m1,J2,m2,J,m);
2560  }
2561  }
2562  }
2563  return cg;
2564 }
2565 
2566 
2567 
2568 
2569 template<typename T>
2570 T * sta_tripleproduct_precomputeCGcoefficients_R( int J1,int J2,int J3, int Jprod1,int Jprod2, bool normalized, T fact)
2571 {
2572  T norm=(T)1;
2573  if (normalized)
2574  {
2575  //assert((J1+J2+J)%2==0);
2576  norm=(T)1/((T)(hanalysis::clebschGordan(J1,0,J2,0,Jprod1,0)*hanalysis::clebschGordan(Jprod1,0,J3,0,Jprod2,0)));
2577  }
2578 
2579 
2580  norm*=fact;
2581  std::size_t count=0;
2582 
2583  for ( int m=-Jprod2; m<=0; m++ )
2584  {
2585  for ( int m3=-J3; m3<=J3; m3++ )
2586  {
2587  int mL=m-m3;
2588  if ( abs ( mL ) <=Jprod1 )
2589  {
2590  for ( int m1=-J1; m1<=J1; m1++ )
2591  {
2592  int m2=mL-m1;
2593  if ( abs ( m2 ) <=J2 )
2594  {
2595  count++;
2596  }
2597  }
2598  }
2599  }
2600  }
2601  //printf("!! %d !!\n",count);
2602  T * cg= new T[count];
2603  count=0;
2604 
2605  for ( int m=-Jprod2; m<=0; m++ )
2606  {
2607  for ( int m3=-J3; m3<=J3; m3++ )
2608  {
2609  int mL=m-m3;
2610  if ( abs ( mL ) <=Jprod1 )
2611  {
2612  for ( int m1=-J1; m1<=J1; m1++ )
2613  {
2614  int m2=mL-m1;
2615  if ( abs ( m2 ) <=J2 )
2616  {
2617  cg[count++]=norm*(T)hanalysis::clebschGordan(J1,m1,J2,m2,Jprod1,mL)*(T)hanalysis::clebschGordan(Jprod1,mL,J3,m3,Jprod2,m);
2618  //cg[count++]=1;
2619 // if (std::abs(cg[count-1])<0.0000000000001)
2620 // {
2621 // printf("%f (%d %d, %d %d| %d %d) [%f] (%d %d, %d %d| %d %d) [%f] \n",cg[count-1],J1,m1,J2,m2,Jprod1,mL,hanalysis::clebschGordan(J1,m1,J2,m2,Jprod1,mL),Jprod1,mL,J3,m3,Jprod2,m,hanalysis::clebschGordan(Jprod1,mL,J3,m3,Jprod2,m));
2622 // }
2623 
2624  }
2625  }
2626  }
2627  }
2628  }
2629  return cg;
2630 }
2631 
/*
    computes the voxel-wise triple spherical tensor product: stIn1 (rank J1) is
    coupled with stIn2 (rank J2) to an intermediate rank Jprod1, which is then
    coupled with stIn3 (rank J3) to the resulting rank Jprod2; the whole
    product is scaled by alpha.
    \param stIn1 rank-J1 input field (only the m<=0 components are stored)
    \param stIn2 rank-J2 input field
    \param stIn3 rank-J3 input field
    \param stOut rank-Jprod2 output field (accumulated into unless clear_field)
    \param shape field dimensions {z,y,x}
    \param alpha additional real weighting factor
    \param normalize use the normalized tensor products (both couplings must
           then have even parity)
    \param stride_in1,stride_in2,stride_in3,stride_out distance (in complex
           elements) between consecutive voxels; -1 selects the densely packed
           default of rank+1 components
    \param clear_field if true, the output components are zeroed before
           accumulation
    \returns STA_RESULT_SUCCESS on success, STA_RESULT_INVALID_PRODUCT if a
             coupling violates the triangle inequality or a normalized
             coupling has odd parity
    \warning ensure that stIn1, stIn2, stIn3, stOut and shape exist
             and have been \b allocated properly!
*/
template<typename T>
STA_RESULT sta_tripleproduct_R (
    const std::complex<T> * stIn1,
    const std::complex<T> * stIn2,
    const std::complex<T> * stIn3,
    std::complex<T> * stOut ,
    const std::size_t shape[],
    int J1,
    int J2,
    int J3,
    int Jprod1,
    int Jprod2,
    T alpha,
    bool normalize,
    int stride_in1 = -1,
    int stride_in2 = -1,
    int stride_in3 = -1,
    int stride_out = -1,
    bool clear_field=false)
{
    // first coupling J1 x J2 -> Jprod1: triangle inequality and, for the
    // normalized product, even parity
    if ( ( std::abs ( J1-J2 ) >Jprod1 ) || ( Jprod1>std::abs ( J1+J2 ) ) )
        return STA_RESULT_INVALID_PRODUCT;
    if ( ( ( J1+J2+Jprod1 ) %2!=0 ) && ( normalize ) )
        return STA_RESULT_INVALID_PRODUCT;

    // second coupling Jprod1 x J3 -> Jprod2
    if ( ( std::abs ( Jprod1-J3 ) >Jprod2 ) || ( Jprod2>std::abs ( Jprod1+J3 ) ) )
        return STA_RESULT_INVALID_PRODUCT;
    if ( ( ( J3+Jprod1+Jprod2 ) %2!=0 ) && ( normalize ) )
        return STA_RESULT_INVALID_PRODUCT;

    // The result lies in i*V exactly when one (XOR) of the two couplings has
    // odd parity; the accumulation loop below then multiplies by i. When the
    // first coupling is odd, the sign of alpha is additionally flipped.
    bool tmp1InIv = ( ( J1+J2+Jprod1 ) %2!=0 );
    bool tmp2InIv = ( ( J3+Jprod2+Jprod1 ) %2!=0 );
    bool resultInIv= (tmp1InIv&&(!tmp2InIv))||((!tmp1InIv)&&(tmp2InIv));
    if (tmp1InIv) alpha*=-1;

    // Clebsch-Gordan table, precomputed in exactly the traversal order of the
    // loops below (outer m, then m3, then m1, ascending).
    T * Cg= sta_tripleproduct_precomputeCGcoefficients_R(J1,J2,J3,Jprod1,Jprod2,normalize,alpha);

    // only the m<=0 components are stored: rank+1 complex entries per voxel
    std::size_t vectorLengthJ1= ( J1+1 );
    std::size_t vectorLengthJ2= ( J2+1 );
    std::size_t vectorLengthJ3= ( J3+1 );
    std::size_t vectorLengthJprod2= ( Jprod2+1 );

    if ( stride_in1 == -1 )
        stride_in1 = vectorLengthJ1;
    if ( stride_in2 == -1 )
        stride_in2 = vectorLengthJ2;
    if ( stride_in3 == -1 )
        stride_in3 = vectorLengthJ3;
    if ( stride_out == -1 )
        stride_out = vectorLengthJprod2;

    stride_in1*=2; // because input field is complex but pointer are real
    stride_in2*=2;
    stride_in3*=2;
    stride_out*=2;

    // offset (in T units) from a voxel's first stored component to its m=0
    // component; the m<=0 components are then addressed as ptr[m*2]
    int J3_times_2=J3*2;
    int J2_times_2=J2*2;
    int J1_times_2=J1*2;
    int J_times_2=Jprod2*2;

    std::size_t jumpz=shape[1]*shape[2]; // voxels per z-slice

    const T * stIn1R= ( const T * ) stIn1;
    const T * stIn2R= ( const T * ) stIn2;
    const T * stIn3R= ( const T * ) stIn3;
    T * stOutR= ( T * ) stOut;

    // parallel over z-slices; each thread writes a disjoint output region
    #pragma omp parallel for num_threads(get_numCPUs())
    for ( std::size_t z=0; z<shape[0]; z++ )
    {
        std::size_t Z=z;
        Z*=jumpz;
        const T * current_J1R=stIn1R+ ( Z*stride_in1+J1_times_2 );
        const T * current_J2R=stIn2R+ ( Z*stride_in2+J2_times_2 );
        const T * current_J3R=stIn3R+ ( Z*stride_in3+J3_times_2 );
        T * current_JR=stOutR+ ( Z*stride_out+J_times_2 );

        T a; // real part of the stIn1 component (m1)
        T b; // imaginary part of the stIn1 component

        T c; // real part of the stIn2 component (m2)
        T d; // imaginary part of the stIn2 component

        T e; // real part of the stIn3 component (m3)
        T f; // imaginary part of the stIn3 component

        for ( std::size_t i=0; i<jumpz; i++ )
        {
            std::size_t count=0;
            for ( int m=-Jprod2; m<=0; m++ )
            {
                if ( clear_field )
                {
                    current_JR[m*2]=T ( 0 );
                    current_JR[m*2+1]=T ( 0 );
                }

                for ( int m3=-J3; m3<=J3; m3++ )
                {
                    int mL=m-m3; // projection of the intermediate rank-Jprod1 field
                    if ( abs ( mL ) <=Jprod1 )
                    {
                        // components with m3>0 are not stored; reconstruct them
                        // via A_{m} = (-1)^m * conj(A_{-m})
                        if ( m3>0 )
                        {
                            if ( m3%2==0 )
                            {
                                e=current_J3R[-m3*2];
                                f=-current_J3R[-m3*2+1];
                            }
                            else
                            {
                                e=-current_J3R[-m3*2];
                                f=current_J3R[-m3*2+1];
                            }
                        }
                        else
                        {
                            e=current_J3R[m3*2];
                            f=current_J3R[m3*2+1];
                        }

                        for ( int m1=-J1; m1<=J1; m1++ )
                        {
                            int m2=mL-m1;
                            if ( abs ( m2 ) <=J2 )
                            {
                                // same reconstruction for the stIn1 component
                                if ( m1>0 )
                                {
                                    if ( m1%2==0 )
                                    {
                                        a=current_J1R[-m1*2];
                                        b=-current_J1R[-m1*2+1];
                                    }
                                    else
                                    {
                                        a=-current_J1R[-m1*2];
                                        b=current_J1R[-m1*2+1];
                                    }
                                }
                                else
                                {
                                    a=current_J1R[m1*2];
                                    b=current_J1R[m1*2+1];
                                }

                                // ... and for the stIn2 component
                                if ( m2>0 )
                                {
                                    if ( m2%2==0 )
                                    {
                                        c=current_J2R[-m2*2];
                                        d=-current_J2R[-m2*2+1];
                                    }
                                    else
                                    {
                                        c=-current_J2R[-m2*2];
                                        d=current_J2R[-m2*2+1];
                                    }
                                }
                                else
                                {
                                    c=current_J2R[m2*2];
                                    d=current_J2R[m2*2+1];
                                }

                                // (a+ib)(c+id)(e+if): real part a*(ec-df)-b*(ed+cf),
                                // imaginary part a*(ed+cf)+b*(ec-df); in the i*V
                                // case the product is additionally multiplied by i
                                // (re -= imag, im += real)
                                if ( resultInIv )
                                {
                                    current_JR[m*2]-=Cg[count]* ( a*(e*d+c*f)+b*(e*c-d*f));
                                    current_JR[m*2+1]+=Cg[count++]* ( a*(e*c-d*f)-b*(e*d+c*f));
                                }
                                else
                                {
                                    current_JR[m*2]+=Cg[count]* ( a*(e*c-d*f)-b*(e*d+c*f));
                                    current_JR[m*2+1]+=Cg[count++]* ( a*(e*d+c*f)+b*(e*c-d*f));
                                }

                            }
                        }
                    }
                }
            }

            // advance to the next voxel
            current_J1R+=stride_in1;
            current_J2R+=stride_in2;
            current_J3R+=stride_in3;
            current_JR+=stride_out;
        }
    }
    delete [] Cg;
    return STA_RESULT_SUCCESS;
}
2878 
2879 
2880 /*
2881  computes the spherical tensor product \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \f$ and \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \f$, respectively \n
2882  \param stIn1 \f$ \mathbf{stIn1} \in \mathcal T_{J_1}\f$
2883  \param stIn2 \f$ \mathbf{stIn2} \in \mathcal T_{J_2} \f$
2884  \param stOut \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ if normalized, \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ else
2885  \param shape
2886  \param J1 \f$ J_1 \in \mathbb N \f$ tensor rank of the first field
2887  \param J2 \f$ J_2 \in \mathbb N \f$ tensor rank of the second field
2888  \param J \f$ J \in \mathbb N \f$ tensor rank of the resulting field
2889  \param alpha \f$ \alpha \in \mathbb C \f$ additional weighting factor
2890  \param normalize normalized tensor products?: true=\f$ \bullet_{J}\f$ , false=\f$ \circ_{J}\f$
2891  \returns \f$
2892  \left\{
2893  \begin{array}{ll}
2894  0 & \mbox{if tensor product exists}\\
2895  -1 & \mbox{ else }
2896  \end{array}
2897  \right.
2898  \f$
2899  \warning ensure that stIn1, stIn2, stOut and shape exist
2900  and have been \b allocated properly!
2901 */
2902 
2903 /*
2904 template<typename T>
2905 int sta_product_R (
2906  const std::complex<T> * stIn1,
2907  const std::complex<T> * stIn2,
2908  std::complex<T> * stOut ,
2909  const std::size_t shape[],
2910  int J1,
2911  int J2,
2912  int J,
2913  std::complex<T> alpha,
2914  bool normalize,
2915  int stride_in1 = -1,
2916  int stride_in2 = -1,
2917  int stride_out = -1,
2918  bool clear_field=false)
2919 {
2920  if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
2921  return -1;
2922  if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
2923  return -1;
2924 
2925  if ( ( J1+J2+J ) %2!=0 )
2926  {
2927  alpha*=std::complex<T> ( 0,1 );
2928  }
2929 
2930  std::complex<T> * cg= sta_product_precomputeCGcoefficients_R<std::complex<T> > ( J1,J2, J,normalize,alpha );
2931  T * Cg = ( T* ) cg;
2932  //
2933  std::size_t vectorLengthJ1= ( J1+1 );
2934  std::size_t vectorLengthJ2= ( J2+1 );
2935  std::size_t vectorLengthJ= ( J+1 );
2936 
2937  if ( stride_in1 == -1 )
2938  stride_in1 = vectorLengthJ1;
2939  if ( stride_in2 == -1 )
2940  stride_in2 = vectorLengthJ2;
2941  if ( stride_out == -1 )
2942  stride_out = vectorLengthJ;
2943 
2944  stride_in1*=2; // because input field is complex but pointer are real
2945  stride_in2*=2;
2946  stride_out*=2;
2947 
2948  std::size_t jumpz=shape[1]*shape[2];
2949 
2950  const T * stIn1R= ( const T * ) stIn1;
2951  const T * stIn2R= ( const T * ) stIn2;
2952  T * stOutR= ( T * ) stOut;
2953 
2954 #pragma omp parallel for num_threads(get_numCPUs())
2955  for ( std::size_t z=0;z<shape[0];z++ )
2956  {
2957  std::size_t Z=z;
2958  Z*=jumpz;
2959 
2960  const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J1 );
2961 
2962  const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J2 );
2963 
2964  T * current_JR=stOutR+ ( Z*stride_out+2*J );
2965 
2966  T tmp0R;
2967  T tmp0I;
2968 
2969  T tmp1R;
2970  T tmp1I;
2971 
2972  for ( std::size_t i=0;i<jumpz;i++ )
2973  {
2974  std::size_t count=0;
2975  for ( int m=-J;m<=0;m++ )
2976  {
2977 
2978  if ( clear_field )
2979  {
2980  current_JR[m*2]=T ( 0 );
2981  current_JR[m*2+1]=T ( 0 );
2982  }
2983 
2984  for ( int m1=-J1;m1<=J1;m1++ )
2985  {
2986  int m2=m-m1;
2987  if ( abs ( m2 ) <=J2 )
2988  {
2989  if ( m1>0 )
2990  {
2991  if ( m1%2==0 )
2992  {
2993  tmp0R=current_J1R[-m1*2];
2994  tmp0I=-current_J1R[-m1*2+1];
2995  }
2996  else
2997  {
2998  tmp0R=-current_J1R[-m1*2];
2999  tmp0I=current_J1R[-m1*2+1];
3000  }
3001  }
3002  else
3003  {
3004  tmp0R=current_J1R[m1*2];
3005  tmp0I=current_J1R[m1*2+1];
3006  }
3007 
3008 
3009  if ( m2>0 )
3010  {
3011  if ( m2%2==0 )
3012  {
3013  tmp1R=current_J2R[-m2*2];
3014  tmp1I=-current_J2R[-m2*2+1];
3015  }
3016  else
3017  {
3018  tmp1R=-current_J2R[-m2*2];
3019  tmp1I=current_J2R[-m2*2+1];
3020  }
3021  }
3022  else
3023  {
3024  tmp1R=current_J2R[m2*2];
3025  tmp1I=current_J2R[m2*2+1];
3026  }
3027  T ce=tmp1R*Cg[count];
3028  T de=tmp1I*Cg[count++];
3029  T cf=tmp1R*Cg[count];
3030  T df=tmp1I*Cg[count++];
3031  ce=ce-df;
3032  cf=cf+de;
3033 
3034  current_JR[m*2]+=tmp0R*ce-tmp0I*cf;
3035  current_JR[m*2+1]+=tmp0R*cf+tmp0I*ce;;
3036  }
3037  }
3038  }
3039 
3040 
3041 
3042 
3043  current_J1R+=stride_in1;
3044  current_J2R+=stride_in2;
3045  current_JR+=stride_out;
3046  }
3047  }
3048  delete [] cg;
3049  return 0;
3050 }
3051 */
3052 
3053 
3054 
3055 /*
3056  computes the spherical tensor product \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \f$ and \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \f$, respectively \n
3057  \param stIn1 \f$ \mathbf{stIn1} \in \mathcal T_{J_1}\f$
3058  \param stIn2 \f$ \mathbf{stIn2} \in \mathcal T_{J_2} \f$
3059  \param stOut \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ if normalized, \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ else
3060  \param shape
3061  \param J1 \f$ J_1 \in \mathbb N \f$ tensor rank of the first field
3062  \param J2 \f$ J_2 \in \mathbb N \f$ tensor rank of the second field
3063  \param J \f$ J \in \mathbb N \f$ tensor rank of the resulting field
3064  \param alpha \f$ \alpha \in \mathbb R \f$ additional weighting factor
3065  \param normalize normalized tensor products?: true=\f$ \bullet_{J}\f$ , false=\f$ \circ_{J}\f$
 \returns STA_RESULT_SUCCESS if the tensor product exists,
 STA_RESULT_INVALID_PRODUCT otherwise
3074  \warning ensure that stIn1, stIn2, stOut and shape exist
3075  and have been \b allocated properly!
3076 */
template<typename T>
STA_RESULT sta_product_R (
    const std::complex<T> * stIn1,
    const std::complex<T> * stIn2,
    std::complex<T> * stOut ,
    const std::size_t shape[],
    int J1,
    int J2,
    int J,
    T alpha,
    bool normalize,
    int stride_in1 = -1,
    int stride_in2 = -1,
    int stride_out = -1,
    bool clear_field=false)
{
    // coupling J1 x J2 -> J must satisfy the triangle inequality
    // (for nonnegative ranks std::abs(J1+J2) == J1+J2)
    if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
        return STA_RESULT_INVALID_PRODUCT;
    // the normalized product requires even parity J1+J2+J (the normalization
    // coefficient <J1 0, J2 0 | J 0> vanishes otherwise)
    if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
        return STA_RESULT_INVALID_PRODUCT;

    // Clebsch-Gordan table, already scaled by alpha (and the normalization
    // factor if requested), laid out in exactly the traversal order of the
    // loops below: outer m=-J..0, inner m1=-J1..J1 with |m-m1|<=J2.
    T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J1,J2, J,normalize,alpha );

    // odd parity J1+J2+J: the result lies in i*V, i.e. the accumulation below
    // multiplies the product by the imaginary unit
    bool resultInIv= ( ( J1+J2+J ) %2!=0 );

    // only the m<=0 components are stored: rank+1 complex entries per voxel
    std::size_t vectorLengthJ1= ( J1+1 );
    std::size_t vectorLengthJ2= ( J2+1 );
    std::size_t vectorLengthJ= ( J+1 );

    if ( stride_in1 == -1 )
        stride_in1 = vectorLengthJ1;
    if ( stride_in2 == -1 )
        stride_in2 = vectorLengthJ2;
    if ( stride_out == -1 )
        stride_out = vectorLengthJ;

    stride_in1*=2; // because input field is complex but pointer are real
    stride_in2*=2;
    stride_out*=2;

    // offset (in T units) from a voxel's first stored component to its m=0
    // component; the m<=0 components are then addressed as ptr[m*2]
    int J2_times_2=J2*2;
    int J1_times_2=J1*2;
    int J_times_2=J*2;

    std::size_t jumpz=shape[1]*shape[2]; // voxels per z-slice

    const T * stIn1R= ( const T * ) stIn1;
    const T * stIn2R= ( const T * ) stIn2;
    T * stOutR= ( T * ) stOut;

    // parallel over z-slices; each thread writes a disjoint output region
    #pragma omp parallel for num_threads(get_numCPUs())
    for ( std::size_t z=0; z<shape[0]; z++ )
    {
        std::size_t Z=z;
        Z*=jumpz;
        const T * current_J1R=stIn1R+ ( Z*stride_in1+J1_times_2 );
        const T * current_J2R=stIn2R+ ( Z*stride_in2+J2_times_2 );
        T * current_JR=stOutR+ ( Z*stride_out+J_times_2 );

        T tmp0R; // real/imaginary part of the stIn1 component (m1)
        T tmp0I;

        T tmp1R; // real/imaginary part of the stIn2 component (m2 = m-m1)
        T tmp1I;

        for ( std::size_t i=0; i<jumpz; i++ )
        {
            std::size_t count=0;
            for ( int m=-J; m<=0; m++ )
            {
                if ( clear_field )
                {
                    current_JR[m*2]=T ( 0 );
                    current_JR[m*2+1]=T ( 0 );
                }

                for ( int m1=-J1; m1<=J1; m1++ )
                {
                    int m2=m-m1;
                    if ( abs ( m2 ) <=J2 )
                    {
                        // components with m1>0 are not stored; reconstruct
                        // them via A_{m1} = (-1)^{m1} * conj(A_{-m1})
                        if ( m1>0 )
                        {
                            if ( m1%2==0 )
                            {
                                tmp0R=current_J1R[-m1*2];
                                tmp0I=-current_J1R[-m1*2+1];
                            }
                            else
                            {
                                tmp0R=-current_J1R[-m1*2];
                                tmp0I=current_J1R[-m1*2+1];
                            }
                        }
                        else
                        {
                            tmp0R=current_J1R[m1*2];
                            tmp0I=current_J1R[m1*2+1];
                        }
                        // same reconstruction for the stIn2 component
                        if ( m2>0 )
                        {
                            if ( m2%2==0 )
                            {
                                tmp1R=current_J2R[-m2*2];
                                tmp1I=-current_J2R[-m2*2+1];
                            }
                            else
                            {
                                tmp1R=-current_J2R[-m2*2];
                                tmp1I=current_J2R[-m2*2+1];
                            }
                        }
                        else
                        {
                            tmp1R=current_J2R[m2*2];
                            tmp1I=current_J2R[m2*2+1];
                        }
                        // accumulate Cg * (tmp0 * tmp1); in the i*V case the
                        // complex product is additionally multiplied by i
                        // (re -= imag, im += real)
                        if ( resultInIv )
                        {
                            current_JR[m*2]-=Cg[count]* ( tmp0R*tmp1I+tmp0I*tmp1R );
                            current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1R-tmp0I*tmp1I );
                        }
                        else
                        {
                            current_JR[m*2]+=Cg[count]* ( tmp0R*tmp1R-tmp0I*tmp1I );
                            current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1I+tmp0I*tmp1R );
                        }
                    }
                }
            }
            // advance to the next voxel
            current_J1R+=stride_in1;
            current_J2R+=stride_in2;
            current_JR+=stride_out;
        }
    }
    delete [] Cg;
    return STA_RESULT_SUCCESS;
}
3285 
3286 
3287 
3288 
3289 
3290 
3291 
3292 
3293 /*
3294  computes the spherical tensor product \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \f$ and \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \f$, respectively \n
3295  \param stIn1 \f$ \mathbf{stIn1} \in \mathcal T_{J_1}\f$
3296  \param stIn2 \f$ \mathbf{stIn2} \in \mathcal T_{J_2} \f$
3297  \param stOut \f$ \alpha(\mathbf{stIn1} \bullet_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ if normalized, \f$ \alpha(\mathbf{stIn1} \circ_{J} \mathbf{stIn2}) \in \mathcal T_{J}\f$ else
3298  \param shape
3299  \param J1 \f$ J_1 \in \mathbb N \f$ tensor rank of the first field
3300  \param J2 \f$ J_2 \in \mathbb N \f$ tensor rank of the second field
3301  \param J \f$ J \in \mathbb N \f$ tensor rank of the resulting field
3302  \param alpha \f$ \alpha \in \mathbb R \f$ additional weighting factor
3303  \param normalize normalized tensor products?: true=\f$ \bullet_{J}\f$ , false=\f$ \circ_{J}\f$
 \returns STA_RESULT_SUCCESS if the tensor product exists,
 STA_RESULT_INVALID_PRODUCT otherwise
3312  \warning ensure that stIn1, stIn2, stOut and shape exist
3313  and have been \b allocated properly!
3314 */
template<typename T>
STA_RESULT sta_product_Rft (
    const std::complex<T> * stIn1,
    const std::complex<T> * stIn2,
    std::complex<T> * stOut ,
    const std::size_t shape[],
    int J1,
    int J2,
    int J,
    T alpha,
    bool normalize,
    int stride_in1 = -1,
    int stride_in2 = -1,
    int stride_out = -1,
    bool clear_field=false)
{
    // coupling J1 x J2 -> J must satisfy the triangle inequality
    if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
        return STA_RESULT_INVALID_PRODUCT;
    // the normalized product requires even parity J1+J2+J
    if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
        return STA_RESULT_INVALID_PRODUCT;

    // Clebsch-Gordan table in the traversal order of the loops below
    T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J1,J2, J,normalize,alpha );

    // odd parity: result lies in i*V (accumulation multiplies by i)
    bool resultInIv= ( ( J1+J2+J ) %2!=0 );

    // only the m<=0 components are stored: rank+1 complex entries per voxel
    std::size_t vectorLengthJ1= ( J1+1 );
    std::size_t vectorLengthJ2= ( J2+1 );
    std::size_t vectorLengthJ= ( J+1 );

    if ( stride_in1 == -1 )
        stride_in1 = vectorLengthJ1;
    if ( stride_in2 == -1 )
        stride_in2 = vectorLengthJ2;
    if ( stride_out == -1 )
        stride_out = vectorLengthJ;

    stride_in1*=2; // because input field is complex but pointer are real
    stride_in2*=2;
    stride_out*=2;


    std::size_t jumpz=shape[1]*shape[2]; // voxels per z-slice
    std::size_t jumpy=shape[2];          // voxels per y-row

    const T * stIn1R= ( const T * ) stIn1;
    const T * stIn2R= ( const T * ) stIn2;
    T * stOutR= ( T * ) stOut;

    // parallel over z-slices; each thread writes a disjoint output region
    #pragma omp parallel for num_threads(get_numCPUs())
    for ( std::size_t z=0; z<shape[0]; z++ )
    {
        std::size_t Z=z;
        Z*=jumpz;
        const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J1 );
        const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J2 );
        T * current_JR=stOutR+ ( Z*stride_out+2*J );

        T tmp0R; // real/imaginary part of the stIn1 component (m1)
        T tmp0I;

        T tmp1R; // real/imaginary part of the stIn2 component (m2 = m-m1)
        T tmp1I;

        for ( std::size_t i=0; i<jumpz; i++ )
        {

            // Index of the point-mirrored voxel (-z,-y,-x modulo the field
            // shape). The m>0 components are read (conjugated) from this
            // mirrored position rather than the current one — presumably
            // because in the Fourier domain of a real-valued field the value
            // at -k is the conjugate of the value at k (TODO confirm; "ft"
            // in the function name suggests frequency-domain input).
            std::size_t Y=i/jumpy;
            std::size_t X=i%jumpy;
            std::size_t mpos= ((shape[0]-z)%shape[0])*jumpz+
                              ((shape[1]-Y)%shape[1])*jumpy+
                              ((shape[2]-X)%shape[2]);

            const T * current_J1Rmirrowed=stIn1R+ (mpos*stride_in1+2*J1 );
            const T * current_J2Rmirrowed=stIn2R+ (mpos*stride_in2+2*J2 );

            std::size_t count=0;
            for ( int m=-J; m<=0; m++ )
            {
                if ( clear_field )
                {
                    current_JR[m*2]=T ( 0 );
                    current_JR[m*2+1]=T ( 0 );
                }


                for ( int m1=-J1; m1<=J1; m1++ )
                {
                    int m2=m-m1;
                    if ( abs ( m2 ) <=J2 )
                    {
                        // m1>0 components are reconstructed from the mirrored
                        // voxel via A_{m1} = (-1)^{m1} * conj(A_{-m1})
                        if ( m1>0 )
                        {
                            if ( m1%2==0 )
                            {
                                tmp0R=current_J1Rmirrowed[-m1*2];
                                tmp0I=-current_J1Rmirrowed[-m1*2+1];
                            }
                            else
                            {
                                tmp0R=-current_J1Rmirrowed[-m1*2];
                                tmp0I=current_J1Rmirrowed[-m1*2+1];
                            }
                        }
                        else
                        {
                            tmp0R=current_J1R[m1*2];
                            tmp0I=current_J1R[m1*2+1];
                        }
                        // same reconstruction for the stIn2 component
                        if ( m2>0 )
                        {
                            if ( m2%2==0 )
                            {
                                tmp1R=current_J2Rmirrowed[-m2*2];
                                tmp1I=-current_J2Rmirrowed[-m2*2+1];
                            }
                            else
                            {
                                tmp1R=-current_J2Rmirrowed[-m2*2];
                                tmp1I=current_J2Rmirrowed[-m2*2+1];
                            }
                        }
                        else
                        {
                            tmp1R=current_J2R[m2*2];
                            tmp1I=current_J2R[m2*2+1];
                        }
                        // accumulate Cg * (tmp0 * tmp1); in the i*V case the
                        // product is additionally multiplied by i
                        if ( resultInIv )
                        {
                            current_JR[m*2]-=Cg[count]* ( tmp0R*tmp1I+tmp0I*tmp1R );
                            current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1R-tmp0I*tmp1I );
                        }
                        else
                        {
                            current_JR[m*2]+=Cg[count]* ( tmp0R*tmp1R-tmp0I*tmp1I );
                            current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1I+tmp0I*tmp1R );
                        }
                    }
                }
            }
            // advance to the next voxel
            current_J1R+=stride_in1;
            current_J2R+=stride_in2;
            current_JR+=stride_out;
        }
    }
    delete [] Cg;
    return STA_RESULT_SUCCESS;
}
3468 
3469 
3470 
3471 
/*
    computes a voxel-wise scalar (rank-0) feature from two rank-J fields: per
    voxel the sum over m<=0 of Re(stIn1_m * conj(stIn2_m)), with m<0 terms
    counted twice (they stand in for the unstored m>0 components); the result
    is accumulated into a real-valued field of type S.
*/
template<typename T,typename S >
STA_RESULT sta_feature_product_R (
    const std::complex<T> * stIn1,
    const std::complex<T> * stIn2,
    S * stOut ,
    const std::size_t shape[],
    int J,
    T alpha,
    bool normalize,
    int stride_in1 = -1,
    int stride_in2 = -1,
    int stride_out = -1,
    bool clear_field=false)
{

    // coefficients for the J x J -> 0 coupling, scaled by alpha (and the
    // normalization factor if requested)
    T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J,J, 0,normalize,alpha );

    std::size_t vectorLengthJ1= ( J+1 );
    std::size_t vectorLengthJ2= ( J+1 );
    std::size_t vectorLengthJ= ( 1 );

    if ( stride_in1 == -1 )
        stride_in1 = vectorLengthJ1;
    if ( stride_in2 == -1 )
        stride_in2 = vectorLengthJ2;
    if ( stride_out == -1 )
        stride_out = vectorLengthJ;

    stride_in1*=2; // because input field is complex but pointer are real
    stride_in2*=2;
    // stride_out is deliberately not doubled: the output field is real-valued
    // (one S per voxel component), unlike the complex inputs


    std::size_t jumpz=shape[1]*shape[2]; // voxels per z-slice

    const T * stIn1R= ( const T * ) stIn1;
    const T * stIn2R= ( const T * ) stIn2;
    S * stOutR= ( S * ) stOut;

    // parallel over z-slices; each thread writes a disjoint output region
    #pragma omp parallel for num_threads(get_numCPUs())
    for ( std::size_t z=0; z<shape[0]; z++ )
    {
        std::size_t Z=z;
        Z*=jumpz;
        const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J );
        const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J );
        S * current_JR=stOutR+ ( Z*stride_out );

        for ( std::size_t i=0; i<jumpz; i++ )
        {
            // NOTE(review): count is never incremented below, so every term
            // is weighted with Cg[0]. For the J,J->0 coupling the magnitude
            // of the sign-folded coefficient is constant over m1, so this may
            // be intentional — confirm the overall sign convention, e.g. for
            // odd J.
            std::size_t count=0;
            {
                if ( clear_field )
                {
                    current_JR[0]=T ( 0 );
                }

                for ( int m1=-J; m1<=0; m1++ )
                {
                    // Re(A_m * conj(B_m)) = ReA*ReB + ImA*ImB; the m1<0 terms
                    // appear twice (once for the unstored mirror component)
                    if (m1!=0)
                    {
                        current_JR[0]+=2*Cg[count]* ( current_J1R[m1*2]*current_J2R[m1*2]
                                                      +current_J1R[m1*2+1]*current_J2R[m1*2+1] );
                    }
                    else
                        current_JR[0]+=Cg[count]* ( current_J1R[m1*2]*current_J2R[m1*2]
                                                    +current_J1R[m1*2+1]*current_J2R[m1*2+1] );
                }
            }
            // advance to the next voxel
            current_J1R+=stride_in1;
            current_J2R+=stride_in2;
            current_JR+=stride_out;
        }
    }
    delete [] Cg;
    return STA_RESULT_SUCCESS;
}
3548 
3549 
3550 
3551 
3552 /*
3553 template<typename T>
3554 int sta_product_Rft (
3555  const std::complex<T> * stIn1,
3556  const std::complex<T> * stIn2,
3557  std::complex<T> * stOut ,
3558  const std::size_t shape[],
3559  int J1,
3560  int J2,
3561  int J,
3562  std::complex<T> alpha,
3563  bool normalize,
3564  int stride_in1 = -1,
3565  int stride_in2 = -1,
3566  int stride_out = -1,
3567  bool clear_field=false)
3568 {
3569  if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
3570  return -1;
3571  if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
3572  return -1;
3573 
3574  if ( ( J1+J2+J ) %2!=0 )
3575  {
3576  alpha*=std::complex<T> ( 0,1 );
3577  }
3578 
3579 
3580  std::complex<T> * cg= sta_product_precomputeCGcoefficients_R<std::complex<T> > ( J1,J2, J,normalize,alpha );
3581  T * Cg = ( T* ) cg;
3582  //
3583  std::size_t vectorLengthJ1= ( J1+1 );
3584  std::size_t vectorLengthJ2= ( J2+1 );
3585  std::size_t vectorLengthJ= ( J+1 );
3586 
3587  if ( stride_in1 == -1 )
3588  stride_in1 = vectorLengthJ1;
3589  if ( stride_in2 == -1 )
3590  stride_in2 = vectorLengthJ2;
3591  if ( stride_out == -1 )
3592  stride_out = vectorLengthJ;
3593 
3594  stride_in1*=2; // because input field is complex but pointer are real
3595  stride_in2*=2;
3596  stride_out*=2;
3597 
3598  std::size_t jumpz=shape[1]*shape[2];
3599  std::size_t jumpy=shape[2];
3600 
3601  const T * stIn1R= ( const T * ) stIn1;
3602  const T * stIn2R= ( const T * ) stIn2;
3603  T * stOutR= ( T * ) stOut;
3604 
3605 #pragma omp parallel for num_threads(get_numCPUs())
3606  for ( std::size_t z=0;z<shape[0];z++ )
3607  {
3608  std::size_t Z=z;
3609  Z*=jumpz;
3610 
3611  const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J1 );
3612 
3613 
3614  const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J2 );
3615 
3616 
3617  T * current_JR=stOutR+ ( Z*stride_out+2*J );
3618 
3619  T tmp0R;
3620  T tmp0I;
3621 
3622  T tmp1R;
3623  T tmp1I;
3624 
3625  for ( std::size_t i=0;i<jumpz;i++ )
3626  {
3627  std::size_t Y=i/jumpy;
3628  std::size_t X=i%jumpy;
3629  std::size_t mpos= ((shape[0]-z)%shape[0])*jumpz+
3630  ((shape[1]-Y)%shape[1])*jumpy+
3631  ((shape[2]-X)%shape[2]);
3632 
3633  const T * current_J1Rmirrowed=stIn1R+ (mpos*stride_in1+2*J1 );
3634  const T * current_J2Rmirrowed=stIn2R+ (mpos*stride_in2+2*J2 );
3635 
3636 
3637  std::size_t count=0;
3638  for ( int m=-J;m<=0;m++ )
3639  {
3640 
3641  if ( clear_field )
3642  {
3643  current_JR[m*2]=T ( 0 );
3644  current_JR[m*2+1]=T ( 0 );
3645  }
3646 
3647  for ( int m1=-J1;m1<=J1;m1++ )
3648  {
3649  int m2=m-m1;
3650  if ( abs ( m2 ) <=J2 )
3651  {
3652  if ( m1>0 )
3653  {
3654  if ( m1%2==0 )
3655  {
3656  tmp0R=current_J1Rmirrowed[-m1*2];
3657  tmp0I=-current_J1Rmirrowed[-m1*2+1];
3658  }
3659  else
3660  {
3661  tmp0R=-current_J1Rmirrowed[-m1*2];
3662  tmp0I=current_J1Rmirrowed[-m1*2+1];
3663  }
3664  }
3665  else
3666  {
3667  tmp0R=current_J1R[m1*2];
3668  tmp0I=current_J1R[m1*2+1];
3669  }
3670 
3671 
3672  if ( m2>0 )
3673  {
3674  if ( m2%2==0 )
3675  {
3676  tmp1R=current_J2Rmirrowed[-m2*2];
3677  tmp1I=-current_J2Rmirrowed[-m2*2+1];
3678  }
3679  else
3680  {
3681  tmp1R=-current_J2Rmirrowed[-m2*2];
3682  tmp1I=current_J2Rmirrowed[-m2*2+1];
3683  }
3684  }
3685  else
3686  {
3687  tmp1R=current_J2R[m2*2];
3688  tmp1I=current_J2R[m2*2+1];
3689  }
3690  T ce=tmp1R*Cg[count];
3691  T de=tmp1I*Cg[count++];
3692  T cf=tmp1R*Cg[count];
3693  T df=tmp1I*Cg[count++];
3694  ce=ce-df;
3695  cf=cf+de;
3696 
3697  current_JR[m*2]+=tmp0R*ce-tmp0I*cf;
3698  current_JR[m*2+1]+=tmp0R*cf+tmp0I*ce;;
3699  }
3700  }
3701  }
3702 
3703 
3704 
3705 
3706  current_J1R+=stride_in1;
3707  current_J2R+=stride_in2;
3708  current_JR+=stride_out;
3709  }
3710  }
3711  delete [] cg;
3712  return 0;
3713 }*/
3714 
3715 
3716 //#define _STA_OLD_DERIV
3717 
3718 #ifdef _STA_OLD_DERIV
3719 template<typename T,typename S>
3720 STA_RESULT sta_derivatives_R(
3721  const S * stIn,
3722  std::complex<T> * stOut ,
3723  const std::size_t shape[],
3724  int J,
3725  int Jupdown, // either -1 0 or 1
3726  bool conjugate=false,
3727  //std::complex<T>
3728  T alpha=(T)1.0,
3729  const T v_size[]=NULL,
3730  int stride_in = -1,
3731  int stride_out = -1,
3732  bool clear_field = false)
3733 {
3734 
3735  alpha/=T(2);
3736  if ( abs ( Jupdown ) >1 ) return STA_RESULT_INVALID_TENSOR_RANK;
3737  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
3738 
3739  std::complex<T> imag=-std::complex<T>(0,1);
3740  if (conjugate) imag*=T( -1 );
3741 
3742  T voxel_size[3];
3743  voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
3744  if (v_size!=NULL)
3745  {
3746  voxel_size[0]/=v_size[0]; // Zdir
3747  voxel_size[1]/=v_size[1]; // Ydir
3748  voxel_size[2]/=v_size[2]; // Xdir
3749  }
3750 
3751  imag*=voxel_size[1];
3752 
3753  int J1=(T)(J+Jupdown);
3754 
3755  std::size_t vectorLengthJ=J+1;
3756  std::size_t vectorLengthJ1=(J1)+1;
3757 
3758  std::size_t jumpz=shape[1]*shape[2];
3759  std::size_t jumpy=shape[2];
3760 
3761  if (stride_in == -1)
3762  stride_in = vectorLengthJ;
3763  if (stride_out == -1)
3764  stride_out = vectorLengthJ1;
3765 
3766 
3767  T * CGTable=new T[3*vectorLengthJ1];
3768  T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
3769  if (Jupdown==0) shnorm=1;
3770  // printf("shnorm: %f\n",shnorm);
3771  for (int M=-(J1); M<=(0); M++)
3772  {
3773  CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
3774  CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
3775  CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
3776  }
3777  T * CGTable0=&CGTable[0];
3778  CGTable0+=(J1);
3779  T * CGTable1=&CGTable[vectorLengthJ1];
3780  CGTable1+=(J1);
3781  T * CGTable2=&CGTable[2*vectorLengthJ1];
3782  CGTable2+=(J1);
3783 
3784  #pragma omp parallel for num_threads(get_numCPUs())
3785  for (std::size_t z=0; z<shape[0]; z++)
3786  {
3787  std::size_t Z[3];
3788  Z[1]=z+shape[0];
3789  Z[0]=Z[1]-1;
3790  Z[2]=Z[1]+1;
3791  Z[0]%=shape[0];
3792  Z[1]%=shape[0];
3793  Z[2]%=shape[0];
3794 
3795  Z[0]*=jumpz;
3796  Z[1]*=jumpz;
3797  Z[2]*=jumpz;
3798 
3799  const S * derivX1;
3800  const S * derivX0;
3801 
3802  const S * derivY1;
3803  const S * derivY0;
3804 
3805  const S * derivZ1;
3806  const S * derivZ0;
3807 
3808 
3809  for (std::size_t y=0; y<shape[1]; y++)
3810  {
3811  std::size_t Y[3];
3812  Y[1]=y+shape[1];
3813  Y[0]=Y[1]-1;
3814  Y[2]=Y[1]+1;
3815  Y[0]%=shape[1];
3816  Y[1]%=shape[1];
3817  Y[2]%=shape[1];
3818 
3819  Y[0]*=jumpy;
3820  Y[1]*=jumpy;
3821  Y[2]*=jumpy;
3822 
3823  for (std::size_t x=0; x<shape[2]; x++)
3824  {
3825  std::size_t X[3];
3826  X[1]=x+shape[2];
3827  X[0]=X[1]-1;
3828  X[2]=X[1]+1;
3829  X[0]%=shape[2];
3830  X[1]%=shape[2];
3831  X[2]%=shape[2];
3832 
3833  derivX1=&stIn[(Z[1]+Y[1]+X[0])*stride_in]+J;
3834  derivX0=&stIn[(Z[1]+Y[1]+X[2])*stride_in]+J;
3835 
3836  derivY1=&stIn[(Z[1]+Y[0]+X[1])*stride_in]+J;
3837  derivY0=&stIn[(Z[1]+Y[2]+X[1])*stride_in]+J;
3838 
3839  derivZ1=&stIn[(Z[0]+Y[1]+X[1])*stride_in]+J;
3840  derivZ0=&stIn[(Z[2]+Y[1]+X[1])*stride_in]+J;
3841 
3842  std::size_t offset=(Z[1]+Y[1]+X[1])*stride_out+J1;
3843 
3844  for (int M=-(J1); M<=(0); M++)
3845  {
3846  //std::complex<T> current=T(0);
3847  std::complex<T> & current=stOut[offset+M];
3848  if ( clear_field ) current=T ( 0 );
3849  std::complex<T> tmp=T ( 0 );
3850 
3851  if (abs(M+1)<=J) // m1=-1 m2=M+1 M
3852  {
3853  int m2=M+1;
3854  if (M==0)
3855  {
3856  //m2*=-1;
3857  tmp-=CGTable0[M]*(voxel_size[2]*std::conj(derivX0[-m2]-derivX1[-m2])+imag*std::conj(derivY0[-m2]-derivY1[-m2]));
3858  } else
3859  tmp+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+imag*(derivY0[m2]-derivY1[m2]));
3860  }
3861  if (M>=-J) // m1=0 m2=M M
3862  {
3863  tmp+=CGTable1[M]*(derivZ0[M]-derivZ1[M]);
3864  }
3865  if (M-1>=-J) // m1=1 m2=M-1 M
3866  {
3867  int m2=M-1;
3868  tmp+=CGTable2[M]*(-voxel_size[2]*(derivX0[m2]-derivX1[m2])+imag*(derivY0[m2]-derivY1[m2]));
3869  }
3870  current+=tmp*alpha;
3871  }
3872 
3873  }
3874  }
3875  }
3876 
3877  delete [] CGTable;
3878  return (STA_RESULT_SUCCESS);
3879 }
3880 
3881 
3882 
3883 template<typename T,typename S>
3884 STA_RESULT sta_derivatives2_R(
3885  const S * stIn,
3886  std::complex<T> * stOut ,
3887  const std::size_t shape[],
3888  int J,
3889  int Jupdown, // either +2 or -2
3890  bool conjugate=false,
3891  T alpha= ( T ) 1.0,
3892  const T v_size[]=NULL,
3893  int stride_in = -1,
3894  int stride_out = -1,
3895  bool clear_field = false )
3896 {
3897  if ( abs ( Jupdown ) >2 ) return STA_RESULT_INVALID_TENSOR_RANK;
3898  if ( abs ( Jupdown ) ==1 ) return STA_RESULT_INVALID_TENSOR_RANK;
3899  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
3900 
3901  if (v_size!=NULL)
3902  {
3903  if (hanalysis::verbose>0)
3904  printf("WARNING! element size is not considered yet!\n");
3905  }
3906 
3907  std::complex<T> imag=-std::complex<T> ( 0,1 );
3908  if (conjugate) imag*=T( -1 );
3909 
3910  alpha*=T(sqrt(3.0/2.0));
3911 
3912  int J1=J+Jupdown;
3913  //if (abs(Jupdown)!=2) return -1;
3914 
3915  int vectorLengthJ=J+1;
3916  int vectorLengthJ1=(J1)+1;
3917 
3918  if (stride_in == -1)
3919  stride_in = vectorLengthJ;
3920  if (stride_out == -1)
3921  stride_out = vectorLengthJ1;
3922 
3923 
3924  std::size_t jumpz=shape[1]*shape[2];
3925  std::size_t jumpy=shape[2];
3926 
3927 
3928 
3929  T * CGTable=new T[5*vectorLengthJ1];
3930  T shnorm=hanalysis::clebschGordan(2,0,J,0,J1,0);
3931  // if (Jupdown==0) shnorm=1;
3932  // printf("shnorm: %f\n",shnorm);
3933  for (int M=-(J1); M<=(0); M++)
3934  {
3935  CGTable[M+(J1)] =hanalysis::clebschGordan(2,-2,J,M+2,J1,M)/shnorm;
3936  CGTable[M+(J1)+vectorLengthJ1] =hanalysis::clebschGordan(2,-1,J,M+1,J1,M)/shnorm;;
3937  CGTable[M+(J1)+2*vectorLengthJ1]=hanalysis::clebschGordan(2,0,J,M,J1,M)/shnorm;
3938  CGTable[M+(J1)+3*vectorLengthJ1]=hanalysis::clebschGordan(2,1,J,M-1,J1,M)/shnorm;
3939  CGTable[M+(J1)+4*vectorLengthJ1]=hanalysis::clebschGordan(2,2,J,M-2,J1,M)/shnorm;
3940  }
3941  T * CGTable0=&CGTable[0];
3942  CGTable0+=(J1);
3943  T * CGTable1=&CGTable[vectorLengthJ1];
3944  CGTable1+=(J1);
3945  T * CGTable2=&CGTable[2*vectorLengthJ1];
3946  CGTable2+=(J1);
3947  T * CGTable3=&CGTable[3*vectorLengthJ1];
3948  CGTable3+=(J1);
3949  T * CGTable4=&CGTable[4*vectorLengthJ1];
3950  CGTable4+=(J1);
3951 
3952  #pragma omp parallel for num_threads(get_numCPUs())
3953  for (std::size_t z=0; z<shape[0]; z++)
3954  {
3955  std::size_t Z[5];
3956  Z[2]=z+shape[0];
3957  Z[0]=Z[2]-2;
3958  Z[1]=Z[2]-1;
3959  Z[3]=Z[2]+1;
3960  Z[4]=Z[2]+2;
3961  Z[0]%=shape[0];
3962  Z[1]%=shape[0];
3963  Z[2]%=shape[0];
3964  Z[3]%=shape[0];
3965  Z[4]%=shape[0];
3966 
3967  Z[0]*=jumpz;
3968  Z[1]*=jumpz;
3969  Z[2]*=jumpz;
3970  Z[3]*=jumpz;
3971  Z[4]*=jumpz;
3972 
3973 
3974 
3975  //const S * X1Y1Z1;
3976  const S * X1Y1Z2;
3977  //const S * X1Y1Z3;
3978  const S * X1Y2Z1;
3979  const S * X1Y2Z2;
3980  const S * X1Y2Z3;
3981  //const S * X1Y3Z1;
3982  const S * X1Y3Z2;
3983  //const S * X1Y3Z3;
3984 
3985 
3986  const S * X2Y1Z1;
3987  const S * X2Y1Z2;
3988  const S * X2Y1Z3;
3989  const S * X2Y2Z1;
3990  const S * X2Y2Z2;
3991  const S * X2Y2Z3;
3992  const S * X2Y3Z1;
3993  const S * X2Y3Z2;
3994  const S * X2Y3Z3;
3995 
3996 
3997  //const S * X3Y1Z1;
3998  const S * X3Y1Z2;
3999  //const S * X3Y1Z3;
4000  const S * X3Y2Z1;
4001  const S * X3Y2Z2;
4002  const S * X3Y2Z3;
4003  //const S * X3Y3Z1;
4004  const S * X3Y3Z2;
4005  //const S * X3Y3Z3;
4006 
4007 
4008 
4009  for (std::size_t y=0; y<shape[1]; y++)
4010  {
4011  std::size_t Y[5];
4012  Y[2]=y+shape[1];
4013  Y[0]=Y[2]-2;
4014  Y[1]=Y[2]-1;
4015  Y[3]=Y[2]+1;
4016  Y[4]=Y[2]+2;
4017  Y[0]%=shape[1];
4018  Y[1]%=shape[1];
4019  Y[2]%=shape[1];
4020  Y[3]%=shape[1];
4021  Y[4]%=shape[1];
4022 
4023  Y[0]*=jumpy;
4024  Y[1]*=jumpy;
4025  Y[2]*=jumpy;
4026  Y[3]*=jumpy;
4027  Y[4]*=jumpy;
4028 
4029 
4030  for (std::size_t x=0; x<shape[2]; x++)
4031  {
4032  std::size_t X[5];
4033  X[2]=x+shape[0];
4034  X[0]=X[2]-2;
4035  X[1]=X[2]-1;
4036  X[3]=X[2]+1;
4037  X[4]=X[2]+2;
4038  X[0]%=shape[2];
4039  X[1]%=shape[2];
4040  X[2]%=shape[2];
4041  X[3]%=shape[2];
4042  X[4]%=shape[2];
4043 
4044 
4045  //X1Y1Z1=&stIn[(Z[1]+Y[1]+X[1])*stride_in]+J;
4046  X1Y1Z2=&stIn[(Z[2]+Y[1]+X[1])*stride_in]+J;
4047  //X1Y1Z3=&stIn[(Z[3]+Y[1]+X[1])*stride_in]+J;
4048  X1Y2Z1=&stIn[(Z[1]+Y[2]+X[1])*stride_in]+J;
4049  X1Y2Z2=&stIn[(Z[2]+Y[2]+X[1])*stride_in]+J;
4050  X1Y2Z3=&stIn[(Z[3]+Y[2]+X[1])*stride_in]+J;
4051  //X1Y3Z1=&stIn[(Z[1]+Y[3]+X[1])*stride_in]+J;
4052  X1Y3Z2=&stIn[(Z[2]+Y[3]+X[1])*stride_in]+J;
4053  //X1Y3Z3=&stIn[(Z[3]+Y[3]+X[1])*stride_in]+J;
4054 
4055 
4056  X2Y1Z1=&stIn[(Z[1]+Y[1]+X[2])*stride_in]+J;
4057  X2Y1Z2=&stIn[(Z[2]+Y[1]+X[2])*stride_in]+J;
4058  X2Y1Z3=&stIn[(Z[3]+Y[1]+X[2])*stride_in]+J;
4059  X2Y2Z1=&stIn[(Z[1]+Y[2]+X[2])*stride_in]+J;
4060  X2Y2Z2=&stIn[(Z[2]+Y[2]+X[2])*stride_in]+J;
4061  X2Y2Z3=&stIn[(Z[3]+Y[2]+X[2])*stride_in]+J;
4062  X2Y3Z1=&stIn[(Z[1]+Y[3]+X[2])*stride_in]+J;
4063  X2Y3Z2=&stIn[(Z[2]+Y[3]+X[2])*stride_in]+J;
4064  X2Y3Z3=&stIn[(Z[3]+Y[3]+X[2])*stride_in]+J;
4065 
4066 
4067  //X3Y1Z1=&stIn[(Z[1]+Y[1]+X[3])*stride_in]+J;
4068  X3Y1Z2=&stIn[(Z[2]+Y[1]+X[3])*stride_in]+J;
4069  //X3Y1Z3=&stIn[(Z[3]+Y[1]+X[3])*stride_in]+J;
4070  X3Y2Z1=&stIn[(Z[1]+Y[2]+X[3])*stride_in]+J;
4071  X3Y2Z2=&stIn[(Z[2]+Y[2]+X[3])*stride_in]+J;
4072  X3Y2Z3=&stIn[(Z[3]+Y[2]+X[3])*stride_in]+J;
4073  //X3Y3Z1=&stIn[(Z[1]+Y[3]+X[3])*stride_in]+J;
4074  X3Y3Z2=&stIn[(Z[2]+Y[3]+X[3])*stride_in]+J;
4075  //X3Y3Z3=&stIn[(Z[3]+Y[3]+X[3])*stride_in]+J;
4076 
4077 
4078  std::size_t offset=(Z[2]+Y[2]+X[2])*stride_out+J1;
4079 
4080 
4081  for (int M=-(J1); M<=(0); M++)
4082  {
4083  std::complex<T> & current=stOut[offset+M];
4084  if ( clear_field ) current=T ( 0 );
4085  std::complex<T> ctmp=T ( 0 );
4086 
4087  if (abs(M+2)<=J) // m1=-1 m2=M+1 M
4088  {
4089  int m2=M+2;
4090  std::complex<T> tmp;
4091  if (m2>0)
4092  {
4093  std::complex<T> Dxx= (X1Y2Z2[-m2]-(T)2*X2Y2Z2[-m2]+X3Y2Z2[-m2]);
4094  std::complex<T> Dyy=(X2Y1Z2[-m2]-(T)2*X2Y2Z2[-m2]+X2Y3Z2[-m2]);
4095  std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[-m2]-X3Y1Z2[-m2]-X1Y3Z2[-m2]+X3Y3Z2[-m2]);
4096 
4097  if (m2%2==0) tmp=(T)0.5*CGTable0[M]*(std::conj(Dxx-Dyy )-imag*std::conj((T)2.0*Dxy));
4098  else tmp=(T)0.5*CGTable0[M]*(-std::conj(Dxx-Dyy )-imag*(-std::conj((T)2.0*Dxy)));
4099 
4100  } else
4101  {
4102  std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4103  std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4104  std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4105  tmp=(T)0.5*CGTable0[M]*((Dxx-Dyy )-imag*((T)2.0*Dxy));
4106  }
4107  ctmp+=tmp;
4108  }
4109 
4110 
4111 
4112  if (abs(M+1)<=J) // m1=-1 m2=M+1 M
4113  {
4114  int m2=M+1;
4115  std::complex<T> tmp;
4116 
4117  if (m2>0)
4118  {
4119  std::complex<T> Dxz=(T)0.25*(X1Y2Z1[-m2]-X1Y2Z3[-m2]-X3Y2Z1[-m2]+X3Y2Z3[-m2]);
4120  std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[-m2]-X2Y3Z1[-m2]-X2Y1Z3[-m2]+X2Y3Z3[-m2]);
4121  if (m2%2==0) tmp=CGTable1[M]*(std::conj(Dxz )-imag*std::conj(Dyz)); //tmp=std::conj(CGTable1[M]*((Dxz )-imag*(Dyz)));
4122  else tmp=CGTable1[M]*(-std::conj(Dxz )-imag*(-std::conj(Dyz))); //tmp=-std::conj(CGTable1[M]*((Dxz )-imag*(Dyz)));
4123  } else
4124  {
4125  std::complex<T> Dxz=(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4126  std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4127  tmp=CGTable1[M]*((Dxz )-imag*(Dyz));
4128  }
4129  ctmp+=tmp;
4130  }
4131 
4132 
4133 
4134  if (M>=-J) // m1=-1 m2=M+1 M
4135  {
4136  int m2=M;
4137  std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4138  std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4139  std::complex<T> Dzz=(X2Y2Z1[m2]-(T)2*X2Y2Z2[m2]+X2Y2Z3[m2]);
4140  const T SQRT6=(T)(-1.0/std::sqrt(6.0));
4141  ctmp+=CGTable2[M]*((Dxx+Dyy-(T)2.0*Dzz)*(SQRT6));
4142  }
4143 
4144  if (M-1>=-J) // m1=-1 m2=M+1 M
4145  {
4146  int m2=M-1;
4147  std::complex<T> Dxz=(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4148  std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4149  ctmp-=CGTable3[M]*((Dxz )+imag*(Dyz));
4150  }
4151 
4152 
4153  if (M-2>=-J) // m1=-1 m2=M+1 M
4154  {
4155  int m2=M-2;
4156  std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4157  std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4158  std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4159  ctmp+=(T)0.5*CGTable4[M]*((Dxx-Dyy )+imag*((T)2.0*Dxy));
4160  }
4161 
4162  current+=ctmp*alpha;
4163 
4164  }
4165 
4166  }
4167  }
4168  }
4169  delete [] CGTable;
4170  return STA_RESULT_SUCCESS;
4171 }
4172 
4173 
4174 
4175 #else
4176 
4177 
4178 template<typename T>
4179 STA_RESULT sta_derivatives_R(
4180  const std::complex<T> * stIn,
4181  std::complex<T> * stOut ,
4182  const std::size_t shape[],
4183  int J,
4184  int Jupdown, // either -1 0 or 1
4185  bool conjugate=false,
4186  T alpha=(T)1.0,
4187  const T v_size[]=NULL,
4188  int stride_in = -1,
4189  int stride_out = -1,
4190  bool clear_field = false)
4191 {
4192  alpha/=T(2);
4193  if ( abs ( Jupdown ) >1 ) return STA_RESULT_INVALID_TENSOR_RANK;
4194  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
4195 
4196  //std::complex<T> imag=-std::complex<T>(0,1);
4197  //if (conjugate) imag*=T( -1 );
4198 
4199  T voxel_size[3];
4200  voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
4201  if (v_size!=NULL)
4202  {
4203  voxel_size[0]/=v_size[0]; // Zdir
4204  voxel_size[1]/=v_size[1]; // Ydir
4205  voxel_size[2]/=v_size[2]; // Xdir
4206  }
4207 
4208  //imag*=voxel_size[1];
4209  voxel_size[1]*=-1;
4210  if (conjugate) voxel_size[1]*=T( -1 );
4211 
4212  int J1=(T)(J+Jupdown);
4213 
4214  std::size_t vectorLengthJ=J+1;
4215  std::size_t vectorLengthJ1=(J1)+1;
4216 
4217 
4218  std::size_t jumpz=shape[1]*shape[2];
4219  std::size_t jumpy=shape[2];
4220 
4221  if (stride_in == -1)
4222  stride_in = vectorLengthJ;
4223  if (stride_out == -1)
4224  stride_out = vectorLengthJ1;
4225 
4226 
4227 
4228 
4229  T * CGTable=new T[3*vectorLengthJ1];
4230 
4231  T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
4232 
4233  if (Jupdown==0) shnorm=1;
4234  // printf("shnorm: %f\n",shnorm);
4235  for (int M=-(J1); M<=(0); M++)
4236  {
4237  CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
4238  CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
4239  CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
4240  }
4241 
4242 
4243 // for (int M=-(J1); M<=(0); M++)
4244 // printf("%f %f %f\n",CGTable[M+(J1)],CGTable[M+(J1)+vectorLengthJ1],CGTable[M+(J1)+2*vectorLengthJ1]);
4245 
4246  T * CGTable0=&CGTable[0];
4247  CGTable0+=(J1);
4248  T * CGTable1=&CGTable[vectorLengthJ1];
4249  CGTable1+=(J1);
4250  T * CGTable2=&CGTable[2*vectorLengthJ1];
4251  CGTable2+=(J1);
4252 
4253 
4254  const T * stIn_r=(const T *)stIn;
4255  T * stOut_r=(T*)stOut;
4256  vectorLengthJ*=2;
4257  vectorLengthJ1*=2;
4258  stride_in*=2;
4259  stride_out*=2;
4260 
4261  int J_times_2=J*2;
4262  //int J1_times_2=J1*2;
4263 
4264  #pragma omp parallel for num_threads(get_numCPUs())
4265  for (std::size_t z=0; z<shape[0]; z++)
4266  {
4267  std::size_t Z[3];
4268  Z[1]=z+shape[0];
4269  Z[0]=Z[1]-1;
4270  Z[2]=Z[1]+1;
4271  Z[0]%=shape[0];
4272  Z[1]%=shape[0];
4273  Z[2]%=shape[0];
4274 
4275  Z[0]*=jumpz;
4276  Z[1]*=jumpz;
4277  Z[2]*=jumpz;
4278 
4279  const T * derivX1;
4280  const T * derivX0;
4281 
4282  const T * derivY1;
4283  const T * derivY0;
4284 
4285  const T * derivZ1;
4286  const T * derivZ0;
4287 
4288 
4289  for (std::size_t y=0; y<shape[1]; y++)
4290  {
4291  std::size_t Y[3];
4292  Y[1]=y+shape[1];
4293  Y[0]=Y[1]-1;
4294  Y[2]=Y[1]+1;
4295  Y[0]%=shape[1];
4296  Y[1]%=shape[1];
4297  Y[2]%=shape[1];
4298 
4299  Y[0]*=jumpy;
4300  Y[1]*=jumpy;
4301  Y[2]*=jumpy;
4302 
4303  for (std::size_t x=0; x<shape[2]; x++)
4304  {
4305  std::size_t X[3];
4306  X[1]=x+shape[2];
4307  X[0]=X[1]-1;
4308  X[2]=X[1]+1;
4309  X[0]%=shape[2];
4310  X[1]%=shape[2];
4311  X[2]%=shape[2];
4312 
4313  derivX1=&stIn_r[(Z[1]+Y[1]+X[0])*stride_in]+J_times_2;
4314  derivX0=&stIn_r[(Z[1]+Y[1]+X[2])*stride_in]+J_times_2;
4315 
4316  derivY1=&stIn_r[(Z[1]+Y[0]+X[1])*stride_in]+J_times_2;
4317  derivY0=&stIn_r[(Z[1]+Y[2]+X[1])*stride_in]+J_times_2;
4318 
4319  derivZ1=&stIn_r[(Z[0]+Y[1]+X[1])*stride_in]+J_times_2;
4320  derivZ0=&stIn_r[(Z[2]+Y[1]+X[1])*stride_in]+J_times_2;
4321 
4322  T * current_r=stOut_r+(Z[1]+Y[1]+X[1])*stride_out;//+2*M;
4323 
4324  for (int M=-(J1); M<=(0); M++)
4325  {
4326  T tmp_r=T ( 0 );
4327  T tmp_i=T ( 0 );
4328 
4329  if (abs(M+1)<=J) // m1=-1 m2=M+1 M
4330  {
4331  int m2=2*(M+1);
4332  if (M==0)
4333  {
4334  m2*=-1;
4335  tmp_r-=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+voxel_size[1]*(derivY0[m2+1]-derivY1[m2+1]));
4336  tmp_i-=CGTable0[M]*(voxel_size[2]*(derivX1[m2+1]-derivX0[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4337  } else
4338  {
4339  tmp_r+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+voxel_size[1]*(derivY1[m2+1]-derivY0[m2+1]));
4340  tmp_i+=CGTable0[M]*(voxel_size[2]*(derivX0[m2+1]-derivX1[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4341  }
4342  }
4343  if (M>=-J) // m1=0 m2=M M
4344  {
4345  tmp_r+=CGTable1[M]*(derivZ0[M*2]-derivZ1[M*2]);
4346  tmp_i+=CGTable1[M]*(derivZ0[M*2+1]-derivZ1[M*2+1]);
4347  }
4348  if (M-1>=-J) // m1=1 m2=M-1 M
4349  {
4350  int m2=2*(M-1);
4351  tmp_r+=CGTable2[M]*(voxel_size[2]*(derivX1[m2]-derivX0[m2])+voxel_size[1]*(derivY1[m2+1]-derivY0[m2+1]));
4352  tmp_i+=CGTable2[M]*(voxel_size[2]*(derivX1[m2+1]-derivX0[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4353  }
4354 
4355  /*if ( clear_field )
4356  {
4357  current_r[0]=T ( 0 );
4358  current_r[1]=T ( 0 );
4359  } */
4360 
4361  if ( clear_field )
4362  {
4363  (*current_r)=tmp_r*alpha;
4364  current_r++;
4365  (*current_r)=tmp_i*alpha;
4366  current_r++;
4367  } else
4368  {
4369  (*current_r)+=tmp_r*alpha;
4370  current_r++;
4371  (*current_r)+=tmp_i*alpha;
4372  current_r++;
4373  }
4374  }
4375 
4376  }
4377  }
4378  }
4379 
4380  delete [] CGTable;
4381  return (STA_RESULT_SUCCESS);
4382 }
4383 
4384 
4385 
4386 
4387 template<typename T>
4388 STA_RESULT sta_derivatives2_R(
4389  const std::complex<T> * stIn,
4390  std::complex<T> * stOut ,
4391  const std::size_t shape[],
4392  int J,
4393  int Jupdown, // either +2 or -2
4394  bool conjugate=false,
4395  T alpha= ( T ) 1.0,
4396  const T v_size[]=NULL,
4397  int stride_in = -1,
4398  int stride_out = -1,
4399  bool clear_field = false )
4400 {
4401  if ( abs ( Jupdown ) >2 ) return STA_RESULT_INVALID_TENSOR_RANK;
4402  if ( abs ( Jupdown ) ==1 ) return STA_RESULT_INVALID_TENSOR_RANK;
4403  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
4404 
4405 // if (v_size!=NULL)
4406 // {
4407 // if (hanalysis::verbose>0)
4408 // printf("WARNING! element size is not considered yet!\n");
4409 // }
4410 
4411  T voxel_weights[6];
4412  voxel_weights[0]=voxel_weights[1]=voxel_weights[2]
4413  =voxel_weights[3]=voxel_weights[4]=voxel_weights[5]=T(1);
4414 
4415  if (v_size!=NULL)
4416  {
4417  voxel_weights[0]/=(v_size[0]*v_size[0]); //Zdir
4418  voxel_weights[1]/=(v_size[1]*v_size[1]); //Ydir
4419  voxel_weights[2]/=(v_size[2]*v_size[2]); //Xdir
4420  voxel_weights[3]/=(v_size[0]*v_size[1]); //ZYdir
4421  voxel_weights[4]/=(v_size[0]*v_size[2]); //ZXdir
4422  voxel_weights[5]/=(v_size[1]*v_size[2]); //YXdir
4423 
4424  }
4425 
4426  T conj=-1;
4427  if (conjugate) conj*=T( -1 );
4428 
4429  alpha*=T(sqrt(3.0/2.0));
4430 
4431  int J1=J+Jupdown;
4432  //if (abs(Jupdown)!=2) return -1;
4433 
4434  int vectorLengthJ=J+1;
4435  int vectorLengthJ1=(J1)+1;
4436 
4437  if (stride_in == -1)
4438  stride_in = vectorLengthJ;
4439  if (stride_out == -1)
4440  stride_out = vectorLengthJ1;
4441 
4442 
4443  std::size_t jumpz=shape[1]*shape[2];
4444  std::size_t jumpy=shape[2];
4445 
4446 
4447 
4448  T * CGTable=new T[5*vectorLengthJ1];
4449  T shnorm=hanalysis::clebschGordan(2,0,J,0,J1,0);
4450  // if (Jupdown==0) shnorm=1;
4451  // printf("shnorm: %f\n",shnorm);
4452  for (int M=-(J1); M<=(0); M++)
4453  {
4454  CGTable[M+(J1)] =hanalysis::clebschGordan(2,-2,J,M+2,J1,M)/shnorm;
4455  CGTable[M+(J1)+vectorLengthJ1] =hanalysis::clebschGordan(2,-1,J,M+1,J1,M)/shnorm;
4456  CGTable[M+(J1)+2*vectorLengthJ1]=hanalysis::clebschGordan(2,0,J,M,J1,M)/shnorm;
4457  CGTable[M+(J1)+3*vectorLengthJ1]=hanalysis::clebschGordan(2,1,J,M-1,J1,M)/shnorm;
4458  CGTable[M+(J1)+4*vectorLengthJ1]=hanalysis::clebschGordan(2,2,J,M-2,J1,M)/shnorm;
4459  }
4460  T * CGTable0=&CGTable[0];
4461  CGTable0+=(J1);
4462  T * CGTable1=&CGTable[vectorLengthJ1];
4463  CGTable1+=(J1);
4464  T * CGTable2=&CGTable[2*vectorLengthJ1];
4465  CGTable2+=(J1);
4466  T * CGTable3=&CGTable[3*vectorLengthJ1];
4467  CGTable3+=(J1);
4468  T * CGTable4=&CGTable[4*vectorLengthJ1];
4469  CGTable4+=(J1);
4470 
4471 
4472 
4473  const T * stIn_r=(const T *)stIn;
4474  T * stOut_r=(T*)stOut;
4475  vectorLengthJ*=2;
4476  vectorLengthJ1*=2;
4477  stride_in*=2;
4478  stride_out*=2;
4479 
4480  int J_times_2=J*2;
4481 
4482 
4483 
4484  #pragma omp parallel for num_threads(get_numCPUs())
4485  for (std::size_t z=0; z<shape[0]; z++)
4486  {
4487  std::size_t Z[5];
4488  Z[2]=z+shape[0];
4489  Z[0]=Z[2]-2;
4490  Z[1]=Z[2]-1;
4491  Z[3]=Z[2]+1;
4492  Z[4]=Z[2]+2;
4493  Z[0]%=shape[0];
4494  Z[1]%=shape[0];
4495  Z[2]%=shape[0];
4496  Z[3]%=shape[0];
4497  Z[4]%=shape[0];
4498 
4499  Z[0]*=jumpz;
4500  Z[1]*=jumpz;
4501  Z[2]*=jumpz;
4502  Z[3]*=jumpz;
4503  Z[4]*=jumpz;
4504 
4505 
4506 
4507  const T * X1Y1Z2;
4508  const T * X1Y2Z1;
4509  const T * X1Y2Z2;
4510  const T * X1Y2Z3;
4511  const T * X1Y3Z2;
4512 
4513 
4514  const T * X2Y1Z1;
4515  const T * X2Y1Z2;
4516  const T * X2Y1Z3;
4517  const T * X2Y2Z1;
4518  const T * X2Y2Z2;
4519  const T * X2Y2Z3;
4520  const T * X2Y3Z1;
4521  const T * X2Y3Z2;
4522  const T * X2Y3Z3;
4523 
4524 
4525  const T * X3Y1Z2;
4526  const T * X3Y2Z1;
4527  const T * X3Y2Z2;
4528  const T * X3Y2Z3;
4529  const T * X3Y3Z2;
4530 
4531 
4532  for (std::size_t y=0; y<shape[1]; y++)
4533  {
4534  std::size_t Y[5];
4535  Y[2]=y+shape[1];
4536  Y[0]=Y[2]-2;
4537  Y[1]=Y[2]-1;
4538  Y[3]=Y[2]+1;
4539  Y[4]=Y[2]+2;
4540  Y[0]%=shape[1];
4541  Y[1]%=shape[1];
4542  Y[2]%=shape[1];
4543  Y[3]%=shape[1];
4544  Y[4]%=shape[1];
4545 
4546  Y[0]*=jumpy;
4547  Y[1]*=jumpy;
4548  Y[2]*=jumpy;
4549  Y[3]*=jumpy;
4550  Y[4]*=jumpy;
4551 
4552 
4553  for (std::size_t x=0; x<shape[2]; x++)
4554  {
4555  std::size_t X[5];
4556  X[2]=x+shape[0];
4557  X[0]=X[2]-2;
4558  X[1]=X[2]-1;
4559  X[3]=X[2]+1;
4560  X[4]=X[2]+2;
4561  X[0]%=shape[2];
4562  X[1]%=shape[2];
4563  X[2]%=shape[2];
4564  X[3]%=shape[2];
4565  X[4]%=shape[2];
4566 
4567 
4568 
4569  X1Y1Z2=stIn_r+(Z[2]+Y[1]+X[1])*stride_in+J_times_2;
4570 
4571  X1Y2Z1=stIn_r+(Z[1]+Y[2]+X[1])*stride_in+J_times_2;
4572  X1Y2Z2=stIn_r+(Z[2]+Y[2]+X[1])*stride_in+J_times_2;
4573  X1Y2Z3=stIn_r+(Z[3]+Y[2]+X[1])*stride_in+J_times_2;
4574 
4575  X1Y3Z2=stIn_r+(Z[2]+Y[3]+X[1])*stride_in+J_times_2;
4576 
4577 
4578 
4579  X2Y1Z1=stIn_r+(Z[1]+Y[1]+X[2])*stride_in+J_times_2;
4580  X2Y1Z2=stIn_r+(Z[2]+Y[1]+X[2])*stride_in+J_times_2;
4581  X2Y1Z3=stIn_r+(Z[3]+Y[1]+X[2])*stride_in+J_times_2;
4582  X2Y2Z1=stIn_r+(Z[1]+Y[2]+X[2])*stride_in+J_times_2;
4583  X2Y2Z2=stIn_r+(Z[2]+Y[2]+X[2])*stride_in+J_times_2;
4584  X2Y2Z3=stIn_r+(Z[3]+Y[2]+X[2])*stride_in+J_times_2;
4585  X2Y3Z1=stIn_r+(Z[1]+Y[3]+X[2])*stride_in+J_times_2;
4586  X2Y3Z2=stIn_r+(Z[2]+Y[3]+X[2])*stride_in+J_times_2;
4587  X2Y3Z3=stIn_r+(Z[3]+Y[3]+X[2])*stride_in+J_times_2;
4588 
4589 
4590 
4591  X3Y1Z2=stIn_r+(Z[2]+Y[1]+X[3])*stride_in+J_times_2;
4592 
4593  X3Y2Z1=stIn_r+(Z[1]+Y[2]+X[3])*stride_in+J_times_2;
4594  X3Y2Z2=stIn_r+(Z[2]+Y[2]+X[3])*stride_in+J_times_2;
4595  X3Y2Z3=stIn_r+(Z[3]+Y[2]+X[3])*stride_in+J_times_2;
4596 
4597  X3Y3Z2=stIn_r+(Z[2]+Y[3]+X[3])*stride_in+J_times_2;
4598 
4599 
4600 
4601 
4602  T * current_r=stOut_r+(Z[2]+Y[2]+X[2])*stride_out;
4603  for (int M=-(J1); M<=(0); M++)
4604  {
4605 
4606  T ctmp_r=T ( 0 );
4607  T ctmp_i=T ( 0 );
4608 
4609  if (abs(M+2)<=J) // m1=-1 m2=M+1 M
4610  {
4611  int m2=2*(M+2);
4612 // T tmp_r;
4613 // T tmp_i;
4614 
4615  //if (m2>0)
4616  if (M+2>0)
4617  {
4618 
4619  T Dxx_r= voxel_weights[2]*(X1Y2Z2[-m2]-(T)2*X2Y2Z2[-m2]+X3Y2Z2[-m2]);
4620  T Dxx_i= voxel_weights[2]*(X1Y2Z2[-m2+1]-(T)2*X2Y2Z2[-m2+1]+X3Y2Z2[-m2+1]);
4621 
4622  T Dyy_r=voxel_weights[1]*(X2Y1Z2[-m2]-(T)2*X2Y2Z2[-m2]+X2Y3Z2[-m2]);
4623  T Dyy_i=voxel_weights[1]*(X2Y1Z2[-m2+1]-(T)2*X2Y2Z2[-m2+1]+X2Y3Z2[-m2+1]);
4624 
4625  T Dxy_r=voxel_weights[5]*(X1Y1Z2[-m2]-X3Y1Z2[-m2]-X1Y3Z2[-m2]+X3Y3Z2[-m2]);
4626  T Dxy_i=voxel_weights[5]*(X1Y1Z2[-m2+1]-X3Y1Z2[-m2+1]-X1Y3Z2[-m2+1]+X3Y3Z2[-m2+1]);
4627 
4628  //if (m2%2==0)
4629  if (M%2==0)
4630  {
4631  ctmp_r+=(T)0.5*CGTable0[M]*((Dxx_r-Dyy_r )+conj*((T)0.5*Dxy_i));
4632  ctmp_i+=(T)0.5*CGTable0[M]*((Dyy_i-Dxx_i )+conj*((T)0.5*Dxy_r));
4633  //tmp=(T)0.5*CGTable0[M]*(std::conj(Dxx-Dyy )-imag*std::conj((T)2.0*Dxy));
4634  }
4635  else
4636  {
4637  ctmp_r+=(T)0.5*CGTable0[M]*((Dyy_r-Dxx_r )-conj*((T)0.5*Dxy_i));
4638  ctmp_i+=(T)0.5*CGTable0[M]*((Dxx_i-Dyy_i )-conj*((T)0.5*Dxy_r));
4639  //tmp=(T)0.5*CGTable0[M]*(-std::conj(Dxx-Dyy )-imag*(-std::conj((T)2.0*Dxy)));
4640  }
4641 
4642  } else
4643  {
4644  T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4645  T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4646 
4647  T Dyy_r=voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4648  T Dyy_i=voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4649 
4650  T Dxy_r=voxel_weights[5]*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4651  T Dxy_i=voxel_weights[5]*(X1Y1Z2[m2+1]-X3Y1Z2[m2+1]-X1Y3Z2[m2+1]+X3Y3Z2[m2+1]);
4652 
4653  ctmp_r+=(T)0.5*CGTable0[M]*((Dxx_r-Dyy_r )-conj*((T)0.5*Dxy_i));
4654  ctmp_i+=(T)0.5*CGTable0[M]*((Dxx_i-Dyy_i )+conj*((T)0.5*Dxy_r));
4655  //tmp=(T)0.5*CGTable0[M]*((Dxx-Dyy )-imag*((T)2.0*Dxy));
4656  }
4657 // ctmp_r+=tmp_r;
4658 // ctmp_i+=tmp_i;
4659  }
4660 
4661 
4662 
4663  if (abs(M+1)<=J) // m1=-1 m2=M+1 M
4664  {
4665  int m2=2*(M+1);
4666 
4667  /* T tmp_r;
4668  T tmp_i;*/
4669 
4670  if (M+1>0)
4671  {
4672  T Dxz_r=(T)0.25*voxel_weights[4]*(X1Y2Z1[-m2]-X1Y2Z3[-m2]-X3Y2Z1[-m2]+X3Y2Z3[-m2]);
4673  T Dxz_i=(T)0.25*voxel_weights[4]*(X1Y2Z1[-m2+1]-X1Y2Z3[-m2+1]-X3Y2Z1[-m2+1]+X3Y2Z3[-m2+1]);
4674 
4675 
4676  T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[-m2]-X2Y3Z1[-m2]-X2Y1Z3[-m2]+X2Y3Z3[-m2]);
4677  T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[-m2+1]-X2Y3Z1[-m2+1]-X2Y1Z3[-m2+1]+X2Y3Z3[-m2+1]);
4678 
4679 
4680  if (M%2==0)
4681  {
4682  ctmp_r+=CGTable1[M]*(conj*Dyz_i-Dxz_r);
4683  ctmp_i+=CGTable1[M]*(Dxz_i+conj*Dyz_r);
4684 
4685 
4686  //tmp=CGTable1[M]*(-std::conj(Dxz )-imag*(-std::conj(Dyz)));
4687  }
4688  else
4689  {
4690  ctmp_r+=CGTable1[M]*(Dxz_r+conj*Dyz_i);
4691  ctmp_i+=-CGTable1[M]*(Dxz_i+conj*Dyz_r);
4692  //tmp=CGTable1[M]*(std::conj(Dxz )-imag*std::conj(Dyz));
4693  }
4694  } else
4695  {
4696  T Dxz_r=(T)0.25*voxel_weights[4]*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4697  T Dxz_i=(T)0.25*voxel_weights[4]*(X1Y2Z1[m2+1]-X1Y2Z3[m2+1]-X3Y2Z1[m2+1]+X3Y2Z3[m2+1]);
4698 
4699 
4700  T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4701  T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2+1]-X2Y3Z1[m2+1]-X2Y1Z3[m2+1]+X2Y3Z3[m2+1]);
4702 
4703  ctmp_r+=CGTable1[M]*(Dxz_r +conj*Dyz_i);
4704  ctmp_i+=CGTable1[M]*(Dxz_i -conj*Dyz_r);
4705  //tmp=CGTable1[M]*((Dxz )-imag*(Dyz));
4706  }
4707 // ctmp_r+=tmp_r;
4708 // ctmp_i+=tmp_i;
4709  }
4710 
4711 
4712 
4713  if (M>=-J) // m1=-1 m2=M+1 M
4714  {
4715  int m2=2*M;
4716  T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4717  T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4718 
4719  T Dyy_r= voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4720  T Dyy_i= voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4721 
4722  T Dzz_r= voxel_weights[0]*(X2Y2Z1[m2]-(T)2*X2Y2Z2[m2]+X2Y2Z3[m2]);
4723  T Dzz_i= voxel_weights[0]*(X2Y2Z1[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y2Z3[m2+1]);
4724 
4725  const T SQRT6=(T)(-1.0/std::sqrt(6.0));
4726 
4727  ctmp_r+=CGTable2[M]*((Dxx_r+Dyy_r-(T)2.0*Dzz_r)*(SQRT6));
4728  ctmp_i+=CGTable2[M]*((Dxx_i+Dyy_i-(T)2.0*Dzz_i)*(SQRT6));
4729 
4730  //ctmp+=CGTable2[M]*((Dxx+Dyy-(T)2.0*Dzz)*(SQRT6));
4731  }
4732 
4733  if (M-1>=-J) // m1=-1 m2=M+1 M
4734  {
4735  int m2=2*(M-1);
4736  T Dxz_r=voxel_weights[4]*(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4737  T Dxz_i=voxel_weights[4]*(T)0.25*(X1Y2Z1[m2+1]-X1Y2Z3[m2+1]-X3Y2Z1[m2+1]+X3Y2Z3[m2+1]);
4738 
4739  T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4740  T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2+1]-X2Y3Z1[m2+1]-X2Y1Z3[m2+1]+X2Y3Z3[m2+1]);
4741 
4742 
4743  ctmp_r-=CGTable3[M]*(Dxz_r-conj*Dyz_i);
4744  ctmp_i-=CGTable3[M]*(Dxz_i+conj*Dyz_r);
4745  //ctmp-=CGTable3[M]*((Dxz )+imag*(Dyz));
4746  }
4747 
4748 
4749  if (M-2>=-J) // m1=-1 m2=M+1 M
4750  {
4751  int m2=2*(M-2);
4752 
4753  T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4754  T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4755 
4756  T Dyy_r= voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4757  T Dyy_i= voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4758 
4759 
4760  T Dxy_r= voxel_weights[5]*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4761  T Dxy_i= voxel_weights[5]*(X1Y1Z2[m2+1]-X3Y1Z2[m2+1]-X1Y3Z2[m2+1]+X3Y3Z2[m2+1]);
4762 
4763  ctmp_r+=(T)0.5*CGTable4[M]*((Dxx_r-Dyy_r )+conj*((T)0.5*Dxy_i));
4764  ctmp_i+=(T)0.5*CGTable4[M]*((Dxx_i-Dyy_i )-conj*((T)0.5*Dxy_r));
4765  //ctmp+=(T)0.5*CGTable4[M]*((Dxx-Dyy )+imag*((T)2.0*Dxy));
4766  }
4767 
4768  /* if ( clear_field )
4769  {
4770  current_r[0]=T ( 0 );
4771  current_r[1]=T ( 0 );
4772  }
4773 
4774  (*current_r)+=ctmp_r*alpha;
4775  current_r++;
4776  (*current_r)+=ctmp_i*alpha;
4777  current_r++; */
4778 
4779  if ( clear_field )
4780  {
4781  (*current_r)=ctmp_r*alpha;
4782  current_r++;
4783  (*current_r)=ctmp_i*alpha;
4784  current_r++;
4785  } else
4786  {
4787  (*current_r)+=ctmp_r*alpha;
4788  current_r++;
4789  (*current_r)+=ctmp_i*alpha;
4790  current_r++;
4791  }
4792 
4793 
4794  //current+=ctmp*alpha;
4795  }
4796  }
4797  }
4798  }
4799  delete [] CGTable;
4800  return STA_RESULT_SUCCESS;
4801 }
4802 
4803 
4804 
4805 #endif
4806 
4807 
4808 
4809 
4810 
4811 
4812 
4813 
4814 
4815 
4816 
4817 
4818 
4819 
4820 template<typename T,typename S>
4821 STA_RESULT sta_derivatives_R4th(
4822  const S * stIn,
4823  std::complex<T> * stOut ,
4824  const std::size_t shape[],
4825  int J,
4826  int Jupdown, // either -1 0 or 1
4827  bool conjugate=false,
4828  std::complex<T> alpha=(T)1.0,
4829  const T v_size[]=NULL,
4830  int stride_in = -1,
4831  int stride_out = -1,
4832  bool clear_field = false)
4833 {
4834  alpha/=T(12);
4835  if ( abs ( Jupdown ) >1 ) return STA_RESULT_INVALID_TENSOR_RANK;
4836  if ( abs ( J+Jupdown ) <0 ) return STA_RESULT_INVALID_TENSOR_RANK;
4837 
4838  std::complex<T> imag=-std::complex<T>(0,1);
4839  if (conjugate) imag*=T( -1 );
4840 
4841  T voxel_size[3];
4842  voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
4843  if (v_size!=NULL)
4844  {
4845  voxel_size[0]/=v_size[0]; // Zdir
4846  voxel_size[1]/=v_size[1]; // Ydir
4847  voxel_size[2]/=v_size[2]; // Xdir
4848  }
4849 
4850  imag*=voxel_size[1];
4851 
4852  int J1=(T)(J+Jupdown);
4853 
4854  std::size_t vectorLengthJ=J+1;
4855  std::size_t vectorLengthJ1=(J1)+1;
4856 
4857 
4858  if (stride_in == -1)
4859  stride_in = vectorLengthJ;
4860  if (stride_out == -1)
4861  stride_out = vectorLengthJ1;
4862 
4863 
4864 
4865 
4866  std::size_t jumpz=shape[1]*shape[2];
4867  std::size_t jumpy=shape[2];
4868 
4869 
4870  T * CGTable=new T[3*vectorLengthJ1];
4871  T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
4872  if (Jupdown==0) shnorm=1;
4873 
4874  for (int M=-(J1); M<=(0); M++)
4875  {
4876  CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
4877  CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
4878  CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
4879  }
4880  T * CGTable0=&CGTable[0];
4881  CGTable0+=(J1);
4882  T * CGTable1=&CGTable[vectorLengthJ1];
4883  CGTable1+=(J1);
4884  T * CGTable2=&CGTable[2*vectorLengthJ1];
4885  CGTable2+=(J1);
4886 
4887  #pragma omp parallel for num_threads(get_numCPUs())
4888  for (std::size_t z=0; z<shape[0]; z++)
4889  {
4890  std::size_t Z[5];
4891  Z[2]=z+shape[0];
4892  Z[0]=Z[2]-2;
4893  Z[1]=Z[2]-1;
4894  Z[3]=Z[2]+1;
4895  Z[4]=Z[2]+2;
4896 
4897 
4898  Z[0]%=shape[0];
4899  Z[1]%=shape[0];
4900  Z[2]%=shape[0];
4901  Z[3]%=shape[0];
4902  Z[4]%=shape[0];
4903 
4904 
4905  Z[0]*=jumpz;
4906  Z[1]*=jumpz;
4907  Z[2]*=jumpz;
4908  Z[3]*=jumpz;
4909  Z[4]*=jumpz;
4910 
4911 
4912  const S * derivZ0p=stIn+Z[0]*stride_in+J;
4913  const S * derivZ1p=stIn+Z[1]*stride_in+J;
4914  const S * derivZ2p=stIn+Z[2]*stride_in+J;
4915  const S * derivZ3p=stIn+Z[3]*stride_in+J;
4916  const S * derivZ4p=stIn+Z[4]*stride_in+J;
4917 
4918 
4919  const S * derivX3;
4920  const S * derivX2;
4921  const S * derivX1;
4922  const S * derivX0;
4923 
4924  const S * derivY3;
4925  const S * derivY2;
4926  const S * derivY1;
4927  const S * derivY0;
4928 
4929 
4930  const S * derivZ3;
4931  const S * derivZ2;
4932  const S * derivZ1;
4933  const S * derivZ0;
4934 
4935 
4936  for (std::size_t y=0; y<shape[1]; y++)
4937  {
4938  std::size_t Y[5];
4939  Y[2]=y+shape[1];
4940  Y[0]=Y[2]-2;
4941  Y[1]=Y[2]-1;
4942  Y[3]=Y[2]+1;
4943  Y[4]=Y[2]+2;
4944 
4945 
4946 
4947  Y[0]%=shape[1];
4948  Y[1]%=shape[1];
4949  Y[2]%=shape[1];
4950  Y[3]%=shape[1];
4951  Y[4]%=shape[1];
4952 
4953  Y[0]*=jumpy;
4954  Y[1]*=jumpy;
4955  Y[2]*=jumpy;
4956  Y[3]*=jumpy;
4957  Y[4]*=jumpy;
4958 
4959 
4960 
4961  const S * derivZ2Y0p=derivZ2p+Y[0]*stride_in;
4962  const S * derivZ2Y1p=derivZ2p+Y[1]*stride_in;
4963  const S * derivZ2Y3p=derivZ2p+Y[3]*stride_in;
4964  const S * derivZ2Y4p=derivZ2p+Y[4]*stride_in;
4965 
4966  std::size_t tmp=Y[2]*stride_in;
4967  const S * derivZ0Y2p=derivZ0p+tmp;
4968  const S * derivZ1Y2p=derivZ1p+tmp;
4969  const S * derivZ2Y2p=derivZ2p+tmp;
4970  const S * derivZ3Y2p=derivZ3p+tmp;
4971  const S * derivZ4Y2p=derivZ4p+tmp;
4972 
4973 
4974  for (std::size_t x=0; x<shape[2]; x++)
4975  {
4976 
4977  std::size_t X[5];
4978  X[2]=x+shape[2];
4979  X[0]=X[2]-2;
4980  X[1]=X[2]-1;
4981  X[3]=X[2]+1;
4982  X[4]=X[2]+2;
4983 
4984  X[0]%=shape[2];
4985  X[1]%=shape[2];
4986  X[2]%=shape[2];
4987  X[3]%=shape[2];
4988  X[4]%=shape[2];
4989 
4990 
4991  derivX0=derivZ2Y2p+(X[0])*stride_in;
4992  derivX1=derivZ2Y2p+(X[1])*stride_in;
4993  derivX2=derivZ2Y2p+X[3]*stride_in;
4994  derivX3=derivZ2Y2p+(X[4])*stride_in;
4995 
4996 
4997  std::size_t tmp=X[2]*stride_in;
4998  derivY0=derivZ2Y0p+tmp;
4999  derivY1=derivZ2Y1p+tmp;
5000  derivY2=derivZ2Y3p+tmp;
5001  derivY3=derivZ2Y4p+tmp;
5002 
5003  derivZ0=derivZ0Y2p+tmp;
5004  derivZ1=derivZ1Y2p+tmp;
5005  derivZ2=derivZ3Y2p+tmp;
5006  derivZ3=derivZ4Y2p+tmp;
5007 
5008  std::size_t offset=(Z[2]+Y[2]+X[2])*stride_out+J1;
5009 
5010  for (int M=-(J1); M<=(0); M++)
5011  {
5012  std::complex<T> & current=stOut[offset+M];
5013  if ( clear_field ) current=T ( 0 );
5014  std::complex<T> tmp=T ( 0 );
5015 
5016  if (abs(M+1)<=J) // m1=-1 m2=M+1 M
5017  {
5018  int m2=M+1;
5019  if (M==0)
5020  {
5021  tmp-=CGTable0[M]*(voxel_size[2]*std::conj(derivX0[-m2]+(T)8.0*(derivX2[-m2]-derivX1[-m2])-derivX3[-m2])+
5022  imag*std::conj(derivY0[-m2]+(T)8.0*(derivY2[-m2]-derivY1[-m2])-derivY3[-m2]));
5023  } else
5024  tmp+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]+(T)8.0*(derivX2[m2]-derivX1[m2])-derivX3[m2])+
5025  imag*(derivY0[m2]+(T)8.0*(derivY2[m2]-derivY1[m2])-derivY3[m2]));
5026  }
5027  if (M>=-J) // m1=0 m2=M M
5028  {
5029  tmp+=CGTable1[M]*(derivZ0[M]+(T)8.0*(derivZ2[M]-derivZ1[M])-derivZ3[M]);
5030  }
5031  if (M-1>=-J) // m1=1 m2=M-1 M
5032  {
5033  int m2=M-1;
5034  tmp+=CGTable2[M]*(-voxel_size[2]*(derivX0[m2]+(T)8.0*(derivX2[m2]-derivX1[m2])-derivX3[m2])+
5035  imag*(derivY0[m2]+(T)8.0*(derivY2[m2]-derivY1[m2])-derivY3[m2]));
5036  }
5037  current+=tmp*alpha;
5038  }
5039 
5040  }
5041  }
5042  }
5043 
5044  delete [] CGTable;
5045  return (STA_RESULT_SUCCESS);
5046 }
5047 
5048 
5049 
5050 template<typename T,typename S>
5051 STA_RESULT sta_product0 (
5052  const std::complex<T> * stIn1,
5053  const std::complex<T> * stIn2,
5054  std::complex<T> * stOut,
5055  const std::size_t shape[],
5056  S alpha,
5057  int stride_in1 = -1,
5058  int stride_in2 = -1,
5059  int stride_out = -1,
5060  bool clear_field = false )
5061 {
5062  {
5063  if (stride_in1 == -1)
5064  stride_in1 = 1;
5065  if (stride_in2 == -1)
5066  stride_in2 = 1;
5067  if (stride_out == -1)
5068  stride_out = 1;
5069 
5070 
5071 
5072  std::size_t jumpz=shape[1]*shape[2];
5073 
5074 
5075  #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5076  for (std::size_t z=0; z<shape[0]; z++)
5077  {
5078  std::size_t Z=z;
5079  Z*=jumpz;
5080  const std::complex<T> * current_J1=&stIn1[Z*stride_in1];
5081  const std::complex<T> * current_J2=&stIn2[Z*stride_in2];
5082  std::complex<T> * current_J=&stOut[Z*stride_out];
5083 
5084  for (std::size_t i=0; i<jumpz; i++)
5085  {
5086  if (clear_field)
5087  *current_J=T ( 0 );
5088  *current_J+=(*current_J1)* (*current_J2)*alpha;
5089  current_J+=stride_out;
5090  current_J1+=stride_in1;
5091  current_J2+=stride_in2;
5092  }
5093  }
5094  }
5095  return STA_RESULT_SUCCESS;
5096 }
5097 
5098 
5099 
5100 /*########################################################
5101 
5102 
5103 
5104 ########################################################*/
5105 
5106 
5107 
5108 static std::string errortostring(STA_RESULT error)
5109 {
5110  switch(error)
5111  {
5112  case STA_RESULT_SUCCESS:
5113  return "STA_RESULT_SUCCESS";
5114  case STA_RESULT_FAILED:
5115  return "STA_RESULT_FAILED";
5116  case STA_RESULT_SHAPE_MISMATCH:
5117  return "STA_RESULT_SHAPE_MISMATCH";
5118  case STA_RESULT_INVALID_PRODUCT:
5119  return "STA_RESULT_INVALID_PRODUCT";
5120  case STA_RESULT_STORAGE_MISMATCH:
5121  return "STA_RESULT_STORAGE_MISMATCH";
5122  case STA_RESULT_INVALID_TENSOR_RANK:
5123  return "STA_RESULT_INVALID_TENSOR_RANK";
5124  case STA_RESULT_OFIELD_TYPE_MISMATCH:
5125  return "STA_RESULT_OFIELD_TYPE_MISMATCH";
5126  case STA_RESULT_SAME_ADDRESS:
5127  return "STA_RESULT_SAME_ADDRESS";
5128  case STA_RESULT_NOT_IMPLEMENTED:
5129  return "STA_RESULT_NOT_IMPLEMENTED";
5130  }
5131  return "unkown error code";
5132 }
5133 
5134 
5135 
5136 /*
5138 enum STA_FIELD_PROPERTIES {
5139  STA_FIELD_INVALID_PARAM=0,
5142  STA_FIELD_STORAGE_C=2,
5145  STA_FIELD_STORAGE_R=4,
5148  STA_FIELD_STORAGE_RF=8,
5149  STA_FIELD_EXPERIMENTAL=16,
5150 
5152  STA_OFIELD_SINGLE=128,
5154  STA_OFIELD_FULL=256,
5156  STA_OFIELD_EVEN=512,
5158  STA_OFIELD_ODD=1024
5159 };*/
5160 
5161 
5164  STA_FIELD_STORAGE_UNSUPPORTED=0,
5174 };
5175 
5178  STA_OFIELD_UNSUPPORTED=0,
5187 };
5188 
5189 
5190 
5191 
5192 static inline STA_FIELD_STORAGE enumfromstring_storage(std::string s)
5193 {
5194  if (s.compare("STA_FIELD_STORAGE_C")==0)
5195  return STA_FIELD_STORAGE_C;
5196 
5197  if (s.compare("STA_FIELD_STORAGE_R")==0)
5198  return STA_FIELD_STORAGE_R;
5199 
5200  if (s.compare("STA_FIELD_STORAGE_RF")==0)
5201  return STA_FIELD_STORAGE_RF;
5202  return STA_FIELD_STORAGE_UNSUPPORTED;
5203 }
5204 
5205 
5206 static inline STA_FIELD_TYPE enumfromstring_type(std::string s)
5207 {
5208  if (s.compare("STA_OFIELD_EVEN")==0)
5209  return STA_OFIELD_EVEN;
5210 
5211  if (s.compare("STA_OFIELD_ODD")==0)
5212  return STA_OFIELD_ODD;
5213 
5214  if (s.compare("STA_OFIELD_FULL")==0)
5215  return STA_OFIELD_FULL;
5216 
5217  if (s.compare("STA_OFIELD_SINGLE")==0)
5218  return STA_OFIELD_SINGLE;
5219  return STA_OFIELD_UNSUPPORTED;
5220 }
5221 
5222 
5223 static inline std::string enumtostring_storage(STA_FIELD_STORAGE p)
5224 {
5225  if (p == STA_FIELD_STORAGE_C)
5226  return "STA_FIELD_STORAGE_C";
5227 
5228  if (p == STA_FIELD_STORAGE_R)
5229  return "STA_FIELD_STORAGE_R";
5230 
5231  if (p == STA_FIELD_STORAGE_RF)
5232  return "STA_FIELD_STORAGE_RF";
5233 
5234  return "STA_FIELD_STORAGE_UNSUPPORTED";
5235 }
5236 
5237 
5238 
5239 static inline std::string enumtostring_type(STA_FIELD_TYPE p)
5240 {
5241 
5242  if (p == STA_OFIELD_EVEN)
5243  return "STA_OFIELD_EVEN";
5244 
5245  if (p == STA_OFIELD_ODD)
5246  return "STA_OFIELD_ODD";
5247 
5248  if (p == STA_OFIELD_FULL)
5249  return "STA_OFIELD_FULL";
5250 
5251  if (p == STA_OFIELD_SINGLE)
5252  return "STA_OFIELD_SINGLE";
5253 
5254  return "STA_OFIELD_UNSUPPORTED";
5255 }
5256 
5257 
5258 
5259 static inline int numComponents2order(
5260  hanalysis::STA_FIELD_STORAGE field_storage,
5261  hanalysis::STA_FIELD_TYPE field_type,
5262  int ncomponents)
5263 {
5264  if (field_storage==STA_FIELD_STORAGE_C)
5265  {
5266  switch (field_type)
5267  {
5268  case STA_OFIELD_SINGLE:
5269  {
5270  return (ncomponents-1)/2;
5271  }
5272  case STA_OFIELD_FULL:
5273  {
5274  return std::sqrt(1.0*ncomponents)-1;
5275  }
5276  case STA_OFIELD_ODD:
5277  {
5278  return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5279  }
5280  case STA_OFIELD_EVEN:
5281  {
5282  return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5283  }
5284  default:
5285  return -1;
5286  }
5287  } else
5288  {
5289  switch (field_type)
5290  {
5291  case STA_OFIELD_SINGLE:
5292  {
5293  return ncomponents-1;
5294  }
5295  case STA_OFIELD_FULL:
5296  {
5297  return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5298  }
5299  case STA_OFIELD_ODD:
5300  {
5301  return std::sqrt(1+4.0*ncomponents)-2;
5302  }
5303  case STA_OFIELD_EVEN:
5304  {
5305  return std::sqrt(4.0*ncomponents)-2;
5306  }
5307  default:
5308  return -1;
5309  }
5310  }
5311 }
5312 
5313 
5314 
5315 
5316 static inline int order2numComponents(
5317  hanalysis::STA_FIELD_STORAGE field_storage,
5318  hanalysis::STA_FIELD_TYPE field_type,
5319  int L)
5320 {
5321  if (field_storage==STA_FIELD_STORAGE_C)
5322  {
5323  switch (field_type)
5324  {
5325  case STA_OFIELD_SINGLE:
5326  {
5327  return 2*L+1;
5328  }
5329  case STA_OFIELD_FULL:
5330  {
5331  return ((L+1)*(L+1));
5332  }
5333  case STA_OFIELD_ODD:
5334  {
5335  return ((L+1)*(L+2))/2;
5336  }
5337  case STA_OFIELD_EVEN:
5338  {
5339  return ((L+1)*(L+2))/2;
5340  }
5341  default:
5342  return -1;
5343  }
5344  } else
5345  {
5346  switch (field_type)
5347  {
5348  case STA_OFIELD_SINGLE:
5349  {
5350  return L+1;
5351  }
5352  case STA_OFIELD_FULL:
5353  {
5354  return ((L+1)*(L+2))/2;
5355  }
5356  case STA_OFIELD_ODD:
5357  {
5358  return ((L+1)*(L+3))/4;
5359  }
5360  case STA_OFIELD_EVEN:
5361  {
5362  return ((L+2)*(L+2))/4;
5363  }
5364  default:
5365  return -1;
5366  }
5367  }
5368 }
5369 
5370 
5371 
5372 static inline int getComponentOffset(
5373  hanalysis::STA_FIELD_STORAGE field_storage,
5374  hanalysis::STA_FIELD_TYPE field_type,
5375  int L)
5376 {
5377  if (field_storage==STA_FIELD_STORAGE_C)
5378  {
5379  switch (field_type)
5380  {
5381  case STA_OFIELD_SINGLE:
5382  {
5383  return 0;
5384  }
5385  case STA_OFIELD_FULL:
5386  {
5387  return (L)*(L);
5388  }
5389  case STA_OFIELD_ODD:
5390  {
5391  return ((L-1)*L)/2;
5392  }
5393  case STA_OFIELD_EVEN:
5394  {
5395  return ((L-1)*L)/2;
5396  }
5397  default:
5398  return -1;
5399  }
5400  } else
5401  {
5402  switch (field_type)
5403  {
5404  case STA_OFIELD_SINGLE:
5405  {
5406  return 0;
5407  }
5408  case STA_OFIELD_FULL:
5409  {
5410  return (L*(L+1))/2;
5411  }
5412  case STA_OFIELD_ODD:
5413  {
5414  return ((L+1)*(L+3))/4-L-1;
5415  }
5416  case STA_OFIELD_EVEN:
5417  {
5418  return ((L+2)*(L+2))/4-L-1;
5419  }
5420  default:
5421  return -1;
5422  }
5423  }
5424 }
5425 
5426 
5427 /*
5428  *
5429  *
5430  *
5431  * */
5432 template<typename T>
5433 STA_RESULT sta_3product (
5434  const std::complex<T> * stIn1,
5435  const std::complex<T> * stIn2,
5436  const std::complex<T> * stIn3,
5437  std::complex<T> * stOut,
5438  const std::size_t shape[],
5439  int J1,
5440  int J2,
5441  int J3,
5442  int Jprod1,
5443  int Jprod2,
5444  std::complex<T> alpha,
5445  bool normalize = false,
5446  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
5447  int stride_in1 = -1,
5448  int stride_in2 = -1,
5449  int stride_in3 = -1,
5450  int stride_out = -1,
5451  bool clear_field = false )
5452 {
5453 
5454  printf("tripple product experimental ! not tested yet, and slow\n");
5455  if ((stIn1==stOut)||(stIn2==stOut))
5456  return STA_RESULT_SAME_ADDRESS;
5457 
5458  STA_RESULT result=STA_RESULT_FAILED;
5459  bool alpha_real=(alpha.imag()==0);
5460 
5461 
5462  switch (field_storage)
5463  {
5464 
5465  case STA_FIELD_STORAGE_R:
5466  {
5467  if (alpha_real)
5468  return sta_tripleproduct_R (
5469  stIn1,
5470  stIn2,
5471  stIn3,
5472  stOut ,
5473  shape,
5474  J1,
5475  J2,
5476  J3,
5477  Jprod1,
5478  Jprod2,
5479  alpha.real(),
5480  normalize,
5481  stride_in1,
5482  stride_in2,
5483  stride_in3,
5484  stride_out,
5485  clear_field);
5486  else
5487  return STA_RESULT_STORAGE_MISMATCH;
5488  }
5489  break;
5490  }
5491 
5492 
5493 
5494  return result;
5495 }
5496 
5497 
5498 
5500 
5527 template<typename T>
5529  const std::complex<T> * stIn1,
5530  const std::complex<T> * stIn2,
5531  std::complex<T> * stOut,
5532  const std::size_t shape[],
5533  int J1,
5534  int J2,
5535  int J,
5536  std::complex<T> alpha = T( 1 ),
5537  bool normalize = false,
5538  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
5539  int stride_in1 = -1,
5540  int stride_in2 = -1,
5541  int stride_out = -1,
5542  bool clear_field = false )
5543 {
5544  if ((stIn1==stOut)||(stIn2==stOut))
5545  return STA_RESULT_SAME_ADDRESS;
5546 
5547  STA_RESULT result=STA_RESULT_FAILED;
5548  bool alpha_real=(alpha.imag()==0);
5549 
5550 
5551 
5552 
5553 
5554 
5555  if ((J1==0)&&(J2==0)&&(J==0))
5556  {
5557  if (alpha_real)
5558  return sta_product0(stIn1,
5559  stIn2,
5560  stOut,
5561  shape,
5562  alpha.real(),
5563  stride_in1,
5564  stride_in2,
5565  stride_out,
5566  clear_field);
5567  else if (field_storage==STA_FIELD_STORAGE_C)
5568  {
5569  return sta_product0(stIn1,
5570  stIn2,
5571  stOut,
5572  shape,
5573  alpha,
5574  stride_in1,
5575  stride_in2,
5576  stride_out,
5577  clear_field);
5578  } else
5579  return STA_RESULT_STORAGE_MISMATCH;
5580 
5581  }
5582 
5583 
5584  switch (field_storage)
5585  {
5586  case STA_FIELD_STORAGE_C:
5587  {
5588  if (alpha_real)
5589  result=sta_product_C (stIn1,
5590  stIn2,
5591  stOut,
5592  shape,
5593  J1,
5594  J2,
5595  J,
5596  alpha.real(),
5597  normalize,
5598  stride_in1,
5599  stride_in2,
5600  stride_out,
5601  clear_field);
5602  else
5603  result=sta_product_C (stIn1,
5604  stIn2,
5605  stOut,
5606  shape,
5607  J1,
5608  J2,
5609  J,
5610  alpha,
5611  normalize,
5612  stride_in1,
5613  stride_in2,
5614  stride_out,
5615  clear_field);
5616  }
5617  break;
5618 
5619  case STA_FIELD_STORAGE_R:
5620  {
5621  if (alpha_real)
5622  result=sta_product_R (stIn1,
5623  stIn2,
5624  stOut,
5625  shape,
5626  J1,
5627  J2,
5628  J,
5629  alpha.real(),
5630  normalize,
5631  stride_in1,
5632  stride_in2,
5633  stride_out,
5634  clear_field);
5635  else
5636  return STA_RESULT_STORAGE_MISMATCH;
5637 // else
5638 // result=sta_product_R (stIn1,
5639 // stIn2,
5640 // stOut,
5641 // shape,
5642 // J1,
5643 // J2,
5644 // J,
5645 // alpha,
5646 // normalize,
5647 // stride_in1,
5648 // stride_in2,
5649 // stride_out,
5650 // clear_field);
5651  }
5652  break;
5653 
5654  case STA_FIELD_STORAGE_RF:
5655  {
5656  if (alpha_real)
5657  result=sta_product_Rft (stIn1,
5658  stIn2,
5659  stOut,
5660  shape,
5661  J1,
5662  J2,
5663  J,
5664  alpha.real(),
5665  normalize,
5666  stride_in1,
5667  stride_in2,
5668  stride_out,
5669  clear_field);
5670  else
5671  return STA_RESULT_STORAGE_MISMATCH;
5672  }
5673  break;
5674  default:
5675  {
5676  printf("unsupported\n");
5677  }
5678  break;
5679 
5680  }
5681  return result;
5682 }
5683 
5684 
5685 
5686 
5688 
5699 template<typename T,typename S>
5701  const std::complex<T> * stIn,
5702  std::complex<T> * stOut,
5703  const std::size_t shape[],
5704  int ncomponents,
5705  S alpha = S ( 1 ),
5706  bool conjugate=false,
5707  int stride_in = -1,
5708  int stride_out = -1,
5709  bool clear_field = false )
5710 {
5711  bool doalpha= ( alpha!=S ( 1 ) );
5712 
5713  if (stride_in==-1)
5714  stride_in=ncomponents;
5715  if (stride_out==-1)
5716  stride_out=ncomponents;
5717 
5718 
5719  std::size_t jumpz=shape[1]*shape[2];
5720 
5721  //printf("alpha (%f %f) %d\n",alpha.real(),alpha.imag(),conjugate);
5722 
5723  #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5724  for (std::size_t a=0; a<shape[0]; a++ )
5725  {
5726  std::complex<T> *resultp=stOut+a*jumpz*stride_out;
5727  const std::complex<T> *inp=stIn+a*jumpz*stride_in;
5728 
5729  for ( std::size_t i=0; i<jumpz; i++ )
5730  {
5731  for (int b=0; b<ncomponents; b++)
5732  {
5733  std::complex<T> tmp=inp[b];
5734  if ( conjugate )
5735  tmp=std::conj ( tmp );
5736 
5737  if ( doalpha )
5738  tmp*=alpha;
5739 
5740  if ( clear_field )
5741  resultp[b] =tmp;
5742  else
5743  resultp[b] +=tmp;
5744  }
5745  resultp+=stride_out;
5746  inp+=stride_in;
5747  }
5748  }
5749  return STA_RESULT_SUCCESS;
5750 }
5751 
5752 
5753 
5754 
5755 
5757 
5764 template<typename T>
5766  const std::complex<T> * stIn,
5767  std::complex<T> * stOut,
5768  const std::size_t shape[],
5769  int J,
5770  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
5771  int stride_in = -1,
5772  int stride_out = -1,
5773  bool clear_field = false )
5774 {
5775  if (stIn==stOut)
5776  return STA_RESULT_SAME_ADDRESS;
5777 
5778  int J1=J+1;
5779  if (field_storage==STA_FIELD_STORAGE_C)
5780  J1=2*J+1;
5781 
5782  if (stride_in==-1)
5783  stride_in=J1;
5784  if (stride_out==-1)
5785  stride_out=1;
5786 
5787 
5788  std::size_t jumpz=shape[1]*shape[2];
5789 
5790 
5791  #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5792  for (std::size_t a=0; a<shape[0]; a++ )
5793  {
5794  std::complex<T> *resultp=stOut+a*jumpz*stride_out;
5795  const std::complex<T> *inp=stIn+a*jumpz*stride_in+J;
5796 
5797  for ( std::size_t i=0; i<jumpz; i++ )
5798  {
5799  const std::complex<T> * current=inp;
5800 
5801  T tmp=0;
5802 
5803  if (field_storage==STA_FIELD_STORAGE_C)
5804  {
5805  for (int b=-J; b<=J; b++)
5806  {
5807  tmp+=std::norm(current[b]);
5808  }
5809  } else
5810  {
5811  for (int b=-J; b<0; b++)
5812  {
5813  tmp+=T( 2 )*std::norm(current[b]);
5814  }
5815  tmp+=std::norm(current[0]);
5816  }
5817 
5818  if ( clear_field )
5819  *resultp=T (0);
5820  *resultp+=std::sqrt(tmp);
5821 
5822  resultp+=stride_out;
5823  inp+=stride_in;
5824 
5825  }
5826  }
5827  return STA_RESULT_SUCCESS;
5828 }
5829 
5830 
5831 
5833 
5863 template<typename T>
5865  const std::complex<T> * stIn,
5866  std::complex<T> * stOut ,
5867  const std::size_t shape[],
5868  int J,
5869  int Jupdown, // either -1 0 or 1
5870  bool conjugate=false,
5871  std::complex<T> alpha= ( T ) 1.0,
5872  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
5873  const T v_size[]=NULL,
5874  int stride_in = -1,
5875  int stride_out = -1,
5876  bool clear_field = false,
5877  int accuracy=0)
5878 {
5879  if (stIn==stOut)
5880  return STA_RESULT_SAME_ADDRESS;
5881 
5882  STA_RESULT result=STA_RESULT_FAILED;
5883  bool alpha_real=(alpha.imag()==0);
5884 
5885  switch (field_storage)
5886  {
5887  case STA_FIELD_STORAGE_C:
5888  {
5889  result=sta_derivatives_C (stIn,
5890  stOut,
5891  shape,
5892  J,
5893  Jupdown,
5894  conjugate,
5895  alpha,
5896  v_size,
5897  stride_in,
5898  stride_out,
5899  clear_field);
5900  }
5901  break;
5902 
5903  case STA_FIELD_STORAGE_R:
5904  {
5905  if (alpha_real)
5906  {
5907  if (accuracy==0)
5908  {
5909  result=sta_derivatives_R (stIn,
5910  stOut,
5911  shape,
5912  J,
5913  Jupdown,
5914  conjugate,
5915  alpha.real(),
5916  v_size,
5917  stride_in,
5918  stride_out,
5919  clear_field);
5920 
5921 
5922 // printf("ok? nan %d inf %d\n",
5923 // sta_isnan(stOut,shape,J+2,stride_out),sta_isinf(stOut,shape,J+2,stride_out));
5924 
5925 
5926  }
5927  else
5928  {
5929  result=sta_derivatives_R4th (stIn,
5930  stOut,
5931  shape,
5932  J,
5933  Jupdown,
5934  conjugate,
5935  alpha,
5936  v_size,
5937  stride_in,
5938  stride_out,
5939  clear_field);
5940  }
5941  } else
5942  return STA_RESULT_STORAGE_MISMATCH;
5943  }
5944  break;
5945 
5946  default:
5947  {
5948  printf("unsupported derivative\n");
5949  }
5950  break;
5951  }
5952 
5953 
5954 
5955  return result;
5956 }
5957 
5958 
5960 
5990 template<typename T>
5992  const std::complex<T> * stIn,
5993  std::complex<T> * stOut ,
5994  const std::size_t shape[],
5995  int J,
5996  int Jupdown, // either +2 or -2 or 0
5997  bool conjugate=false,
5998  std::complex<T> alpha= ( T ) 1.0,
5999  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
6000  const T v_size[]=NULL,
6001  int stride_in = -1,
6002  int stride_out = -1,
6003  bool clear_field = false )
6004 {
6005  if (stIn==stOut)
6006  return STA_RESULT_SAME_ADDRESS;
6007 
6008  STA_RESULT result=STA_RESULT_FAILED;
6009  bool alpha_real=(alpha.imag()==0);
6010 
6011  switch (field_storage)
6012  {
6013  case STA_FIELD_STORAGE_C:
6014  {
6015  result=sta_derivatives2_C (stIn,
6016  stOut,
6017  shape,
6018  J,
6019  Jupdown,
6020  conjugate,
6021  alpha,
6022  v_size,
6023  stride_in,
6024  stride_out,
6025  clear_field);
6026  }
6027  break;
6028 
6029  case STA_FIELD_STORAGE_R:
6030  {
6031  if (alpha_real)
6032  result=sta_derivatives2_R (stIn,
6033  stOut,
6034  shape,
6035  J,
6036  Jupdown,
6037  conjugate,
6038  alpha.real(),
6039  v_size,
6040  stride_in,
6041  stride_out,
6042  clear_field);
6043  else
6044  return STA_RESULT_STORAGE_MISMATCH;
6045  }
6046  break;
6047  default:
6048  {
6049  printf("unsupported\n");
6050  }
6051  break;
6052 
6053 
6054 
6055  }
6056 
6057  return result;
6058 }
6059 
6060 
6061 
6062 
6063 
6065 
6077 template<typename T>
6079  const std::complex<T> * stIn,
6080  std::complex<T> * stOut ,
6081  const std::size_t shape[],
6082  int components=1,
6083  int type=1,
6084  std::complex<T> alpha=1,
6085  STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C,
6086  const T v_size[]=NULL,
6087  int stride_in = -1,
6088  int stride_out = -1,
6089  bool clear_field = false )
6090 {
6091  if (stIn==stOut)
6092  return STA_RESULT_SAME_ADDRESS;
6093 
6094 
6095  bool alpha_real=(alpha.imag()==0);
6096  if ((!alpha_real) && (field_storage!=STA_FIELD_STORAGE_C))
6097  return STA_RESULT_STORAGE_MISMATCH;
6098 
6099  //printf("%d %d %d\n",components,stride_in,stride_out);
6100  if ( components<=0 ) return STA_RESULT_INVALID_TENSOR_RANK;
6101  //if (( components==1 )&&(stride_in==-1)&&(stride_out==-1))
6102  if (( components==1 )&&(stride_in<=1)&&(stride_out<=1))
6103  sta_laplace_1component ( stIn,stOut,shape,type,alpha,v_size,clear_field);
6104  else
6105  {
6106  if (alpha_real)
6107  sta_laplace_Ncomponents_R ( stIn,stOut,shape,components,type,alpha.real(),v_size,stride_in,stride_out,clear_field);
6108  else
6109  sta_laplace_Ncomponents_C ( stIn,stOut,shape,components,type,alpha,v_size,stride_in,stride_out,clear_field);
6110  }
6111  return STA_RESULT_SUCCESS;
6112 }
6113 
6114 
6115 
6116 
6118 
6139 template<typename T,typename S>
6140 STA_RESULT sta_fft ( const std::complex<T> * stIn,
6141  std::complex<T> * stOut,
6142  const std::size_t shape[],
6143  int components,
6144  bool forward,
6145  bool conjugate=false,
6146  //S alpha = S ( 1 ),
6147  S alpha = ( S ) 1 ,
6148 #ifdef _STA_LINK_FFTW
6149  int flag=FFTW_ESTIMATE )
6150 #else
6151  int flag=0 )
6152 #endif
6153 {
6154  if (stIn==stOut)
6155  return STA_RESULT_SAME_ADDRESS;
6156 
6157  STA_RESULT result=STA_RESULT_FAILED;
6158 
6159  int shape_[3];
6160  shape_[0]=shape[0];
6161  shape_[1]=shape[1];
6162  shape_[2]=shape[2];
6163  result=fft ( stIn,stOut,shape_,components,forward,flag );
6164 
6165 
6166 
6167  if (conjugate || (alpha!=T(1)) )
6168  sta_mult (stOut,
6169  stOut,
6170  shape,
6171  components,
6172  alpha,
6173  conjugate,
6174  -1,
6175  -1,
6176  true);
6177 
6178 // #ifndef _STA_LINK_FFTW
6179 // printf("fftw libs have not been linked an no transformation is performed\n");
6180 // #endif
6181  return result;
6182 }
6183 
6184 
6185 
6186 
6187 
6188 
/// Interface for voxel-wise scalar functions applied by sta_special():
/// implementations map one complex component value to another.
/// NOTE(review): the class-head line was lost in this copy of the file; it
/// has been restored as "class sta_fspecial_func" to match the usage in
/// sta_special() below.
template<typename T>
class sta_fspecial_func
{
public:
    // polymorphic interface: virtual destructor added so deleting through a
    // base pointer is well-defined
    virtual ~sta_fspecial_func() {}
    /// applies the function to a single component value
    virtual std::complex<T> fspecial(const std::complex<T> & value) const =0;
};
6195 
6196 
6197 
6198 template<typename T>
6199 STA_RESULT sta_special (
6200  const std::complex<T> * stIn,
6201  std::complex<T> * stOut,
6202  const std::size_t shape[],
6203  const sta_fspecial_func<T> & fspecial,
6204  int ncomponents,
6205  int stride_in = -1,
6206  int stride_out = -1,
6207  bool clear_field = false )
6208 {
6209 
6210  if (stride_in==-1)
6211  stride_in=ncomponents;
6212  if (stride_out==-1)
6213  stride_out=ncomponents;
6214 
6215 
6216  std::size_t jumpz=shape[1]*shape[2];
6217 
6218 
6219 
6220  #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
6221  for (std::size_t a=0; a<shape[0]; a++ )
6222  {
6223  std::complex<T> *resultp=stOut+a*jumpz*stride_out;
6224  const std::complex<T> *inp=stIn+a*jumpz*stride_in;
6225 
6226  for ( std::size_t i=0; i<jumpz; i++ )
6227  {
6228  for (int b=0; b<ncomponents; b++)
6229  {
6230  const std::complex<T> & in=inp[b];
6231  std::complex<T> & out=resultp[b];
6232  if ( clear_field )
6233  out =0;
6234  out+=fspecial.fspecial(in);
6235  }
6236  resultp+=stride_out;
6237  inp+=stride_in;
6238  }
6239  }
6240  return hanalysis::STA_RESULT_SUCCESS;
6241 }
6242 
6243 
6244 
6245 }
6246 #endif
STA_FIELD_STORAGE
tensor field data storage
Definition: stensor.h:5163
tensor field has all components of even ranks :
Definition: stensor.h:5184
STA_RESULT sta_derivatives(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, int Jupdown, bool conjugate=false, std::complex< T > alpha=(T) 1.0, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false, int accuracy=0)
spherical tensor derivative:
Definition: stensor.h:5864
tensor field has one single component of rank :
Definition: stensor.h:5180
STA_FIELD_TYPE
tensor field data interpretations according to certain symmetries
Definition: stensor.h:5177
STA_RESULT
function return value
Definition: stensor.h:124
Definition: stensor.h:6190
STA_RESULT sta_norm(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, int stride_in=-1, int stride_out=-1, bool clear_field=false)
returns lengths of vectors component by compnent
Definition: stensor.h:5765
Definition: stensor.h:5173
STA_RESULT sta_product(const std::complex< T > *stIn1, const std::complex< T > *stIn2, std::complex< T > *stOut, const std::size_t shape[], int J1, int J2, int J, std::complex< T > alpha=T(1), bool normalize=false, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, int stride_in1=-1, int stride_in2=-1, int stride_out=-1, bool clear_field=false)
spherical tensor product: and , respectively
Definition: stensor.h:5528
Definition: stensor.h:5167
STA_RESULT sta_fft(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int components, bool forward, bool conjugate=false, S alpha=(S) 1, int flag=0)
tensor fft component by component
Definition: stensor.h:6140
The STA-ImageAnalysisToolkit namespace.
Definition: stafield.h:55
tensor field has all components of odd ranks :
Definition: stensor.h:5186
STA_RESULT sta_derivatives2(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, int Jupdown, bool conjugate=false, std::complex< T > alpha=(T) 1.0, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false)
spherical tensor double-derivative:
Definition: stensor.h:5991
STA_RESULT sta_mult(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int ncomponents, S alpha=S(1), bool conjugate=false, int stride_in=-1, int stride_out=-1, bool clear_field=false)
computes
Definition: stensor.h:5700
tensor field has all components of ranks :
Definition: stensor.h:5182
STA_RESULT sta_laplace(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int components=1, int type=1, std::complex< T > alpha=1, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false)
Laplacian: .
Definition: stensor.h:6078
Definition: stensor.h:5170