31 #include "gsl/gsl_sf_coupling.h"
32 #include "gsl/gsl_sf_laguerre.h"
33 #include "gsl/gsl_sf_gamma.h"
34 #include "gsl/gsl_sf_legendre.h"
35 #include "gsl/gsl_sf_bessel.h"
59 #define M_PI 3.14159265358979323846
65 #include "sta_omp_threads.h"
// Status codes returned by the STA library routines:
// 0 on success, a distinct negative value per failure mode.
125 STA_RESULT_SUCCESS=0,
126 STA_RESULT_FAILED=-1,
127 STA_RESULT_SHAPE_MISMATCH=-2,
128 STA_RESULT_INVALID_PRODUCT=-3,
129 STA_RESULT_STORAGE_MISMATCH=-4,
130 STA_RESULT_INVALID_TENSOR_RANK=-5,
131 STA_RESULT_OFIELD_TYPE_MISMATCH=-6,
132 STA_RESULT_SAME_ADDRESS=-7,
133 STA_RESULT_NOT_IMPLEMENTED=-8,
// Library-wide verbosity switch; >0 enables the diagnostic printf output
// scattered through the routines below.
138 static int verbose=0;
// Logistic sigmoid with slope s and offset o: 1 / (1 + exp(-s*(x-o))).
141 T sigmoid(T x, T s, T o)
143 return (T)1/((T)1+std::exp(-s*(x-o)));
// Computes the n-th zero of the spherical Bessel function j_l (via the
// half-integer-order zero J_{l+1/2}) and the corresponding normalization
// factor Nln of a spherical Fourier-Bessel basis function.
148 void getFourierFreqAndNorm(
155 double zero=gsl_sf_bessel_zero_Jnu (l+0.5, n);
// Nln = 1 / sqrt( a^3 * j_{l+1}(zero)^2 ); the float epsilon guards
// against division by zero when the Bessel value vanishes numerically.
156 Nln=std::pow(a,3.0)*std::pow(gsl_sf_bessel_jl (l+1,zero),2.0);
157 Nln=1/(std::sqrt(Nln)+std::numeric_limits<float>::epsilon());
// Clebsch-Gordan coefficient <ja ma; jb mb | J M>, computed from the
// Wigner 3j symbol via GSL:
//   CG = (-1)^(ja-jb+M) * sqrt(2J+1) * (ja jb J; ma mb -M).
178 double clebschGordan (
const int ja,
// Fallback path when GSL is not linked.
187 return nogsl_clebschGordan<double>(ja,ma,jb,mb,J,M);
// gsl_sf_coupling_3j_e expects twice the angular momentum quantum
// numbers so that half-integer spins can be represented as integers.
206 gsl_sf_result _result;
207 gsl_sf_coupling_3j_e ( 2*ja, 2*jb, J2,
208 2*ma, 2*mb, -2*M, &_result );
211 double norm = sqrt ( (
double ) ( J2+1.0 ) );
// Phase factor (-1)^(ja-jb+M); parity taken from the lowest bit.
212 int phase = ( ja-jb+M );
213 double sign = ( phase & 1 ) ? -1.0 : 1.0;
217 return _result.val*sign*norm;
// Spherical harmonic Y_l^m(theta,phi) built from GSL's spherically
// normalized associated Legendre function times exp(i*m*phi).
// The final conjugation with an extra (-1)^m for odd m converts between
// phase conventions (Condon-Shortley related) — NOTE(review): confirm
// the intended convention against callers.
// NOTE(review): gsl_sf_legendre_sphPlm is deprecated in newer GSL
// releases in favor of the gsl_sf_legendre_array interface.
226 std::complex<double> basis_SphericalHarmonics(
int l,
int m,
double theta,
double phi)
233 double legendre=gsl_sf_legendre_sphPlm (l, m, std::cos(theta));
234 std::complex<double> tmp;
235 tmp=legendre*std::exp(std::complex<double>(0,1)*(
double)m*phi);
238 if (m%2==0)
return std::conj(tmp);
239 else return -std::conj(tmp);
// Semi-Schmidt normalized spherical harmonic: Y_l^m scaled by
// sqrt(4*pi/(2l+1)).
250 std::complex<double> basis_SphericalHarmonicsSemiSchmidt(
int l,
int m,
double theta,
double phi)
253 double norm=std::sqrt(4.0*M_PI/(2.0*l+1.0));;
254 return norm*basis_SphericalHarmonics(l,m,theta,phi);
// Precomputes the Clebsch-Gordan coefficient table used by the spherical
// tensor product (J1 x J2 -> J). Returns a newly allocated array that
// the caller must delete[]. If 'normalized', each coefficient is divided
// by <J1 0; J2 0 | J 0>; 'alpha' is an overall scale factor.
274 T * sta_product_precomputeCGcoefficients_C (
int J1,
int J2,
int J,
bool normalized=
false, T alpha=1 )
280 norm= ( T ) 1/ ( T ) hanalysis::clebschGordan ( J1,0,J2,0,J,0 );
// First pass: count the valid (m, m1, m2) index combinations
// (m2 is derived from m and m1 and must satisfy |m2| <= J2).
284 for (
int m=-J; m<=J; m++ )
286 for (
int m1=-J1; m1<=J1; m1++ )
289 if ( abs ( m2 ) <=J2 )
295 T * cg=
new T[count];
// Second pass: fill the table in exactly the same iteration order,
// so consumers can walk it with a running index.
297 for (
int m=-J; m<=J; m++ )
299 for (
int m1=-J1; m1<=J1; m1++ )
302 if ( abs ( m2 ) <=J2 )
304 cg[count++]=norm* ( T ) hanalysis::clebschGordan ( J1,m1,J2,m2,J,m );
// Scans a complex tensor field (reinterpreted as interleaved reals)
// for NaN entries; shape[] holds the three grid extents.
316 const std::complex<T> * stIn,
317 const std::size_t shape[],
324 std::size_t numvoxel=shape[0]*shape[1]*shape[2];
333 const T * stIn_r=(
const T*) stIn;
// NOTE(review): "%d" is used for a pointer argument (should be %p) and
// may truncate std::size_t-like values on 64-bit builds.
336 printf(
"isnan: stride: %d components: %d data ptr: %d",stride,components,stIn_r);
338 for (std::size_t i=0;i<numvoxel;i++)
340 for (std::size_t j=0;j<components;j++)
342 if (std::isnan(stIn_r[j]))
// Scans a complex tensor field (reinterpreted as interleaved reals)
// for infinite entries; same traversal pattern as the NaN scan.
352 const std::complex<T> * stIn,
353 const std::size_t shape[],
360 std::size_t numvoxel=shape[0]*shape[1]*shape[2];
364 const T * stIn_r=(
const T*) stIn;
366 for (std::size_t i=0;i<numvoxel;i++)
368 for (std::size_t j=0;j<components;j++)
370 if (std::isinf(stIn_r[j]))
// Voxel-wise spherical tensor product of two fields of ranks J1 and J2,
// accumulated into a rank-J output field using the precomputed CG table.
// Fails with STA_RESULT_INVALID_PRODUCT if (J1,J2,J) violate the
// triangle condition, or if a normalized product is requested for odd
// J1+J2+J (the normalizer <J1 0; J2 0 | J 0> vanishes in that case).
379 template<
typename T,
typename S>
381 const std::complex<T> * stIn1,
382 const std::complex<T> * stIn2,
383 std::complex<T> * stOut ,
384 const std::size_t shape[],
389 bool normalize =
false,
393 bool clear_field =
false )
397 if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
398 return STA_RESULT_INVALID_PRODUCT;
400 if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
401 return STA_RESULT_INVALID_PRODUCT;
404 S * cg= hanalysis::sta_product_precomputeCGcoefficients_C<S> ( J1,J2, J,normalize,alpha );
// Dense per-voxel vector lengths 2J+1; strides of -1 default to these.
406 std::size_t vectorLengthJ1=J1*2+1;
407 std::size_t vectorLengthJ2=J2*2+1;
408 std::size_t vectorLengthJ=J*2+1;
411 if ( stride_in1 == -1 )
412 stride_in1 = vectorLengthJ1;
413 if ( stride_in2 == -1 )
414 stride_in2 = vectorLengthJ2;
415 if ( stride_out == -1 )
416 stride_out = vectorLengthJ;
// Parallelize over z-slices; each thread walks its slice contiguously.
418 std::size_t jumpz=shape[1]*shape[2];
420 #pragma omp parallel for num_threads(get_numCPUs())
421 for ( std::size_t z=0; z<shape[0]; z++ )
425 const std::complex<T> * current_J1=&stIn1[Z*stride_in1];
426 const std::complex<T> * current_J2=&stIn2[Z*stride_in2];
427 std::complex<T> * current_J=&stOut[Z*stride_out];
432 for ( std::size_t i=0; i<jumpz; i++ )
435 for (
int m=-J; m<=J; m++ )
437 std::complex<T> & current=current_J[m];
438 if ( clear_field ) current=T ( 0 );
// Inner CG contraction: m2 is implied by m and m1; cg[] is walked
// in the same order it was precomputed.
440 for (
int m1=-J1; m1<=J1; m1++ )
443 if ( std::abs ( m2 ) <=J2 )
445 current+= ( current_J1[m1] ) * ( current_J2[m2] ) *cg[count++];
449 current_J1+=stride_in1;
450 current_J2+=stride_in2;
451 current_J+=stride_out;
455 return STA_RESULT_SUCCESS;
// First-order spherical derivative operator: maps a rank-J field to a
// rank-(J+Jupdown) field (Jupdown in {-1,0,+1}) using central finite
// differences in x/y/z combined through a 3-column Clebsch-Gordan table
// (spherical components -1, 0, +1 of the gradient).
690 template<
typename T,
typename S>
693 std::complex<T> * stOut ,
694 const std::size_t shape[],
697 bool conjugate=
false,
698 std::complex<T> alpha= ( T ) 1.0,
699 const T v_size[]=NULL,
702 bool clear_field =
false )
705 if ( abs ( Jupdown ) >1 )
return STA_RESULT_INVALID_TENSOR_RANK;
// NOTE(review): abs(...)<0 can never be true — this check is dead code.
706 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
// 'imag' carries -i (or +i when conjugate), used to combine the x/y
// differences into spherical gradient components.
709 std::complex<T> imag=-std::complex<T> ( 0,1 );
710 if (conjugate) imag*=T( -1 );
// Voxel sizes default to 1; a supplied v_size rescales the differences.
713 voxel_size[0]=voxel_size[1]=voxel_size[2]=T ( 1 );
716 voxel_size[0]/=v_size[0];
717 voxel_size[1]/=v_size[1];
718 voxel_size[2]/=v_size[2];
725 std::size_t vectorLengthJ=2*J+1;
726 std::size_t vectorLengthJ1=2* ( J1 ) +1;
728 if ( stride_in == -1 )
729 stride_in = vectorLengthJ;
730 if ( stride_out == -1 )
731 stride_out = vectorLengthJ1;
734 std::size_t jumpz=shape[1]*shape[2];
735 std::size_t jumpy=shape[2];
// CG table: three columns (gradient components m'=-1,0,+1), indexed by
// output order M in [-J1, J1]; normalized by <1 0; J 0 | J1 0>.
738 T * CGTable=
new T[3*vectorLengthJ1];
739 T shnorm=hanalysis::clebschGordan ( 1,0,J,0,J1,0 );
740 if ( Jupdown==0 ) shnorm=1;
741 for (
int M=- ( J1 ); M<= ( J1 ); M++ )
743 CGTable[M+ ( J1 ) ] =T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,-1,J,M+1,J1,M ) /shnorm;;
744 CGTable[M+ ( J1 ) +vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan ( 1,0,J,M,J1,M ) /shnorm;
745 CGTable[M+ ( J1 ) +2*vectorLengthJ1]=T ( 1.0/std::sqrt ( 2.0 ) ) *hanalysis::clebschGordan ( 1,1,J,M-1,J1,M ) /shnorm;
747 T * CGTable0=&CGTable[0];
749 T * CGTable1=&CGTable[vectorLengthJ1];
751 T * CGTable2=&CGTable[2*vectorLengthJ1];
754 #pragma omp parallel for num_threads(get_numCPUs())
755 for ( std::size_t z=0; z<shape[0]; z++ )
778 for ( std::size_t y=0; y<shape[1]; y++ )
792 for ( std::size_t x=0; x<shape[2]; x++ )
// Neighbor pointers for central differences; Z/Y/X hold the
// previous/current/next slice offsets, +J centers the m index.
802 derivX1=&stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in]+J;
803 derivX0=&stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in]+J;
805 derivY1=&stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in]+J;
806 derivY0=&stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in]+J;
808 derivZ1=&stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in]+J;
809 derivZ0=&stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in]+J;
811 std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+J1;
813 for (
int M=- ( J1 ); M<= ( J1 ); M++ )
815 std::complex<T> & current=stOut[offset+M];
816 if ( clear_field ) current=T ( 0 );
817 std::complex<T> tmp=T ( 0 );
// Spherical component -1: (d/dx + i d/dy)-type combination.
819 if ( abs ( M+1 ) <=J )
822 tmp+=CGTable0[M]* ( voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) +imag* ( derivY0[m2]-derivY1[m2] ) );
// Spherical component 0: pure z-difference.
826 tmp+=CGTable1[M]* ( derivZ0[M]-derivZ1[M] );
// Spherical component +1: (-d/dx + i d/dy)-type combination.
828 if ( abs ( M-1 ) <=J )
831 tmp+=CGTable2[M]* ( -voxel_size[2]* ( derivX0[m2]-derivX1[m2] ) +imag* ( derivY0[m2]-derivY1[m2] ) );
839 return ( STA_RESULT_SUCCESS );
// Second-order spherical derivative operator: maps a rank-J field to a
// rank-(J+Jupdown) field with Jupdown in {-2, 0, +2} (rank change +/-1
// is rejected), combining second central differences (Dxx, Dyy, Dzz,
// Dxy, Dxz, Dyz) through a 5-column Clebsch-Gordan table for the five
// spherical components m' = -2..+2 of the rank-2 derivative.
875 template<
typename T,
typename S>
878 std::complex<T> * stOut ,
879 const std::size_t shape[],
882 bool conjugate=
false,
883 std::complex<T> alpha= ( T ) 1.0,
884 const T v_size[]=NULL,
887 bool clear_field =
false )
889 if ( abs ( Jupdown ) >2 )
return STA_RESULT_INVALID_TENSOR_RANK;
890 if ( abs ( Jupdown ) ==1 )
return STA_RESULT_INVALID_TENSOR_RANK;
// NOTE(review): abs(...)<0 can never be true — this check is dead code.
891 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
// Voxel sizes default to 1; anisotropic v_size is only warned about
// here, not yet applied to the second differences.
896 voxel_size[0]=voxel_size[1]=voxel_size[2]=T ( 1 );
899 voxel_size[0]/=v_size[0];
900 voxel_size[1]/=v_size[1];
901 voxel_size[2]/=v_size[2];
902 if (hanalysis::verbose>0)
903 printf(
"element size not considered yet: sta_derivatives2 \n");
906 std::complex<T> imag=-std::complex<T> ( 0,1 );
907 if (conjugate) imag*=T( -1 );
// Global scale sqrt(3/2) folded into alpha.
909 alpha*=T ( sqrt ( 3.0/2.0 ) );
913 int vectorLengthJ=J*2+1;
914 int vectorLengthJ1= ( J1 ) *2+1;
916 if ( stride_in == -1 )
917 stride_in = vectorLengthJ;
918 if ( stride_out == -1 )
919 stride_out = vectorLengthJ1;
921 std::size_t jumpz=shape[1]*shape[2];
922 std::size_t jumpy=shape[2];
// CG table: five columns for m' = -2,-1,0,+1,+2, indexed by M,
// normalized by <2 0; J 0 | J1 0>.
925 T * CGTable=
new T[5*vectorLengthJ1];
926 T shnorm=hanalysis::clebschGordan ( 2,0,J,0,J1,0 );
927 for (
int M=- ( J1 ); M<= ( J1 ); M++ )
929 CGTable[M+ ( J1 ) ] =hanalysis::clebschGordan ( 2,-2,J,M+2,J1,M ) /shnorm;
930 CGTable[M+ ( J1 ) +vectorLengthJ1] =hanalysis::clebschGordan ( 2,-1,J,M+1,J1,M ) /shnorm;;
931 CGTable[M+ ( J1 ) +2*vectorLengthJ1]=hanalysis::clebschGordan ( 2,0,J,M,J1,M ) /shnorm;
932 CGTable[M+ ( J1 ) +3*vectorLengthJ1]=hanalysis::clebschGordan ( 2,1,J,M-1,J1,M ) /shnorm;
933 CGTable[M+ ( J1 ) +4*vectorLengthJ1]=hanalysis::clebschGordan ( 2,2,J,M-2,J1,M ) /shnorm;
935 T * CGTable0=&CGTable[0];
937 T * CGTable1=&CGTable[vectorLengthJ1];
939 T * CGTable2=&CGTable[2*vectorLengthJ1];
941 T * CGTable3=&CGTable[3*vectorLengthJ1];
943 T * CGTable4=&CGTable[4*vectorLengthJ1];
946 #pragma omp parallel for num_threads(get_numCPUs())
947 for ( std::size_t z=0; z<shape[0]; z++ )
// NOTE(review): this 'current' is shadowed by the reference declared
// in the M-loop below (line 1079).
1002 std::complex<T> current;
1005 for ( std::size_t y=0; y<shape[1]; y++ )
1026 for ( std::size_t x=0; x<shape[2]; x++ )
// 19-point neighborhood pointers; XaYbZc addresses the voxel at
// x-offset a, y-offset b, z-offset c (1=prev, 2=center, 3=next),
// with +J centering the m index.
1042 X1Y1Z2=&stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in]+J;
1044 X1Y2Z1=&stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in]+J;
1045 X1Y2Z2=&stIn[ ( Z[2]+Y[2]+X[1] ) *stride_in]+J;
1046 X1Y2Z3=&stIn[ ( Z[3]+Y[2]+X[1] ) *stride_in]+J;
1048 X1Y3Z2=&stIn[ ( Z[2]+Y[3]+X[1] ) *stride_in]+J;
1052 X2Y1Z1=&stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in]+J;
1053 X2Y1Z2=&stIn[ ( Z[2]+Y[1]+X[2] ) *stride_in]+J;
1054 X2Y1Z3=&stIn[ ( Z[3]+Y[1]+X[2] ) *stride_in]+J;
1055 X2Y2Z1=&stIn[ ( Z[1]+Y[2]+X[2] ) *stride_in]+J;
1056 X2Y2Z2=&stIn[ ( Z[2]+Y[2]+X[2] ) *stride_in]+J;
1057 X2Y2Z3=&stIn[ ( Z[3]+Y[2]+X[2] ) *stride_in]+J;
1058 X2Y3Z1=&stIn[ ( Z[1]+Y[3]+X[2] ) *stride_in]+J;
1059 X2Y3Z2=&stIn[ ( Z[2]+Y[3]+X[2] ) *stride_in]+J;
1060 X2Y3Z3=&stIn[ ( Z[3]+Y[3]+X[2] ) *stride_in]+J;
1064 X3Y1Z2=&stIn[ ( Z[2]+Y[1]+X[3] ) *stride_in]+J;
1066 X3Y2Z1=&stIn[ ( Z[1]+Y[2]+X[3] ) *stride_in]+J;
1067 X3Y2Z2=&stIn[ ( Z[2]+Y[2]+X[3] ) *stride_in]+J;
1068 X3Y2Z3=&stIn[ ( Z[3]+Y[2]+X[3] ) *stride_in]+J;
1070 X3Y3Z2=&stIn[ ( Z[2]+Y[3]+X[3] ) *stride_in]+J;
1074 std::size_t offset= ( Z[2]+Y[2]+X[2] ) *stride_out+J1;
1077 for (
int M=- ( J1 ); M<= ( J1 ); M++ )
1079 std::complex<T> & current=stOut[offset+M];
1080 if ( clear_field ) current=T ( 0 );
1081 std::complex<T> tmp=T ( 0 );
// m' = -2 component: (Dxx - Dyy) - 2i*Dxy.
1084 if ( abs ( M+2 ) <=J )
1087 std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1088 std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1089 std::complex<T> Dxy=- ( T ) 0.25* ( X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2] );
1090 tmp+= ( T ) 0.5*CGTable0[M]* ( ( Dxx-Dyy )-imag* ( ( T ) 2.0*Dxy ) );
// m' = -1 component: Dxz - i*Dyz.
1094 if ( abs ( M+1 ) <=J )
1097 std::complex<T> Dxz= ( T ) 0.25* ( X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2] );
1098 std::complex<T> Dyz=- ( T ) 0.25* ( X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2] );
1099 tmp+=CGTable1[M]* ( ( Dxz )-imag* ( Dyz ) );
// m' = 0 component: traceless combination (Dxx + Dyy - 2*Dzz)/(-sqrt(6)).
1104 if ( abs ( M ) <=J )
1107 std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1108 std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1109 std::complex<T> Dzz= ( X2Y2Z1[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y2Z3[m2] );
1110 const T SQRT6= ( T ) ( -1.0/std::sqrt ( 6.0 ) );
1111 tmp+=CGTable2[M]* ( ( Dxx+Dyy- ( T ) 2.0*Dzz ) * ( SQRT6 ) );
// m' = +1 component: -(Dxz + i*Dyz).
1114 if ( abs ( M-1 ) <=J )
1117 std::complex<T> Dxz= ( T ) 0.25* ( X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2] );
1118 std::complex<T> Dyz=- ( T ) 0.25* ( X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2] );
1119 tmp-=CGTable3[M]* ( ( Dxz ) +imag* ( Dyz ) );
// m' = +2 component: (Dxx - Dyy) + 2i*Dxy.
1123 if ( abs ( M-2 ) <=J )
1126 std::complex<T> Dxx= ( X1Y2Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X3Y2Z2[m2] );
1127 std::complex<T> Dyy= ( X2Y1Z2[m2]- ( T ) 2*X2Y2Z2[m2]+X2Y3Z2[m2] );
1128 std::complex<T> Dxy=- ( T ) 0.25* ( X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2] );
1129 tmp+= ( T ) 0.5*CGTable4[M]* ( ( Dxx-Dyy ) +imag* ( ( T ) 2.0*Dxy ) );
1140 return STA_RESULT_SUCCESS;
// Discrete 3D Laplacian of a single-component field, selectable stencil:
// an 18-neighbor stencil (weight -18 at the center), the classic
// 6-neighbor stencil (weight -6), or an anisotropic 6-neighbor stencil
// weighted by 1/v_size^2 per axis. Result is accumulated into stOut
// scaled by alpha.
1152 template<
typename T,
typename S>
1154 const S * stIn,std::complex<T> * stOut ,
1155 const std::size_t shape[],
1157 std::complex<T> alpha= ( T ) 1.0,
1158 const T v_size[]=NULL,
1159 bool clear_field=
false)
// Per-axis weights default to 1; voxel_weights[3] becomes the center
// weight 2*(w0+w1+w2) for the anisotropic stencil.
1173 voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;
1180 if (hanalysis::verbose>0)
1181 printf (
"WARNING! element size is not considered yet!\n" );
1185 voxel_weights[0]/=v_size[0]*v_size[0];
1186 voxel_weights[1]/=v_size[1]*v_size[1];
1187 voxel_weights[2]/=v_size[2]*v_size[2];
1190 if (hanalysis::verbose>0)
1191 printf (
"v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
1194 voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]);
1197 if (hanalysis::verbose>0)
1198 printf (
"laplace_1: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );
1200 std::size_t jumpz=shape[1]*shape[2];
1201 std::size_t jumpy=shape[2];
// Variant 1: 18-neighbor stencil.
1208 #pragma omp parallel for num_threads(get_numCPUs())
1209 for ( std::size_t z=0; z<shape[0]; z++ )
1223 for ( std::size_t y=0; y<shape[1]; y++ )
1237 for ( std::size_t x=0; x<shape[2]; x++ )
1247 std::size_t offset= ( Z[1]+Y[1]+X[1] );
1248 std::complex<T> & current=stOut[offset];
1249 if ( clear_field ) current=T ( 0 );
1252 stIn[ ( Z[0]+Y[0]+X[1] ) ] +
1253 stIn[ ( Z[0]+Y[1]+X[0] ) ] +
1254 stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1255 stIn[ ( Z[0]+Y[1]+X[2] ) ] +
1256 stIn[ ( Z[0]+Y[2]+X[1] ) ] +
1257 stIn[ ( Z[1]+Y[0]+X[0] ) ] +
1258 stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1259 stIn[ ( Z[1]+Y[0]+X[2] ) ] +
1260 stIn[ ( Z[1]+Y[1]+X[0] ) ]-
1261 T ( 18 ) *stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1262 stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1263 stIn[ ( Z[1]+Y[2]+X[0] ) ] +
1264 stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1265 stIn[ ( Z[1]+Y[2]+X[2] ) ] +
1266 stIn[ ( Z[2]+Y[0]+X[1] ) ] +
1267 stIn[ ( Z[2]+Y[1]+X[0] ) ] +
1268 stIn[ ( Z[2]+Y[1]+X[1] ) ] +
1269 stIn[ ( Z[2]+Y[1]+X[2] ) ] +
1270 stIn[ ( Z[2]+Y[2]+X[1] ) ]
// Variant 2: 6-neighbor stencil (isotropic).
1282 #pragma omp parallel for num_threads(get_numCPUs())
1283 for ( std::size_t z=0; z<shape[0]; z++ )
1297 for ( std::size_t y=0; y<shape[1]; y++ )
1311 for ( std::size_t x=0; x<shape[2]; x++ )
1322 std::complex<T> & current=stOut[Z[1]+Y[1]+X[1]];
1324 if ( clear_field ) current=T ( 0 );
1327 stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1328 stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1329 stIn[ ( Z[1]+Y[1]+X[0] ) ] -
1330 T ( 6 ) *stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1331 stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1332 stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1333 stIn[ ( Z[2]+Y[1]+X[1] ) ]
// Variant 3: 6-neighbor stencil with per-axis anisotropic weights.
1347 #pragma omp parallel for num_threads(get_numCPUs())
1348 for ( std::size_t z=0; z<shape[0]; z++ )
1362 for ( std::size_t y=0; y<shape[1]; y++ )
1376 for ( std::size_t x=0; x<shape[2]; x++ )
1387 std::complex<T> & current=stOut[Z[1]+Y[1]+X[1]];
1389 if ( clear_field ) current=T ( 0 );
1392 voxel_weights[0]*stIn[ ( Z[0]+Y[1]+X[1] ) ] +
1393 voxel_weights[1]*stIn[ ( Z[1]+Y[0]+X[1] ) ] +
1394 voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[0] ) ] -
1395 voxel_weights[3]*stIn[ ( Z[1]+Y[1]+X[1] ) ] +
1396 voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[2] ) ] +
1397 voxel_weights[1]*stIn[ ( Z[1]+Y[2]+X[1] ) ] +
1398 voxel_weights[0]*stIn[ ( Z[2]+Y[1]+X[1] ) ]
// NOTE(review): message typo "unsoported" — the sibling routine below
// spells it "unsupported".
1409 printf (
"unsoported operator\n" );
1411 return STA_RESULT_SUCCESS;
// Discrete 3D Laplacian of a multi-component (strided) complex field:
// same stencil choices as the single-component version, applied per
// component j in [0, dim) with input/output strides.
1418 template<
typename T,
typename S>
1420 const S * stIn,std::complex<T> * stOut ,
1421 const std::size_t shape[],
1424 std::complex<T> alpha=1,
1425 const T v_size[]=NULL,
1427 int stride_out = -1,
1428 bool clear_field=
false)
1434 if (hanalysis::verbose>0)
1435 printf (
"WARNING! element size is not considered yet!\n" );
// Per-axis weights default to 1; voxel_weights[3] becomes the center
// weight 2*(w0+w1+w2) for the anisotropic stencil.
1443 voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;
1456 if (hanalysis::verbose>0)
1457 printf (
"WARNING! element size is not considered yet!\n" );
1461 voxel_weights[0]/=v_size[0]*v_size[0];
1462 voxel_weights[1]/=v_size[1]*v_size[1];
1463 voxel_weights[2]/=v_size[2]*v_size[2];
1466 if (hanalysis::verbose>0)
1467 printf (
"v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
1470 voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]);
1473 if (hanalysis::verbose>0)
1474 printf (
"laplace_C: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );
1478 if ( stride_in == -1 )
1480 if ( stride_out == -1 )
1484 std::size_t jumpz=shape[1]*shape[2];
1485 std::size_t jumpy=shape[2];
// Variant 1: 18-neighbor stencil, per component j.
1492 #pragma omp parallel for num_threads(get_numCPUs())
1493 for ( std::size_t z=0; z<shape[0]; z++ )
1507 for ( std::size_t y=0; y<shape[1]; y++ )
1521 for ( std::size_t x=0; x<shape[2]; x++ )
1531 for (
int j=0; j<dim; j++ )
1533 std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+j;
1534 std::complex<T> & current=stOut[offset];
1535 if ( clear_field ) current=T ( 0 );
1538 stIn[ ( Z[0]+Y[0]+X[1] ) *stride_in+j] +
1539 stIn[ ( Z[0]+Y[1]+X[0] ) *stride_in+j] +
1540 stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in+j] +
1541 stIn[ ( Z[0]+Y[1]+X[2] ) *stride_in+j] +
1542 stIn[ ( Z[0]+Y[2]+X[1] ) *stride_in+j] +
1543 stIn[ ( Z[1]+Y[0]+X[0] ) *stride_in+j] +
1544 stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in+j] +
1545 stIn[ ( Z[1]+Y[0]+X[2] ) *stride_in+j] +
1546 stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in+j]-
1547 T ( 18 ) *stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in+j] +
1548 stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in+j] +
1549 stIn[ ( Z[1]+Y[2]+X[0] ) *stride_in+j] +
1550 stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in+j] +
1551 stIn[ ( Z[1]+Y[2]+X[2] ) *stride_in+j]+
1552 stIn[ ( Z[2]+Y[0]+X[1] ) *stride_in+j] +
1553 stIn[ ( Z[2]+Y[1]+X[0] ) *stride_in+j] +
1554 stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in+j] +
1555 stIn[ ( Z[2]+Y[1]+X[2] ) *stride_in+j] +
1556 stIn[ ( Z[2]+Y[2]+X[1] ) *stride_in+j]
// Variant 2: weighted 6-neighbor stencil, per component j.
1568 #pragma omp parallel for num_threads(get_numCPUs())
1569 for ( std::size_t z=0; z<shape[0]; z++ )
1585 for ( std::size_t y=0; y<shape[1]; y++ )
1599 for ( std::size_t x=0; x<shape[2]; x++ )
1609 for (
int j=0; j<dim; j++ )
1612 std::size_t offset= ( Z[1]+Y[1]+X[1] ) *stride_out+j;
1613 std::complex<T> & current=stOut[offset];
1614 if ( clear_field ) current=T ( 0 );
1627 voxel_weights[0]*stIn[ ( Z[0]+Y[1]+X[1] ) *stride_in+j] +
1628 voxel_weights[1]*stIn[ ( Z[1]+Y[0]+X[1] ) *stride_in+j] +
1629 voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[0] ) *stride_in+j] -
1630 voxel_weights[3]*stIn[ ( Z[1]+Y[1]+X[1] ) *stride_in+j] +
1631 voxel_weights[2]*stIn[ ( Z[1]+Y[1]+X[2] ) *stride_in+j] +
1632 voxel_weights[1]*stIn[ ( Z[1]+Y[2]+X[1] ) *stride_in+j] +
1633 voxel_weights[0]*stIn[ ( Z[2]+Y[1]+X[1] ) *stride_in+j]
1648 printf (
"unsupported operator\n" );
1654 return STA_RESULT_SUCCESS;
// Discrete 3D Laplacian over a complex field reinterpreted as
// interleaved reals (stIn_r/stOut_r), walking the component dimension
// with post-incremented neighbor pointers instead of index arithmetic.
1660 template<
typename T>
1662 const std::complex<T> * stIn,
1663 std::complex<T> * stOut ,
1664 const std::size_t shape[],
1668 const T v_size[]=NULL,
1670 int stride_out = -1,
1671 bool clear_field=
false)
// Per-axis weights default to 1; voxel_weights[3] becomes the center
// weight 2*(w0+w1+w2) for the anisotropic stencil.
1684 voxel_weights[0]=voxel_weights[1]=voxel_weights[2]=voxel_weights[3]=1;
1698 if (hanalysis::verbose>0)
1699 printf (
"WARNING! element size is not considered yet!\n" );
1703 voxel_weights[0]/=v_size[0]*v_size[0];
1704 voxel_weights[1]/=v_size[1]*v_size[1];
1705 voxel_weights[2]/=v_size[2]*v_size[2];
1707 if (hanalysis::verbose>0)
1708 printf (
"v_size: [%f %f %f]\n",v_size[0],v_size[1],v_size[2] );
1711 voxel_weights[3]*=2*(voxel_weights[0]+voxel_weights[1]+voxel_weights[2]);
1714 if (hanalysis::verbose>0)
1715 printf (
"laplace_R: [%f %f %f %f]\n",voxel_weights[0],voxel_weights[1],voxel_weights[2],voxel_weights[3] );
1720 if ( stride_in == -1 )
1722 if ( stride_out == -1 )
1726 std::size_t jumpz=shape[1]*shape[2];
1727 std::size_t jumpy=shape[2];
// Reinterpret the complex buffers as raw real arrays.
1733 const T * stIn_r=(
const T*) stIn;
1734 T * stOut_r=( T*) stOut;
// Variant 1: 18-neighbor stencil; pXYZ pointers name the neighbor by
// its z/y/x offset digits (0=prev, 1=center, 2=next).
1741 #pragma omp parallel for num_threads(get_numCPUs())
1742 for ( std::size_t z=0; z<shape[0]; z++ )
1782 for ( std::size_t y=0; y<shape[1]; y++ )
1796 for ( std::size_t x=0; x<shape[2]; x++ )
1830 p001=stIn_r+ ( Z[0]+Y[0]+X[1] ) *stride_in;
1831 p010=stIn_r+ ( Z[0]+Y[1]+X[0] ) *stride_in;
1832 p011=stIn_r+ ( Z[0]+Y[1]+X[1] ) *stride_in;
1833 p012=stIn_r+ ( Z[0]+Y[1]+X[2] ) *stride_in;
1834 p021=stIn_r+ ( Z[0]+Y[2]+X[1] ) *stride_in;
1835 p100=stIn_r+ ( Z[1]+Y[0]+X[0] ) *stride_in;
1836 p101=stIn_r+ ( Z[1]+Y[0]+X[1] ) *stride_in;
1837 p102=stIn_r+ ( Z[1]+Y[0]+X[2] ) *stride_in;
1838 p110=stIn_r+ ( Z[1]+Y[1]+X[0] ) *stride_in;
1840 p111=stIn_r+ ( Z[1]+Y[1]+X[1] ) *stride_in;
1842 p112=stIn_r+ ( Z[1]+Y[1]+X[2] ) *stride_in;
1843 p120=stIn_r+ ( Z[1]+Y[2]+X[0] ) *stride_in;
1844 p121=stIn_r+ ( Z[1]+Y[2]+X[1] ) *stride_in;
1845 p122=stIn_r+ ( Z[1]+Y[2]+X[2] ) *stride_in;
1846 p201=stIn_r+ ( Z[2]+Y[0]+X[1] ) *stride_in;
1847 p210=stIn_r+ ( Z[2]+Y[1]+X[0] ) *stride_in;
1848 p211=stIn_r+ ( Z[2]+Y[1]+X[1] ) *stride_in;
1849 p212=stIn_r+ ( Z[2]+Y[1]+X[2] ) *stride_in;
1850 p221=stIn_r+ ( Z[2]+Y[2]+X[1] ) *stride_in;
1853 T * current=stOut_r+( Z[1]+Y[1]+X[1] ) *stride_out;
// Walk all dim real components, advancing every pointer in lockstep.
1855 for (
int j=0; j<dim; j++ )
1858 if ( clear_field ) (*current)=T ( 0 );
1860 (*current++)+=alpha* (
1870 T ( 18 ) * (*p111++) +
// Variant 2: weighted 6-neighbor stencil, same pointer-walk scheme.
1921 #pragma omp parallel for num_threads(get_numCPUs())
1922 for ( std::size_t z=0; z<shape[0]; z++ )
1947 for ( std::size_t y=0; y<shape[1]; y++ )
1961 for ( std::size_t x=0; x<shape[2]; x++ )
1971 p011=stIn_r+ ( Z[0]+Y[1]+X[1] ) *stride_in;
1972 p101=stIn_r+ ( Z[1]+Y[0]+X[1] ) *stride_in;
1973 p110=stIn_r+ ( Z[1]+Y[1]+X[0] ) *stride_in;
1974 p111=stIn_r+ ( Z[1]+Y[1]+X[1] ) *stride_in;
1975 p112=stIn_r+ ( Z[1]+Y[1]+X[2] ) *stride_in;
1976 p121=stIn_r+ ( Z[1]+Y[2]+X[1] ) *stride_in;
1977 p211=stIn_r+ ( Z[2]+Y[1]+X[1] ) *stride_in;
1987 T * current=stOut_r+( Z[1]+Y[1]+X[1] ) *stride_out;
1989 for (
int j=0; j<dim; j++ )
1991 if ( clear_field ) (*current)=T ( 0 );
2003 (*current++)+=alpha* (
2004 voxel_weights[0]*(*p011++) +
2005 voxel_weights[1]*(*p101++) +
2006 voxel_weights[2]*(*p110++) -
2007 voxel_weights[3]*(*p111++) +
2008 voxel_weights[2]*(*p112++) +
2009 voxel_weights[1]*(*p121++) +
2010 voxel_weights[0]*(*p211++)
2042 printf (
"unsupported operator\n" );
2048 return STA_RESULT_SUCCESS;
2063 #ifdef _STA_LINK_FFTW
// Double-precision FFT wrapper around FFTW's advanced interface
// (fftw_plan_many_dft): transforms numComponents interleaved complex
// fields in one plan; direction chosen by 'forward'. On Linux, plan
// wisdom is cached in a per-host file under $HOME.
2068 STA_RESULT fft (
const std::complex<double> * IN,
2069 std::complex<double> * OUT,
2070 int shape[],
int numComponents,
2072 int flag=FFTW_ESTIMATE )
2074 #ifdef _STA_FFT_MULTI_THREAD
2075 fftw_init_threads();
2076 fftw_plan_with_nthreads ( get_numCPUs() );
2077 if ( verbose>0 ) printf (
"FFTW with %d threads \n",get_numCPUs() );
2079 if ( verbose>0 ) printf (
"FFTW is single threaded\n" );
// Advanced-interface parameters: components are interleaved, so the
// per-transform stride equals numComponents.
2087 int howmany=numComponents;
2088 fftw_complex * in = ( fftw_complex * ) IN;
2090 int istride=numComponents;
2092 fftw_complex * out = ( fftw_complex * ) OUT;
2093 int * onembed=inembed;
2095 int ostride=istride;
2096 int sign=FFTW_FORWARD;
2097 if ( !forward ) sign=FFTW_BACKWARD;
2098 unsigned flags=flag | FFTW_PRESERVE_INPUT;
// Wisdom cache: ~/.stensor_mywisdom_<hostname>.wisdom.
// NOTE(review): getenv("HOME") may return NULL, which would make the
// std::string constructor crash — worth guarding.
2112 #if defined (__linux__) && ! defined (_STA_MWFFTW)
2115 gethostname ( buffer,255 );
2117 s=std::string ( getenv (
"HOME" ) ) +std::string (
"/.stensor_mywisdom_" ) +std::string ( buffer ) +
".wisdom";
2123 ifp = fopen ( s.c_str(),
"r" );
2126 if ( 0==fftw_import_wisdom_from_file ( ifp ) )
2127 printf (
"Error reading wisdom file: %s!\n",s.c_str() );
2130 else printf (
"Wisdom file does not exist!\n" );
2134 fftw_plan plan=fftw_plan_many_dft ( rank,n,howmany,in,inembed,istride, idist,out,onembed, ostride, odist, sign, flags | FFTW_PRESERVE_INPUT );
2137 printf (
"no plan\n" );
2138 return STA_RESULT_FAILED;
2141 fftw_execute_dft ( plan,in, out );
// Re-export (possibly updated) wisdom after planning/execution.
2144 #if defined (__linux__) && ! defined (_STA_MWFFTW)
2146 ifp = fopen ( s.c_str(),
"w" );
2149 fftw_export_wisdom_to_file ( ifp );
2152 else printf (
"Error creating file!\n" );
2156 fftw_destroy_plan ( plan );
2158 return STA_RESULT_SUCCESS;
// Single-precision counterpart of the FFT wrapper above, using the
// fftwf_* API and a separate "_single" wisdom cache file.
2166 STA_RESULT fft (
const std::complex<float> * IN,
2167 std::complex<float> * OUT,
2171 int flag=FFTW_ESTIMATE )
2173 #ifdef _STA_FFT_MULTI_THREAD
2174 fftwf_init_threads();
2175 fftwf_plan_with_nthreads ( get_numCPUs() );
2176 if ( verbose>0 ) printf (
"FFTW with %d threads \n",get_numCPUs() );
2178 if ( verbose>0 ) printf (
"FFTW is single threaded\n" );
// Components are interleaved: per-transform stride = numComponents.
2183 int howmany=numComponents;
2184 fftwf_complex * in = ( fftwf_complex * ) IN;
2186 int istride=numComponents;
2188 fftwf_complex * out = ( fftwf_complex * ) OUT;
2189 int * onembed=inembed;
2191 int ostride=istride;
2192 int sign=FFTW_FORWARD;
2193 if ( !forward ) sign=FFTW_BACKWARD;
2194 unsigned flags=flag | FFTW_PRESERVE_INPUT;
// Wisdom cache: ~/.stensor_mywisdom_<hostname>_single.wisdom.
// NOTE(review): getenv("HOME") may return NULL — worth guarding.
2198 #if defined (__linux__) && ! defined (_STA_MWFFTW)
2200 gethostname ( buffer,255 );
2202 s=std::string ( getenv (
"HOME" ) ) +std::string (
"/.stensor_mywisdom_" ) +std::string ( buffer ) +
"_single.wisdom";
2209 ifp = fopen ( s.c_str(),
"r" );
2212 if ( 0==fftwf_import_wisdom_from_file ( ifp ) )
2213 printf (
"Error reading wisdom file: %s!\n",s.c_str() );
2216 else printf (
"Wisdom file does not exist!\n" );
2220 fftwf_plan plan=fftwf_plan_many_dft ( rank,n,howmany,in,inembed,istride, idist,out,onembed, ostride, odist, sign, flags | FFTW_PRESERVE_INPUT );
2223 printf (
"no plan\n" );
2224 return STA_RESULT_FAILED;
2227 fftwf_execute_dft ( plan,in, out );
2231 #if defined (__linux__) && ! defined (_STA_MWFFTW)
2233 ifp = fopen ( s.c_str(),
"w" );
2236 fftwf_export_wisdom_to_file ( ifp );
2239 else printf (
"Error creating file!\n" );
2243 fftwf_destroy_plan ( plan );
2244 return STA_RESULT_SUCCESS;
2250 #if ! defined (_STA_CUDA)
2252 #include "matlab_fft.icc"
// Precomputes the Clebsch-Gordan table for the real-storage spherical
// tensor transform (ranks J, L -> rank j related output): two passes,
// first counting entries for m in [-rank, 0] (conjugate symmetry makes
// the positive-m half redundant), then filling the newly allocated
// array (caller owns / must delete[]). Each entry carries the phase
// (-1)^(L+n) and the overall scale 'norm'.
2288 template<
typename T>
2289 T * sta_th_precomputeCGcoefficients_R(
int J,
int L,
int j, T norm)
2291 std::size_t count=0;
// Pass 1: count valid (m, M) combinations.
2294 for (
int m=-rank; m<=0; m++)
2296 for (
int M=-J; M<=J; M++)
2309 T * cg=
new T[count];
// Pass 2: fill in identical iteration order.
2311 for (
int m=-rank; m<=0; m++)
2313 for (
int M=-J; M<=J; M++)
2320 double sign=((L+n) & 1) ? -1.0 : 1.0;
2321 cg[count++]=sign*norm*(T)hanalysis::clebschGordan(L,-n,J,M,rank,m);
// Real-storage spherical tensor transform: contracts an array of J+1
// input fields (stIn[0..J]) against the precomputed CG table into one
// output field, exploiting the conjugate symmetry m <= 0 so only the
// non-positive-m half of each per-voxel vector is stored (interleaved
// real/imaginary parts accessed through raw T pointers).
2337 template<
typename T>
2339 const std::complex<T> ** stIn,
2340 std::complex<T> * stOut ,
2341 const std::size_t shape[],
2347 int stride_out = -1,
2348 bool clear_field=
false)
2351 return STA_RESULT_INVALID_TENSOR_RANK;
2353 return STA_RESULT_INVALID_TENSOR_RANK;
2355 return STA_RESULT_INVALID_TENSOR_RANK;
2366 printf(
"result in inv\n");
2372 T * Cg = sta_th_precomputeCGcoefficients_R<T> ( J,L,j,alpha);
// Real-storage vector lengths: only m<=0 components are kept.
2374 std::size_t vectorLengthIn= (2* L+1 );
2375 std::size_t vectorLengthOut= ( j+L+1 );
2377 if ( stride_in == -1 )
2378 stride_in = vectorLengthIn;
2379 if ( stride_out == -1 )
2380 stride_out = vectorLengthOut;
// Sanity check on the rank relations.
// NOTE(review): leftover German debug output below ("got here despite
// the test??") — should be removed or demoted to verbose-only.
2388 if (!((rankOut>=0)&&(std::abs(j)<=J)&&(J<=j+2*L)&&(L>=0)))
2390 printf(
"trotz test bis hierher??");
2391 return STA_RESULT_FAILED;
2408 int rankIn_times_2=rankIn*2;
2409 int rankOut_times_2=rankOut*2;
2411 std::size_t jumpz=shape[1]*shape[2];
2413 T * stOutR= ( T * ) stOut;
2415 #pragma omp parallel for num_threads(get_numCPUs())
2416 for ( std::size_t z=0; z<shape[0]; z++ )
// Per-thread pointer array into the J+1 input fields, positioned at
// the m=0 (center) component of this z-slice.
2421 const T ** current_In =
new const T *[J+1];
2422 for (
int a=0; a<=J; a++)
2423 current_In[a]=(
const T * )stIn[a]+ ( Z*stride_in+rankIn_times_2 );
2425 T * current_Out=stOutR+ ( Z*stride_out+rankOut_times_2 );
2432 for ( std::size_t i=0; i<jumpz; i++ )
2434 std::size_t count=0;
2435 for (
int m=-rankOut; m<=0; m++)
2439 current_Out[m*2]=T ( 0 );
2440 current_Out[m*2+1]=T ( 0 );
// Contract over M; for negative storage indices the conjugate
// component is read with the appropriate sign flips.
2445 for (
int M=-J; M<=J; M++)
2460 tmp0R=current_In[-M+J][-n*2];
2461 tmp0I=-current_In[-M+J][-n*2+1];
2465 tmp0R=-current_In[-M+J][-n*2];
2466 tmp0I=current_In[-M+J][-n*2+1];
2471 tmp0R=current_In[M+J][n*2];
2472 tmp0I=current_In[M+J][n*2+1];
2474 tmp1R+=Cg[count]*tmp0R;
2475 tmp1I+=Cg[count++]*tmp0I;
2495 current_Out[m*2]+=tmp1R;
2496 current_Out[m*2+1]-=tmp1I;
// Advance all input pointers and the output pointer to the next voxel.
2499 for (
int a=0; a<=J; a++)
2500 current_In[a]+=stride_in;
2501 current_Out+=stride_out;
2508 delete [] current_In;
2511 return STA_RESULT_SUCCESS;
// Real-storage variant of the product CG precomputation: only output
// orders m in [-J, 0] are tabulated (the positive-m half follows from
// conjugate symmetry). Returns a newly allocated array (caller owns).
2526 template<
typename T>
2527 T * sta_product_precomputeCGcoefficients_R(
int J1,
int J2,
int J,
bool normalized, T fact)
2533 norm=(T)1/(T)hanalysis::clebschGordan(J1,0,J2,0,J,0);
// Pass 1: count entries.
2538 std::size_t count=0;
2539 for (
int m=-J; m<=0; m++)
2541 for (
int m1=-J1; m1<=J1; m1++)
2550 T * cg=
new T[count];
// Pass 2: fill in identical iteration order.
2552 for (
int m=-J; m<=0; m++)
2554 for (
int m1=-J1; m1<=J1; m1++)
2559 cg[count++]=norm*(T)hanalysis::clebschGordan(J1,m1,J2,m2,J,m);
// CG table for the chained triple product (J1 x J2 -> Jprod1) x J3 ->
// Jprod2 in real storage (only m in [-Jprod2, 0]). Each entry is the
// product of the two CG coefficients along the coupling chain, with
// optional normalization by both zero-projection coefficients.
2569 template<
typename T>
2570 T * sta_tripleproduct_precomputeCGcoefficients_R(
int J1,
int J2,
int J3,
int Jprod1,
int Jprod2,
bool normalized, T fact)
2576 norm=(T)1/((T)(hanalysis::clebschGordan(J1,0,J2,0,Jprod1,0)*hanalysis::clebschGordan(Jprod1,0,J3,0,Jprod2,0)));
// Pass 1: count the valid (m, m3, mL, m1, m2) combinations.
2581 std::size_t count=0;
2583 for (
int m=-Jprod2; m<=0; m++ )
2585 for (
int m3=-J3; m3<=J3; m3++ )
2588 if ( abs ( mL ) <=Jprod1 )
2590 for (
int m1=-J1; m1<=J1; m1++ )
2593 if ( abs ( m2 ) <=J2 )
2602 T * cg=
new T[count];
// Pass 2: fill in identical iteration order.
2605 for (
int m=-Jprod2; m<=0; m++ )
2607 for (
int m3=-J3; m3<=J3; m3++ )
2610 if ( abs ( mL ) <=Jprod1 )
2612 for (
int m1=-J1; m1<=J1; m1++ )
2615 if ( abs ( m2 ) <=J2 )
2617 cg[count++]=norm*(T)hanalysis::clebschGordan(J1,m1,J2,m2,Jprod1,mL)*(T)hanalysis::clebschGordan(Jprod1,mL,J3,m3,Jprod2,m);
// Voxel-wise triple spherical tensor product (J1 x J2 -> Jprod1) x J3
// -> Jprod2 in real storage: only the m <= 0 half of each per-voxel
// vector is stored, with real/imaginary parts interleaved and accessed
// through raw T pointers. Validates both coupling steps against the
// triangle condition before contracting with the precomputed CG table.
2632 template<
typename T>
2634 const std::complex<T> * stIn1,
2635 const std::complex<T> * stIn2,
2636 const std::complex<T> * stIn3,
2637 std::complex<T> * stOut ,
2638 const std::size_t shape[],
2646 int stride_in1 = -1,
2647 int stride_in2 = -1,
2648 int stride_in3 = -1,
2649 int stride_out = -1,
2650 bool clear_field=
false)
2656 if ( ( std::abs ( J1-J2 ) >Jprod1 ) || ( Jprod1>std::abs ( J1+J2 ) ) )
2657 return STA_RESULT_INVALID_PRODUCT;
2658 if ( ( ( J1+J2+Jprod1 ) %2!=0 ) && ( normalize ) )
2659 return STA_RESULT_INVALID_PRODUCT;
2661 if ( ( std::abs ( Jprod1-J3 ) >Jprod2 ) || ( Jprod2>std::abs ( Jprod1+J3 ) ) )
2662 return STA_RESULT_INVALID_PRODUCT;
2663 if ( ( ( J3+Jprod1+Jprod2 ) %2!=0 ) && ( normalize ) )
2664 return STA_RESULT_INVALID_PRODUCT;
// Track whether each partial product lands in the imaginary-valued
// representation (odd coupling sum); the result is imaginary iff
// exactly one of the two steps is.
2672 bool tmp1InIv = ( ( J1+J2+Jprod1 ) %2!=0 );
2673 bool tmp2InIv = ( ( J3+Jprod2+Jprod1 ) %2!=0 );
2674 bool resultInIv= (tmp1InIv&&(!tmp2InIv))||((!tmp1InIv)&&(tmp2InIv));
2675 if (tmp1InIv) alpha*=-1;
2678 T * Cg= sta_tripleproduct_precomputeCGcoefficients_R(J1,J2,J3,Jprod1,Jprod2,normalize,alpha);
// Real-storage per-voxel vector lengths (m <= 0 half only).
2684 std::size_t vectorLengthJ1= ( J1+1 );
2685 std::size_t vectorLengthJ2= ( J2+1 );
2686 std::size_t vectorLengthJ3= ( J3+1 );
2687 std::size_t vectorLengthJprod2= ( Jprod2+1 );
2689 if ( stride_in1 == -1 )
2690 stride_in1 = vectorLengthJ1;
2691 if ( stride_in2 == -1 )
2692 stride_in2 = vectorLengthJ2;
2693 if ( stride_in3 == -1 )
2694 stride_in3 = vectorLengthJ3;
2695 if ( stride_out == -1 )
2696 stride_out = vectorLengthJprod2;
// Offsets (in interleaved reals) to the m=0 component of each vector.
2703 int J3_times_2=J3*2;
2704 int J2_times_2=J2*2;
2705 int J1_times_2=J1*2;
2706 int J_times_2=Jprod2*2;
2709 std::size_t jumpz=shape[1]*shape[2];
2711 const T * stIn1R= (
const T * ) stIn1;
2712 const T * stIn2R= (
const T * ) stIn2;
2713 const T * stIn3R= (
const T * ) stIn3;
2714 T * stOutR= ( T * ) stOut;
2716 #pragma omp parallel for num_threads(get_numCPUs())
2717 for ( std::size_t z=0; z<shape[0]; z++ )
2721 const T * current_J1R=stIn1R+ ( Z*stride_in1+J1_times_2 );
2722 const T * current_J2R=stIn2R+ ( Z*stride_in2+J2_times_2 );
2723 const T * current_J3R=stIn3R+ ( Z*stride_in3+J3_times_2 );
2724 T * current_JR=stOutR+ ( Z*stride_out+J_times_2 );
2735 for ( std::size_t i=0; i<jumpz; i++ )
2737 std::size_t count=0;
2738 for (
int m=-Jprod2; m<=0; m++ )
2742 current_JR[m*2]=T ( 0 );
2743 current_JR[m*2+1]=T ( 0 );
// (e,f): real/imag parts of the J3 factor; positive m3 is read from
// the stored conjugate with the appropriate sign flips.
2746 for (
int m3=-J3; m3<=J3; m3++ )
2749 if ( abs ( mL ) <=Jprod1 )
2755 e=current_J3R[-m3*2];
2756 f=-current_J3R[-m3*2+1];
2760 e=-current_J3R[-m3*2];
2761 f=current_J3R[-m3*2+1];
2766 e=current_J3R[m3*2];
2767 f=current_J3R[m3*2+1];
// (a,b): the J1 factor, same conjugate-read convention.
2770 for (
int m1=-J1; m1<=J1; m1++ )
2773 if ( abs ( m2 ) <=J2 )
2779 a=current_J1R[-m1*2];
2780 b=-current_J1R[-m1*2+1];
2784 a=-current_J1R[-m1*2];
2785 b=current_J1R[-m1*2+1];
2790 a=current_J1R[m1*2];
2791 b=current_J1R[m1*2+1];
// (c,d): the J2 factor.
2799 c=current_J2R[-m2*2];
2800 d=-current_J2R[-m2*2+1];
2804 c=-current_J2R[-m2*2];
2805 d=current_J2R[-m2*2+1];
2810 c=current_J2R[m2*2];
2811 d=current_J2R[m2*2+1];
// Accumulate the triple product; the two branches correspond to
// the imaginary-valued vs real-valued result representation.
2842 current_JR[m*2]-=Cg[count]* ( a*(e*d+c*f)+b*(e*c-d*f));
2843 current_JR[m*2+1]+=Cg[count++]* ( a*(e*c-d*f)-b*(e*d+c*f));
2850 current_JR[m*2]+=Cg[count]* ( a*(e*c-d*f)-b*(e*d+c*f));
2851 current_JR[m*2+1]+=Cg[count++]* ( a*(e*d+c*f)+b*(e*c-d*f));
2869 current_J1R+=stride_in1;
2870 current_J2R+=stride_in2;
2871 current_J3R+=stride_in3;
2872 current_JR+=stride_out;
2876 return STA_RESULT_SUCCESS;
// Voxel-wise spherical tensor product J1 x J2 -> J in real storage:
// per-voxel vectors keep only the m <= 0 half with interleaved
// real/imaginary parts; positive-m factors are reconstructed from the
// stored conjugates with sign flips. An odd J1+J2+J makes the result
// live in the imaginary-valued representation (resultInIv).
3077 template<
typename T>
3079 const std::complex<T> * stIn1,
3080 const std::complex<T> * stIn2,
3081 std::complex<T> * stOut ,
3082 const std::size_t shape[],
3088 int stride_in1 = -1,
3089 int stride_in2 = -1,
3090 int stride_out = -1,
3091 bool clear_field=
false)
3094 if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
3095 return STA_RESULT_INVALID_PRODUCT;
3096 if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
3097 return STA_RESULT_INVALID_PRODUCT;
3105 T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J1,J2, J,normalize,alpha );
3107 bool resultInIv= ( ( J1+J2+J ) %2!=0 );
// Real-storage per-voxel vector lengths (m <= 0 half only).
3109 std::size_t vectorLengthJ1= ( J1+1 );
3110 std::size_t vectorLengthJ2= ( J2+1 );
3111 std::size_t vectorLengthJ= ( J+1 );
3113 if ( stride_in1 == -1 )
3114 stride_in1 = vectorLengthJ1;
3115 if ( stride_in2 == -1 )
3116 stride_in2 = vectorLengthJ2;
3117 if ( stride_out == -1 )
3118 stride_out = vectorLengthJ;
// Offsets (in interleaved reals) to the m=0 component.
3124 int J2_times_2=J2*2;
3125 int J1_times_2=J1*2;
3129 std::size_t jumpz=shape[1]*shape[2];
3131 const T * stIn1R= (
const T * ) stIn1;
3132 const T * stIn2R= (
const T * ) stIn2;
3133 T * stOutR= ( T * ) stOut;
3135 #pragma omp parallel for num_threads(get_numCPUs())
3136 for ( std::size_t z=0; z<shape[0]; z++ )
3140 const T * current_J1R=stIn1R+ ( Z*stride_in1+J1_times_2 );
3141 const T * current_J2R=stIn2R+ ( Z*stride_in2+J2_times_2 );
3142 T * current_JR=stOutR+ ( Z*stride_out+J_times_2 );
3150 for ( std::size_t i=0; i<jumpz; i++ )
3152 std::size_t count=0;
3153 for (
int m=-J; m<=0; m++ )
3157 current_JR[m*2]=T ( 0 );
3158 current_JR[m*2+1]=T ( 0 );
// (tmp0R, tmp0I): J1 factor; positive m1 read from the stored
// conjugate with sign flips.
3219 for (
int m1=-J1; m1<=J1; m1++ )
3222 if ( abs ( m2 ) <=J2 )
3228 tmp0R=current_J1R[-m1*2];
3229 tmp0I=-current_J1R[-m1*2+1];
3233 tmp0R=-current_J1R[-m1*2];
3234 tmp0I=current_J1R[-m1*2+1];
3239 tmp0R=current_J1R[m1*2];
3240 tmp0I=current_J1R[m1*2+1];
// (tmp1R, tmp1I): J2 factor, same convention.
3246 tmp1R=current_J2R[-m2*2];
3247 tmp1I=-current_J2R[-m2*2+1];
3251 tmp1R=-current_J2R[-m2*2];
3252 tmp1I=current_J2R[-m2*2+1];
3257 tmp1R=current_J2R[m2*2];
3258 tmp1I=current_J2R[m2*2+1];
// Accumulate; first branch for the imaginary-valued representation,
// second for the real-valued one.
3264 current_JR[m*2]-=Cg[count]* ( tmp0R*tmp1I+tmp0I*tmp1R );
3265 current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1R-tmp0I*tmp1I );
3271 current_JR[m*2]+=Cg[count]* ( tmp0R*tmp1R-tmp0I*tmp1I );
3272 current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1I+tmp0I*tmp1R );
3277 current_J1R+=stride_in1;
3278 current_J2R+=stride_in2;
3279 current_JR+=stride_out;
3283 return STA_RESULT_SUCCESS;
3315 template<
typename T>
3317 const std::complex<T> * stIn1,
3318 const std::complex<T> * stIn2,
3319 std::complex<T> * stOut ,
3320 const std::size_t shape[],
3326 int stride_in1 = -1,
3327 int stride_in2 = -1,
3328 int stride_out = -1,
3329 bool clear_field=
false)
3331 if ( ( std::abs ( J1-J2 ) >J ) || ( J>std::abs ( J1+J2 ) ) )
3332 return STA_RESULT_INVALID_PRODUCT;
3333 if ( ( ( J1+J2+J ) %2!=0 ) && ( normalize ) )
3334 return STA_RESULT_INVALID_PRODUCT;
3342 T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J1,J2, J,normalize,alpha );
3344 bool resultInIv= ( ( J1+J2+J ) %2!=0 );
3346 std::size_t vectorLengthJ1= ( J1+1 );
3347 std::size_t vectorLengthJ2= ( J2+1 );
3348 std::size_t vectorLengthJ= ( J+1 );
3350 if ( stride_in1 == -1 )
3351 stride_in1 = vectorLengthJ1;
3352 if ( stride_in2 == -1 )
3353 stride_in2 = vectorLengthJ2;
3354 if ( stride_out == -1 )
3355 stride_out = vectorLengthJ;
3362 std::size_t jumpz=shape[1]*shape[2];
3363 std::size_t jumpy=shape[2];
3365 const T * stIn1R= (
const T * ) stIn1;
3366 const T * stIn2R= (
const T * ) stIn2;
3367 T * stOutR= ( T * ) stOut;
3369 #pragma omp parallel for num_threads(get_numCPUs())
3370 for ( std::size_t z=0; z<shape[0]; z++ )
3374 const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J1 );
3375 const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J2 );
3376 T * current_JR=stOutR+ ( Z*stride_out+2*J );
3384 for ( std::size_t i=0; i<jumpz; i++ )
3387 std::size_t Y=i/jumpy;
3388 std::size_t X=i%jumpy;
3389 std::size_t mpos= ((shape[0]-z)%shape[0])*jumpz+
3390 ((shape[1]-Y)%shape[1])*jumpy+
3391 ((shape[2]-X)%shape[2]);
3393 const T * current_J1Rmirrowed=stIn1R+ (mpos*stride_in1+2*J1 );
3394 const T * current_J2Rmirrowed=stIn2R+ (mpos*stride_in2+2*J2 );
3396 std::size_t count=0;
3397 for (
int m=-J; m<=0; m++ )
3401 current_JR[m*2]=T ( 0 );
3402 current_JR[m*2+1]=T ( 0 );
3406 for (
int m1=-J1; m1<=J1; m1++ )
3409 if ( abs ( m2 ) <=J2 )
3415 tmp0R=current_J1Rmirrowed[-m1*2];
3416 tmp0I=-current_J1Rmirrowed[-m1*2+1];
3420 tmp0R=-current_J1Rmirrowed[-m1*2];
3421 tmp0I=current_J1Rmirrowed[-m1*2+1];
3426 tmp0R=current_J1R[m1*2];
3427 tmp0I=current_J1R[m1*2+1];
3433 tmp1R=current_J2Rmirrowed[-m2*2];
3434 tmp1I=-current_J2Rmirrowed[-m2*2+1];
3438 tmp1R=-current_J2Rmirrowed[-m2*2];
3439 tmp1I=current_J2Rmirrowed[-m2*2+1];
3444 tmp1R=current_J2R[m2*2];
3445 tmp1I=current_J2R[m2*2+1];
3449 current_JR[m*2]-=Cg[count]* ( tmp0R*tmp1I+tmp0I*tmp1R );
3450 current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1R-tmp0I*tmp1I );
3454 current_JR[m*2]+=Cg[count]* ( tmp0R*tmp1R-tmp0I*tmp1I );
3455 current_JR[m*2+1]+=Cg[count++]* ( tmp0R*tmp1I+tmp0I*tmp1R );
3460 current_J1R+=stride_in1;
3461 current_J2R+=stride_in2;
3462 current_JR+=stride_out;
3466 return STA_RESULT_SUCCESS;
3472 template<
typename T,
typename S >
3474 const std::complex<T> * stIn1,
3475 const std::complex<T> * stIn2,
3477 const std::size_t shape[],
3481 int stride_in1 = -1,
3482 int stride_in2 = -1,
3483 int stride_out = -1,
3484 bool clear_field=
false)
3487 T * Cg= sta_product_precomputeCGcoefficients_R<T> ( J,J, 0,normalize,alpha );
3489 std::size_t vectorLengthJ1= ( J+1 );
3490 std::size_t vectorLengthJ2= ( J+1 );
3491 std::size_t vectorLengthJ= ( 1 );
3493 if ( stride_in1 == -1 )
3494 stride_in1 = vectorLengthJ1;
3495 if ( stride_in2 == -1 )
3496 stride_in2 = vectorLengthJ2;
3497 if ( stride_out == -1 )
3498 stride_out = vectorLengthJ;
3504 std::size_t jumpz=shape[1]*shape[2];
3506 const T * stIn1R= (
const T * ) stIn1;
3507 const T * stIn2R= (
const T * ) stIn2;
3508 S * stOutR= ( S * ) stOut;
3510 #pragma omp parallel for num_threads(get_numCPUs())
3511 for ( std::size_t z=0; z<shape[0]; z++ )
3515 const T * current_J1R=stIn1R+ ( Z*stride_in1+2*J );
3516 const T * current_J2R=stIn2R+ ( Z*stride_in2+2*J );
3517 S * current_JR=stOutR+ ( Z*stride_out );
3519 for ( std::size_t i=0; i<jumpz; i++ )
3521 std::size_t count=0;
3525 current_JR[0]=T ( 0 );
3528 for (
int m1=-J; m1<=0; m1++ )
3532 current_JR[0]+=2*Cg[count]* ( current_J1R[m1*2]*current_J2R[m1*2]
3533 +current_J1R[m1*2+1]*current_J2R[m1*2+1] );
3536 current_JR[0]+=Cg[count]* ( current_J1R[m1*2]*current_J2R[m1*2]
3537 +current_J1R[m1*2+1]*current_J2R[m1*2+1] );
3540 current_J1R+=stride_in1;
3541 current_J2R+=stride_in2;
3542 current_JR+=stride_out;
3546 return STA_RESULT_SUCCESS;
3718 #ifdef _STA_OLD_DERIV
3719 template<
typename T,
typename S>
3722 std::complex<T> * stOut ,
3723 const std::size_t shape[],
3726 bool conjugate=
false,
3729 const T v_size[]=NULL,
3731 int stride_out = -1,
3732 bool clear_field =
false)
3736 if ( abs ( Jupdown ) >1 )
return STA_RESULT_INVALID_TENSOR_RANK;
3737 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
3739 std::complex<T> imag=-std::complex<T>(0,1);
3740 if (conjugate) imag*=T( -1 );
3743 voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
3746 voxel_size[0]/=v_size[0];
3747 voxel_size[1]/=v_size[1];
3748 voxel_size[2]/=v_size[2];
3751 imag*=voxel_size[1];
3753 int J1=(T)(J+Jupdown);
3755 std::size_t vectorLengthJ=J+1;
3756 std::size_t vectorLengthJ1=(J1)+1;
3758 std::size_t jumpz=shape[1]*shape[2];
3759 std::size_t jumpy=shape[2];
3761 if (stride_in == -1)
3762 stride_in = vectorLengthJ;
3763 if (stride_out == -1)
3764 stride_out = vectorLengthJ1;
3767 T * CGTable=
new T[3*vectorLengthJ1];
3768 T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
3769 if (Jupdown==0) shnorm=1;
3771 for (
int M=-(J1); M<=(0); M++)
3773 CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
3774 CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
3775 CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
3777 T * CGTable0=&CGTable[0];
3779 T * CGTable1=&CGTable[vectorLengthJ1];
3781 T * CGTable2=&CGTable[2*vectorLengthJ1];
3784 #pragma omp parallel for num_threads(get_numCPUs())
3785 for (std::size_t z=0; z<shape[0]; z++)
3809 for (std::size_t y=0; y<shape[1]; y++)
3823 for (std::size_t x=0; x<shape[2]; x++)
3833 derivX1=&stIn[(Z[1]+Y[1]+X[0])*stride_in]+J;
3834 derivX0=&stIn[(Z[1]+Y[1]+X[2])*stride_in]+J;
3836 derivY1=&stIn[(Z[1]+Y[0]+X[1])*stride_in]+J;
3837 derivY0=&stIn[(Z[1]+Y[2]+X[1])*stride_in]+J;
3839 derivZ1=&stIn[(Z[0]+Y[1]+X[1])*stride_in]+J;
3840 derivZ0=&stIn[(Z[2]+Y[1]+X[1])*stride_in]+J;
3842 std::size_t offset=(Z[1]+Y[1]+X[1])*stride_out+J1;
3844 for (
int M=-(J1); M<=(0); M++)
3847 std::complex<T> & current=stOut[offset+M];
3848 if ( clear_field ) current=T ( 0 );
3849 std::complex<T> tmp=T ( 0 );
3857 tmp-=CGTable0[M]*(voxel_size[2]*std::conj(derivX0[-m2]-derivX1[-m2])+imag*std::conj(derivY0[-m2]-derivY1[-m2]));
3859 tmp+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+imag*(derivY0[m2]-derivY1[m2]));
3863 tmp+=CGTable1[M]*(derivZ0[M]-derivZ1[M]);
3868 tmp+=CGTable2[M]*(-voxel_size[2]*(derivX0[m2]-derivX1[m2])+imag*(derivY0[m2]-derivY1[m2]));
3878 return (STA_RESULT_SUCCESS);
3883 template<
typename T,
typename S>
3886 std::complex<T> * stOut ,
3887 const std::size_t shape[],
3890 bool conjugate=
false,
3892 const T v_size[]=NULL,
3894 int stride_out = -1,
3895 bool clear_field =
false )
3897 if ( abs ( Jupdown ) >2 )
return STA_RESULT_INVALID_TENSOR_RANK;
3898 if ( abs ( Jupdown ) ==1 )
return STA_RESULT_INVALID_TENSOR_RANK;
3899 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
3903 if (hanalysis::verbose>0)
3904 printf(
"WARNING! element size is not considered yet!\n");
3907 std::complex<T> imag=-std::complex<T> ( 0,1 );
3908 if (conjugate) imag*=T( -1 );
3910 alpha*=T(sqrt(3.0/2.0));
3915 int vectorLengthJ=J+1;
3916 int vectorLengthJ1=(J1)+1;
3918 if (stride_in == -1)
3919 stride_in = vectorLengthJ;
3920 if (stride_out == -1)
3921 stride_out = vectorLengthJ1;
3924 std::size_t jumpz=shape[1]*shape[2];
3925 std::size_t jumpy=shape[2];
3929 T * CGTable=
new T[5*vectorLengthJ1];
3930 T shnorm=hanalysis::clebschGordan(2,0,J,0,J1,0);
3933 for (
int M=-(J1); M<=(0); M++)
3935 CGTable[M+(J1)] =hanalysis::clebschGordan(2,-2,J,M+2,J1,M)/shnorm;
3936 CGTable[M+(J1)+vectorLengthJ1] =hanalysis::clebschGordan(2,-1,J,M+1,J1,M)/shnorm;;
3937 CGTable[M+(J1)+2*vectorLengthJ1]=hanalysis::clebschGordan(2,0,J,M,J1,M)/shnorm;
3938 CGTable[M+(J1)+3*vectorLengthJ1]=hanalysis::clebschGordan(2,1,J,M-1,J1,M)/shnorm;
3939 CGTable[M+(J1)+4*vectorLengthJ1]=hanalysis::clebschGordan(2,2,J,M-2,J1,M)/shnorm;
3941 T * CGTable0=&CGTable[0];
3943 T * CGTable1=&CGTable[vectorLengthJ1];
3945 T * CGTable2=&CGTable[2*vectorLengthJ1];
3947 T * CGTable3=&CGTable[3*vectorLengthJ1];
3949 T * CGTable4=&CGTable[4*vectorLengthJ1];
3952 #pragma omp parallel for num_threads(get_numCPUs())
3953 for (std::size_t z=0; z<shape[0]; z++)
4009 for (std::size_t y=0; y<shape[1]; y++)
4030 for (std::size_t x=0; x<shape[2]; x++)
4046 X1Y1Z2=&stIn[(Z[2]+Y[1]+X[1])*stride_in]+J;
4048 X1Y2Z1=&stIn[(Z[1]+Y[2]+X[1])*stride_in]+J;
4049 X1Y2Z2=&stIn[(Z[2]+Y[2]+X[1])*stride_in]+J;
4050 X1Y2Z3=&stIn[(Z[3]+Y[2]+X[1])*stride_in]+J;
4052 X1Y3Z2=&stIn[(Z[2]+Y[3]+X[1])*stride_in]+J;
4056 X2Y1Z1=&stIn[(Z[1]+Y[1]+X[2])*stride_in]+J;
4057 X2Y1Z2=&stIn[(Z[2]+Y[1]+X[2])*stride_in]+J;
4058 X2Y1Z3=&stIn[(Z[3]+Y[1]+X[2])*stride_in]+J;
4059 X2Y2Z1=&stIn[(Z[1]+Y[2]+X[2])*stride_in]+J;
4060 X2Y2Z2=&stIn[(Z[2]+Y[2]+X[2])*stride_in]+J;
4061 X2Y2Z3=&stIn[(Z[3]+Y[2]+X[2])*stride_in]+J;
4062 X2Y3Z1=&stIn[(Z[1]+Y[3]+X[2])*stride_in]+J;
4063 X2Y3Z2=&stIn[(Z[2]+Y[3]+X[2])*stride_in]+J;
4064 X2Y3Z3=&stIn[(Z[3]+Y[3]+X[2])*stride_in]+J;
4068 X3Y1Z2=&stIn[(Z[2]+Y[1]+X[3])*stride_in]+J;
4070 X3Y2Z1=&stIn[(Z[1]+Y[2]+X[3])*stride_in]+J;
4071 X3Y2Z2=&stIn[(Z[2]+Y[2]+X[3])*stride_in]+J;
4072 X3Y2Z3=&stIn[(Z[3]+Y[2]+X[3])*stride_in]+J;
4074 X3Y3Z2=&stIn[(Z[2]+Y[3]+X[3])*stride_in]+J;
4078 std::size_t offset=(Z[2]+Y[2]+X[2])*stride_out+J1;
4081 for (
int M=-(J1); M<=(0); M++)
4083 std::complex<T> & current=stOut[offset+M];
4084 if ( clear_field ) current=T ( 0 );
4085 std::complex<T> ctmp=T ( 0 );
4090 std::complex<T> tmp;
4093 std::complex<T> Dxx= (X1Y2Z2[-m2]-(T)2*X2Y2Z2[-m2]+X3Y2Z2[-m2]);
4094 std::complex<T> Dyy=(X2Y1Z2[-m2]-(T)2*X2Y2Z2[-m2]+X2Y3Z2[-m2]);
4095 std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[-m2]-X3Y1Z2[-m2]-X1Y3Z2[-m2]+X3Y3Z2[-m2]);
4097 if (m2%2==0) tmp=(T)0.5*CGTable0[M]*(std::conj(Dxx-Dyy )-imag*std::conj((T)2.0*Dxy));
4098 else tmp=(T)0.5*CGTable0[M]*(-std::conj(Dxx-Dyy )-imag*(-std::conj((T)2.0*Dxy)));
4102 std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4103 std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4104 std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4105 tmp=(T)0.5*CGTable0[M]*((Dxx-Dyy )-imag*((T)2.0*Dxy));
4115 std::complex<T> tmp;
4119 std::complex<T> Dxz=(T)0.25*(X1Y2Z1[-m2]-X1Y2Z3[-m2]-X3Y2Z1[-m2]+X3Y2Z3[-m2]);
4120 std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[-m2]-X2Y3Z1[-m2]-X2Y1Z3[-m2]+X2Y3Z3[-m2]);
4121 if (m2%2==0) tmp=CGTable1[M]*(std::conj(Dxz )-imag*std::conj(Dyz));
4122 else tmp=CGTable1[M]*(-std::conj(Dxz )-imag*(-std::conj(Dyz)));
4125 std::complex<T> Dxz=(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4126 std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4127 tmp=CGTable1[M]*((Dxz )-imag*(Dyz));
4137 std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4138 std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4139 std::complex<T> Dzz=(X2Y2Z1[m2]-(T)2*X2Y2Z2[m2]+X2Y2Z3[m2]);
4140 const T SQRT6=(T)(-1.0/std::sqrt(6.0));
4141 ctmp+=CGTable2[M]*((Dxx+Dyy-(T)2.0*Dzz)*(SQRT6));
4147 std::complex<T> Dxz=(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4148 std::complex<T> Dyz=-(T)0.25*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4149 ctmp-=CGTable3[M]*((Dxz )+imag*(Dyz));
4156 std::complex<T> Dxx= (X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4157 std::complex<T> Dyy=(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4158 std::complex<T> Dxy=-(T)0.25*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4159 ctmp+=(T)0.5*CGTable4[M]*((Dxx-Dyy )+imag*((T)2.0*Dxy));
4162 current+=ctmp*alpha;
4170 return STA_RESULT_SUCCESS;
4178 template<
typename T>
4180 const std::complex<T> * stIn,
4181 std::complex<T> * stOut ,
4182 const std::size_t shape[],
4185 bool conjugate=
false,
4187 const T v_size[]=NULL,
4189 int stride_out = -1,
4190 bool clear_field =
false)
4193 if ( abs ( Jupdown ) >1 )
return STA_RESULT_INVALID_TENSOR_RANK;
4194 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
4200 voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
4203 voxel_size[0]/=v_size[0];
4204 voxel_size[1]/=v_size[1];
4205 voxel_size[2]/=v_size[2];
4210 if (conjugate) voxel_size[1]*=T( -1 );
4212 int J1=(T)(J+Jupdown);
4214 std::size_t vectorLengthJ=J+1;
4215 std::size_t vectorLengthJ1=(J1)+1;
4218 std::size_t jumpz=shape[1]*shape[2];
4219 std::size_t jumpy=shape[2];
4221 if (stride_in == -1)
4222 stride_in = vectorLengthJ;
4223 if (stride_out == -1)
4224 stride_out = vectorLengthJ1;
4229 T * CGTable=
new T[3*vectorLengthJ1];
4231 T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
4233 if (Jupdown==0) shnorm=1;
4235 for (
int M=-(J1); M<=(0); M++)
4237 CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
4238 CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
4239 CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
4246 T * CGTable0=&CGTable[0];
4248 T * CGTable1=&CGTable[vectorLengthJ1];
4250 T * CGTable2=&CGTable[2*vectorLengthJ1];
4254 const T * stIn_r=(
const T *)stIn;
4255 T * stOut_r=(T*)stOut;
4264 #pragma omp parallel for num_threads(get_numCPUs())
4265 for (std::size_t z=0; z<shape[0]; z++)
4289 for (std::size_t y=0; y<shape[1]; y++)
4303 for (std::size_t x=0; x<shape[2]; x++)
4313 derivX1=&stIn_r[(Z[1]+Y[1]+X[0])*stride_in]+J_times_2;
4314 derivX0=&stIn_r[(Z[1]+Y[1]+X[2])*stride_in]+J_times_2;
4316 derivY1=&stIn_r[(Z[1]+Y[0]+X[1])*stride_in]+J_times_2;
4317 derivY0=&stIn_r[(Z[1]+Y[2]+X[1])*stride_in]+J_times_2;
4319 derivZ1=&stIn_r[(Z[0]+Y[1]+X[1])*stride_in]+J_times_2;
4320 derivZ0=&stIn_r[(Z[2]+Y[1]+X[1])*stride_in]+J_times_2;
4322 T * current_r=stOut_r+(Z[1]+Y[1]+X[1])*stride_out;
4324 for (
int M=-(J1); M<=(0); M++)
4335 tmp_r-=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+voxel_size[1]*(derivY0[m2+1]-derivY1[m2+1]));
4336 tmp_i-=CGTable0[M]*(voxel_size[2]*(derivX1[m2+1]-derivX0[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4339 tmp_r+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]-derivX1[m2])+voxel_size[1]*(derivY1[m2+1]-derivY0[m2+1]));
4340 tmp_i+=CGTable0[M]*(voxel_size[2]*(derivX0[m2+1]-derivX1[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4345 tmp_r+=CGTable1[M]*(derivZ0[M*2]-derivZ1[M*2]);
4346 tmp_i+=CGTable1[M]*(derivZ0[M*2+1]-derivZ1[M*2+1]);
4351 tmp_r+=CGTable2[M]*(voxel_size[2]*(derivX1[m2]-derivX0[m2])+voxel_size[1]*(derivY1[m2+1]-derivY0[m2+1]));
4352 tmp_i+=CGTable2[M]*(voxel_size[2]*(derivX1[m2+1]-derivX0[m2+1])+voxel_size[1]*(derivY0[m2]-derivY1[m2]));
4363 (*current_r)=tmp_r*alpha;
4365 (*current_r)=tmp_i*alpha;
4369 (*current_r)+=tmp_r*alpha;
4371 (*current_r)+=tmp_i*alpha;
4381 return (STA_RESULT_SUCCESS);
4387 template<
typename T>
4389 const std::complex<T> * stIn,
4390 std::complex<T> * stOut ,
4391 const std::size_t shape[],
4394 bool conjugate=
false,
4396 const T v_size[]=NULL,
4398 int stride_out = -1,
4399 bool clear_field =
false )
4401 if ( abs ( Jupdown ) >2 )
return STA_RESULT_INVALID_TENSOR_RANK;
4402 if ( abs ( Jupdown ) ==1 )
return STA_RESULT_INVALID_TENSOR_RANK;
4403 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
4412 voxel_weights[0]=voxel_weights[1]=voxel_weights[2]
4413 =voxel_weights[3]=voxel_weights[4]=voxel_weights[5]=T(1);
4417 voxel_weights[0]/=(v_size[0]*v_size[0]);
4418 voxel_weights[1]/=(v_size[1]*v_size[1]);
4419 voxel_weights[2]/=(v_size[2]*v_size[2]);
4420 voxel_weights[3]/=(v_size[0]*v_size[1]);
4421 voxel_weights[4]/=(v_size[0]*v_size[2]);
4422 voxel_weights[5]/=(v_size[1]*v_size[2]);
4427 if (conjugate) conj*=T( -1 );
4429 alpha*=T(sqrt(3.0/2.0));
4434 int vectorLengthJ=J+1;
4435 int vectorLengthJ1=(J1)+1;
4437 if (stride_in == -1)
4438 stride_in = vectorLengthJ;
4439 if (stride_out == -1)
4440 stride_out = vectorLengthJ1;
4443 std::size_t jumpz=shape[1]*shape[2];
4444 std::size_t jumpy=shape[2];
4448 T * CGTable=
new T[5*vectorLengthJ1];
4449 T shnorm=hanalysis::clebschGordan(2,0,J,0,J1,0);
4452 for (
int M=-(J1); M<=(0); M++)
4454 CGTable[M+(J1)] =hanalysis::clebschGordan(2,-2,J,M+2,J1,M)/shnorm;
4455 CGTable[M+(J1)+vectorLengthJ1] =hanalysis::clebschGordan(2,-1,J,M+1,J1,M)/shnorm;
4456 CGTable[M+(J1)+2*vectorLengthJ1]=hanalysis::clebschGordan(2,0,J,M,J1,M)/shnorm;
4457 CGTable[M+(J1)+3*vectorLengthJ1]=hanalysis::clebschGordan(2,1,J,M-1,J1,M)/shnorm;
4458 CGTable[M+(J1)+4*vectorLengthJ1]=hanalysis::clebschGordan(2,2,J,M-2,J1,M)/shnorm;
4460 T * CGTable0=&CGTable[0];
4462 T * CGTable1=&CGTable[vectorLengthJ1];
4464 T * CGTable2=&CGTable[2*vectorLengthJ1];
4466 T * CGTable3=&CGTable[3*vectorLengthJ1];
4468 T * CGTable4=&CGTable[4*vectorLengthJ1];
4473 const T * stIn_r=(
const T *)stIn;
4474 T * stOut_r=(T*)stOut;
4484 #pragma omp parallel for num_threads(get_numCPUs())
4485 for (std::size_t z=0; z<shape[0]; z++)
4532 for (std::size_t y=0; y<shape[1]; y++)
4553 for (std::size_t x=0; x<shape[2]; x++)
4569 X1Y1Z2=stIn_r+(Z[2]+Y[1]+X[1])*stride_in+J_times_2;
4571 X1Y2Z1=stIn_r+(Z[1]+Y[2]+X[1])*stride_in+J_times_2;
4572 X1Y2Z2=stIn_r+(Z[2]+Y[2]+X[1])*stride_in+J_times_2;
4573 X1Y2Z3=stIn_r+(Z[3]+Y[2]+X[1])*stride_in+J_times_2;
4575 X1Y3Z2=stIn_r+(Z[2]+Y[3]+X[1])*stride_in+J_times_2;
4579 X2Y1Z1=stIn_r+(Z[1]+Y[1]+X[2])*stride_in+J_times_2;
4580 X2Y1Z2=stIn_r+(Z[2]+Y[1]+X[2])*stride_in+J_times_2;
4581 X2Y1Z3=stIn_r+(Z[3]+Y[1]+X[2])*stride_in+J_times_2;
4582 X2Y2Z1=stIn_r+(Z[1]+Y[2]+X[2])*stride_in+J_times_2;
4583 X2Y2Z2=stIn_r+(Z[2]+Y[2]+X[2])*stride_in+J_times_2;
4584 X2Y2Z3=stIn_r+(Z[3]+Y[2]+X[2])*stride_in+J_times_2;
4585 X2Y3Z1=stIn_r+(Z[1]+Y[3]+X[2])*stride_in+J_times_2;
4586 X2Y3Z2=stIn_r+(Z[2]+Y[3]+X[2])*stride_in+J_times_2;
4587 X2Y3Z3=stIn_r+(Z[3]+Y[3]+X[2])*stride_in+J_times_2;
4591 X3Y1Z2=stIn_r+(Z[2]+Y[1]+X[3])*stride_in+J_times_2;
4593 X3Y2Z1=stIn_r+(Z[1]+Y[2]+X[3])*stride_in+J_times_2;
4594 X3Y2Z2=stIn_r+(Z[2]+Y[2]+X[3])*stride_in+J_times_2;
4595 X3Y2Z3=stIn_r+(Z[3]+Y[2]+X[3])*stride_in+J_times_2;
4597 X3Y3Z2=stIn_r+(Z[2]+Y[3]+X[3])*stride_in+J_times_2;
4602 T * current_r=stOut_r+(Z[2]+Y[2]+X[2])*stride_out;
4603 for (
int M=-(J1); M<=(0); M++)
4619 T Dxx_r= voxel_weights[2]*(X1Y2Z2[-m2]-(T)2*X2Y2Z2[-m2]+X3Y2Z2[-m2]);
4620 T Dxx_i= voxel_weights[2]*(X1Y2Z2[-m2+1]-(T)2*X2Y2Z2[-m2+1]+X3Y2Z2[-m2+1]);
4622 T Dyy_r=voxel_weights[1]*(X2Y1Z2[-m2]-(T)2*X2Y2Z2[-m2]+X2Y3Z2[-m2]);
4623 T Dyy_i=voxel_weights[1]*(X2Y1Z2[-m2+1]-(T)2*X2Y2Z2[-m2+1]+X2Y3Z2[-m2+1]);
4625 T Dxy_r=voxel_weights[5]*(X1Y1Z2[-m2]-X3Y1Z2[-m2]-X1Y3Z2[-m2]+X3Y3Z2[-m2]);
4626 T Dxy_i=voxel_weights[5]*(X1Y1Z2[-m2+1]-X3Y1Z2[-m2+1]-X1Y3Z2[-m2+1]+X3Y3Z2[-m2+1]);
4631 ctmp_r+=(T)0.5*CGTable0[M]*((Dxx_r-Dyy_r )+conj*((T)0.5*Dxy_i));
4632 ctmp_i+=(T)0.5*CGTable0[M]*((Dyy_i-Dxx_i )+conj*((T)0.5*Dxy_r));
4637 ctmp_r+=(T)0.5*CGTable0[M]*((Dyy_r-Dxx_r )-conj*((T)0.5*Dxy_i));
4638 ctmp_i+=(T)0.5*CGTable0[M]*((Dxx_i-Dyy_i )-conj*((T)0.5*Dxy_r));
4644 T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4645 T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4647 T Dyy_r=voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4648 T Dyy_i=voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4650 T Dxy_r=voxel_weights[5]*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4651 T Dxy_i=voxel_weights[5]*(X1Y1Z2[m2+1]-X3Y1Z2[m2+1]-X1Y3Z2[m2+1]+X3Y3Z2[m2+1]);
4653 ctmp_r+=(T)0.5*CGTable0[M]*((Dxx_r-Dyy_r )-conj*((T)0.5*Dxy_i));
4654 ctmp_i+=(T)0.5*CGTable0[M]*((Dxx_i-Dyy_i )+conj*((T)0.5*Dxy_r));
4672 T Dxz_r=(T)0.25*voxel_weights[4]*(X1Y2Z1[-m2]-X1Y2Z3[-m2]-X3Y2Z1[-m2]+X3Y2Z3[-m2]);
4673 T Dxz_i=(T)0.25*voxel_weights[4]*(X1Y2Z1[-m2+1]-X1Y2Z3[-m2+1]-X3Y2Z1[-m2+1]+X3Y2Z3[-m2+1]);
4676 T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[-m2]-X2Y3Z1[-m2]-X2Y1Z3[-m2]+X2Y3Z3[-m2]);
4677 T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[-m2+1]-X2Y3Z1[-m2+1]-X2Y1Z3[-m2+1]+X2Y3Z3[-m2+1]);
4682 ctmp_r+=CGTable1[M]*(conj*Dyz_i-Dxz_r);
4683 ctmp_i+=CGTable1[M]*(Dxz_i+conj*Dyz_r);
4690 ctmp_r+=CGTable1[M]*(Dxz_r+conj*Dyz_i);
4691 ctmp_i+=-CGTable1[M]*(Dxz_i+conj*Dyz_r);
4696 T Dxz_r=(T)0.25*voxel_weights[4]*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4697 T Dxz_i=(T)0.25*voxel_weights[4]*(X1Y2Z1[m2+1]-X1Y2Z3[m2+1]-X3Y2Z1[m2+1]+X3Y2Z3[m2+1]);
4700 T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4701 T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2+1]-X2Y3Z1[m2+1]-X2Y1Z3[m2+1]+X2Y3Z3[m2+1]);
4703 ctmp_r+=CGTable1[M]*(Dxz_r +conj*Dyz_i);
4704 ctmp_i+=CGTable1[M]*(Dxz_i -conj*Dyz_r);
4716 T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4717 T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4719 T Dyy_r= voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4720 T Dyy_i= voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4722 T Dzz_r= voxel_weights[0]*(X2Y2Z1[m2]-(T)2*X2Y2Z2[m2]+X2Y2Z3[m2]);
4723 T Dzz_i= voxel_weights[0]*(X2Y2Z1[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y2Z3[m2+1]);
4725 const T SQRT6=(T)(-1.0/std::sqrt(6.0));
4727 ctmp_r+=CGTable2[M]*((Dxx_r+Dyy_r-(T)2.0*Dzz_r)*(SQRT6));
4728 ctmp_i+=CGTable2[M]*((Dxx_i+Dyy_i-(T)2.0*Dzz_i)*(SQRT6));
4736 T Dxz_r=voxel_weights[4]*(T)0.25*(X1Y2Z1[m2]-X1Y2Z3[m2]-X3Y2Z1[m2]+X3Y2Z3[m2]);
4737 T Dxz_i=voxel_weights[4]*(T)0.25*(X1Y2Z1[m2+1]-X1Y2Z3[m2+1]-X3Y2Z1[m2+1]+X3Y2Z3[m2+1]);
4739 T Dyz_r=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2]-X2Y3Z1[m2]-X2Y1Z3[m2]+X2Y3Z3[m2]);
4740 T Dyz_i=-(T)0.25*voxel_weights[3]*(X2Y1Z1[m2+1]-X2Y3Z1[m2+1]-X2Y1Z3[m2+1]+X2Y3Z3[m2+1]);
4743 ctmp_r-=CGTable3[M]*(Dxz_r-conj*Dyz_i);
4744 ctmp_i-=CGTable3[M]*(Dxz_i+conj*Dyz_r);
4753 T Dxx_r= voxel_weights[2]*(X1Y2Z2[m2]-(T)2*X2Y2Z2[m2]+X3Y2Z2[m2]);
4754 T Dxx_i= voxel_weights[2]*(X1Y2Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X3Y2Z2[m2+1]);
4756 T Dyy_r= voxel_weights[1]*(X2Y1Z2[m2]-(T)2*X2Y2Z2[m2]+X2Y3Z2[m2]);
4757 T Dyy_i= voxel_weights[1]*(X2Y1Z2[m2+1]-(T)2*X2Y2Z2[m2+1]+X2Y3Z2[m2+1]);
4760 T Dxy_r= voxel_weights[5]*(X1Y1Z2[m2]-X3Y1Z2[m2]-X1Y3Z2[m2]+X3Y3Z2[m2]);
4761 T Dxy_i= voxel_weights[5]*(X1Y1Z2[m2+1]-X3Y1Z2[m2+1]-X1Y3Z2[m2+1]+X3Y3Z2[m2+1]);
4763 ctmp_r+=(T)0.5*CGTable4[M]*((Dxx_r-Dyy_r )+conj*((T)0.5*Dxy_i));
4764 ctmp_i+=(T)0.5*CGTable4[M]*((Dxx_i-Dyy_i )-conj*((T)0.5*Dxy_r));
4781 (*current_r)=ctmp_r*alpha;
4783 (*current_r)=ctmp_i*alpha;
4787 (*current_r)+=ctmp_r*alpha;
4789 (*current_r)+=ctmp_i*alpha;
4800 return STA_RESULT_SUCCESS;
4820 template<
typename T,
typename S>
4823 std::complex<T> * stOut ,
4824 const std::size_t shape[],
4827 bool conjugate=
false,
4828 std::complex<T> alpha=(T)1.0,
4829 const T v_size[]=NULL,
4831 int stride_out = -1,
4832 bool clear_field =
false)
4835 if ( abs ( Jupdown ) >1 )
return STA_RESULT_INVALID_TENSOR_RANK;
4836 if ( abs ( J+Jupdown ) <0 )
return STA_RESULT_INVALID_TENSOR_RANK;
4838 std::complex<T> imag=-std::complex<T>(0,1);
4839 if (conjugate) imag*=T( -1 );
4842 voxel_size[0]=voxel_size[1]=voxel_size[2]=T(1);
4845 voxel_size[0]/=v_size[0];
4846 voxel_size[1]/=v_size[1];
4847 voxel_size[2]/=v_size[2];
4850 imag*=voxel_size[1];
4852 int J1=(T)(J+Jupdown);
4854 std::size_t vectorLengthJ=J+1;
4855 std::size_t vectorLengthJ1=(J1)+1;
4858 if (stride_in == -1)
4859 stride_in = vectorLengthJ;
4860 if (stride_out == -1)
4861 stride_out = vectorLengthJ1;
4866 std::size_t jumpz=shape[1]*shape[2];
4867 std::size_t jumpy=shape[2];
4870 T * CGTable=
new T[3*vectorLengthJ1];
4871 T shnorm=hanalysis::clebschGordan(1,0,J,0,J1,0);
4872 if (Jupdown==0) shnorm=1;
4874 for (
int M=-(J1); M<=(0); M++)
4876 CGTable[M+(J1)] =T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,-1,J,M+1,J1,M)/shnorm;;
4877 CGTable[M+(J1)+vectorLengthJ1] =voxel_size[0]*hanalysis::clebschGordan(1,0,J,M,J1,M)/shnorm;
4878 CGTable[M+(J1)+2*vectorLengthJ1]=T(1.0/std::sqrt(2.0))*hanalysis::clebschGordan(1,1,J,M-1,J1,M)/shnorm;
4880 T * CGTable0=&CGTable[0];
4882 T * CGTable1=&CGTable[vectorLengthJ1];
4884 T * CGTable2=&CGTable[2*vectorLengthJ1];
4887 #pragma omp parallel for num_threads(get_numCPUs())
4888 for (std::size_t z=0; z<shape[0]; z++)
4912 const S * derivZ0p=stIn+Z[0]*stride_in+J;
4913 const S * derivZ1p=stIn+Z[1]*stride_in+J;
4914 const S * derivZ2p=stIn+Z[2]*stride_in+J;
4915 const S * derivZ3p=stIn+Z[3]*stride_in+J;
4916 const S * derivZ4p=stIn+Z[4]*stride_in+J;
4936 for (std::size_t y=0; y<shape[1]; y++)
4961 const S * derivZ2Y0p=derivZ2p+Y[0]*stride_in;
4962 const S * derivZ2Y1p=derivZ2p+Y[1]*stride_in;
4963 const S * derivZ2Y3p=derivZ2p+Y[3]*stride_in;
4964 const S * derivZ2Y4p=derivZ2p+Y[4]*stride_in;
4966 std::size_t tmp=Y[2]*stride_in;
4967 const S * derivZ0Y2p=derivZ0p+tmp;
4968 const S * derivZ1Y2p=derivZ1p+tmp;
4969 const S * derivZ2Y2p=derivZ2p+tmp;
4970 const S * derivZ3Y2p=derivZ3p+tmp;
4971 const S * derivZ4Y2p=derivZ4p+tmp;
4974 for (std::size_t x=0; x<shape[2]; x++)
4991 derivX0=derivZ2Y2p+(X[0])*stride_in;
4992 derivX1=derivZ2Y2p+(X[1])*stride_in;
4993 derivX2=derivZ2Y2p+X[3]*stride_in;
4994 derivX3=derivZ2Y2p+(X[4])*stride_in;
4997 std::size_t tmp=X[2]*stride_in;
4998 derivY0=derivZ2Y0p+tmp;
4999 derivY1=derivZ2Y1p+tmp;
5000 derivY2=derivZ2Y3p+tmp;
5001 derivY3=derivZ2Y4p+tmp;
5003 derivZ0=derivZ0Y2p+tmp;
5004 derivZ1=derivZ1Y2p+tmp;
5005 derivZ2=derivZ3Y2p+tmp;
5006 derivZ3=derivZ4Y2p+tmp;
5008 std::size_t offset=(Z[2]+Y[2]+X[2])*stride_out+J1;
5010 for (
int M=-(J1); M<=(0); M++)
5012 std::complex<T> & current=stOut[offset+M];
5013 if ( clear_field ) current=T ( 0 );
5014 std::complex<T> tmp=T ( 0 );
5021 tmp-=CGTable0[M]*(voxel_size[2]*std::conj(derivX0[-m2]+(T)8.0*(derivX2[-m2]-derivX1[-m2])-derivX3[-m2])+
5022 imag*std::conj(derivY0[-m2]+(T)8.0*(derivY2[-m2]-derivY1[-m2])-derivY3[-m2]));
5024 tmp+=CGTable0[M]*(voxel_size[2]*(derivX0[m2]+(T)8.0*(derivX2[m2]-derivX1[m2])-derivX3[m2])+
5025 imag*(derivY0[m2]+(T)8.0*(derivY2[m2]-derivY1[m2])-derivY3[m2]));
5029 tmp+=CGTable1[M]*(derivZ0[M]+(T)8.0*(derivZ2[M]-derivZ1[M])-derivZ3[M]);
5034 tmp+=CGTable2[M]*(-voxel_size[2]*(derivX0[m2]+(T)8.0*(derivX2[m2]-derivX1[m2])-derivX3[m2])+
5035 imag*(derivY0[m2]+(T)8.0*(derivY2[m2]-derivY1[m2])-derivY3[m2]));
5045 return (STA_RESULT_SUCCESS);
5050 template<
typename T,
typename S>
5052 const std::complex<T> * stIn1,
5053 const std::complex<T> * stIn2,
5054 std::complex<T> * stOut,
5055 const std::size_t shape[],
5057 int stride_in1 = -1,
5058 int stride_in2 = -1,
5059 int stride_out = -1,
5060 bool clear_field =
false )
5063 if (stride_in1 == -1)
5065 if (stride_in2 == -1)
5067 if (stride_out == -1)
5072 std::size_t jumpz=shape[1]*shape[2];
5075 #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5076 for (std::size_t z=0; z<shape[0]; z++)
5080 const std::complex<T> * current_J1=&stIn1[Z*stride_in1];
5081 const std::complex<T> * current_J2=&stIn2[Z*stride_in2];
5082 std::complex<T> * current_J=&stOut[Z*stride_out];
5084 for (std::size_t i=0; i<jumpz; i++)
5088 *current_J+=(*current_J1)* (*current_J2)*alpha;
5089 current_J+=stride_out;
5090 current_J1+=stride_in1;
5091 current_J2+=stride_in2;
5095 return STA_RESULT_SUCCESS;
5108 static std::string errortostring(
STA_RESULT error)
5112 case STA_RESULT_SUCCESS:
5113 return "STA_RESULT_SUCCESS";
5114 case STA_RESULT_FAILED:
5115 return "STA_RESULT_FAILED";
5116 case STA_RESULT_SHAPE_MISMATCH:
5117 return "STA_RESULT_SHAPE_MISMATCH";
5118 case STA_RESULT_INVALID_PRODUCT:
5119 return "STA_RESULT_INVALID_PRODUCT";
5120 case STA_RESULT_STORAGE_MISMATCH:
5121 return "STA_RESULT_STORAGE_MISMATCH";
5122 case STA_RESULT_INVALID_TENSOR_RANK:
5123 return "STA_RESULT_INVALID_TENSOR_RANK";
5124 case STA_RESULT_OFIELD_TYPE_MISMATCH:
5125 return "STA_RESULT_OFIELD_TYPE_MISMATCH";
5126 case STA_RESULT_SAME_ADDRESS:
5127 return "STA_RESULT_SAME_ADDRESS";
5128 case STA_RESULT_NOT_IMPLEMENTED:
5129 return "STA_RESULT_NOT_IMPLEMENTED";
5131 return "unkown error code";
5164 STA_FIELD_STORAGE_UNSUPPORTED=0,
5178 STA_OFIELD_UNSUPPORTED=0,
5194 if (s.compare(
"STA_FIELD_STORAGE_C")==0)
5197 if (s.compare(
"STA_FIELD_STORAGE_R")==0)
5200 if (s.compare(
"STA_FIELD_STORAGE_RF")==0)
5202 return STA_FIELD_STORAGE_UNSUPPORTED;
5208 if (s.compare(
"STA_OFIELD_EVEN")==0)
5211 if (s.compare(
"STA_OFIELD_ODD")==0)
5214 if (s.compare(
"STA_OFIELD_FULL")==0)
5217 if (s.compare(
"STA_OFIELD_SINGLE")==0)
5219 return STA_OFIELD_UNSUPPORTED;
5226 return "STA_FIELD_STORAGE_C";
5229 return "STA_FIELD_STORAGE_R";
5232 return "STA_FIELD_STORAGE_RF";
5234 return "STA_FIELD_STORAGE_UNSUPPORTED";
5243 return "STA_OFIELD_EVEN";
5246 return "STA_OFIELD_ODD";
5249 return "STA_OFIELD_FULL";
5252 return "STA_OFIELD_SINGLE";
5254 return "STA_OFIELD_UNSUPPORTED";
5259 static inline int numComponents2order(
5270 return (ncomponents-1)/2;
5274 return std::sqrt(1.0*ncomponents)-1;
5278 return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5282 return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5293 return ncomponents-1;
5297 return (-3/2+std::sqrt((3.0/2.0)*(3.0/2.0)+2*ncomponents-2));
5301 return std::sqrt(1+4.0*ncomponents)-2;
5305 return std::sqrt(4.0*ncomponents)-2;
5316 static inline int order2numComponents(
5331 return ((L+1)*(L+1));
5335 return ((L+1)*(L+2))/2;
5339 return ((L+1)*(L+2))/2;
5354 return ((L+1)*(L+2))/2;
5358 return ((L+1)*(L+3))/4;
5362 return ((L+2)*(L+2))/4;
5372 static inline int getComponentOffset(
5414 return ((L+1)*(L+3))/4-L-1;
5418 return ((L+2)*(L+2))/4-L-1;
5432 template<
typename T>
5434 const std::complex<T> * stIn1,
5435 const std::complex<T> * stIn2,
5436 const std::complex<T> * stIn3,
5437 std::complex<T> * stOut,
5438 const std::size_t shape[],
5444 std::complex<T> alpha,
5445 bool normalize =
false,
5447 int stride_in1 = -1,
5448 int stride_in2 = -1,
5449 int stride_in3 = -1,
5450 int stride_out = -1,
5451 bool clear_field =
false )
5454 printf(
"tripple product experimental ! not tested yet, and slow\n");
5455 if ((stIn1==stOut)||(stIn2==stOut))
5456 return STA_RESULT_SAME_ADDRESS;
5459 bool alpha_real=(alpha.imag()==0);
5462 switch (field_storage)
5468 return sta_tripleproduct_R (
5487 return STA_RESULT_STORAGE_MISMATCH;
5527 template<
typename T>
5529 const std::complex<T> * stIn1,
5530 const std::complex<T> * stIn2,
5531 std::complex<T> * stOut,
5532 const std::size_t shape[],
5536 std::complex<T> alpha = T( 1 ),
5537 bool normalize =
false,
5539 int stride_in1 = -1,
5540 int stride_in2 = -1,
5541 int stride_out = -1,
5542 bool clear_field =
false )
5544 if ((stIn1==stOut)||(stIn2==stOut))
5545 return STA_RESULT_SAME_ADDRESS;
5548 bool alpha_real=(alpha.imag()==0);
5555 if ((J1==0)&&(J2==0)&&(J==0))
5558 return sta_product0(stIn1,
5569 return sta_product0(stIn1,
5579 return STA_RESULT_STORAGE_MISMATCH;
5584 switch (field_storage)
5589 result=sta_product_C (stIn1,
5603 result=sta_product_C (stIn1,
5622 result=sta_product_R (stIn1,
5636 return STA_RESULT_STORAGE_MISMATCH;
5657 result=sta_product_Rft (stIn1,
5671 return STA_RESULT_STORAGE_MISMATCH;
5676 printf(
"unsupported\n");
5699 template<
typename T,
typename S>
5701 const std::complex<T> * stIn,
5702 std::complex<T> * stOut,
5703 const std::size_t shape[],
5706 bool conjugate=
false,
5708 int stride_out = -1,
5709 bool clear_field =
false )
5711 bool doalpha= ( alpha!=S ( 1 ) );
5714 stride_in=ncomponents;
5716 stride_out=ncomponents;
5719 std::size_t jumpz=shape[1]*shape[2];
5723 #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5724 for (std::size_t a=0; a<shape[0]; a++ )
5726 std::complex<T> *resultp=stOut+a*jumpz*stride_out;
5727 const std::complex<T> *inp=stIn+a*jumpz*stride_in;
5729 for ( std::size_t i=0; i<jumpz; i++ )
5731 for (
int b=0; b<ncomponents; b++)
5733 std::complex<T> tmp=inp[b];
5735 tmp=std::conj ( tmp );
5745 resultp+=stride_out;
5749 return STA_RESULT_SUCCESS;
5764 template<
typename T>
5766 const std::complex<T> * stIn,
5767 std::complex<T> * stOut,
5768 const std::size_t shape[],
5772 int stride_out = -1,
5773 bool clear_field =
false )
5776 return STA_RESULT_SAME_ADDRESS;
5788 std::size_t jumpz=shape[1]*shape[2];
5791 #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
5792 for (std::size_t a=0; a<shape[0]; a++ )
5794 std::complex<T> *resultp=stOut+a*jumpz*stride_out;
5795 const std::complex<T> *inp=stIn+a*jumpz*stride_in+J;
5797 for ( std::size_t i=0; i<jumpz; i++ )
5799 const std::complex<T> * current=inp;
5805 for (
int b=-J; b<=J; b++)
5807 tmp+=std::norm(current[b]);
5811 for (
int b=-J; b<0; b++)
5813 tmp+=T( 2 )*std::norm(current[b]);
5815 tmp+=std::norm(current[0]);
5820 *resultp+=std::sqrt(tmp);
5822 resultp+=stride_out;
5827 return STA_RESULT_SUCCESS;
5863 template<
typename T>
5865 const std::complex<T> * stIn,
5866 std::complex<T> * stOut ,
5867 const std::size_t shape[],
5870 bool conjugate=
false,
5871 std::complex<T> alpha= ( T ) 1.0,
5873 const T v_size[]=NULL,
5875 int stride_out = -1,
5876 bool clear_field =
false,
5880 return STA_RESULT_SAME_ADDRESS;
5883 bool alpha_real=(alpha.imag()==0);
5885 switch (field_storage)
5889 result=sta_derivatives_C (stIn,
5909 result=sta_derivatives_R (stIn,
5929 result=sta_derivatives_R4th (stIn,
5942 return STA_RESULT_STORAGE_MISMATCH;
5948 printf(
"unsupported derivative\n");
5990 template<
typename T>
5992 const std::complex<T> * stIn,
5993 std::complex<T> * stOut ,
5994 const std::size_t shape[],
5997 bool conjugate=
false,
5998 std::complex<T> alpha= ( T ) 1.0,
6000 const T v_size[]=NULL,
6002 int stride_out = -1,
6003 bool clear_field =
false )
6006 return STA_RESULT_SAME_ADDRESS;
6009 bool alpha_real=(alpha.imag()==0);
6011 switch (field_storage)
6015 result=sta_derivatives2_C (stIn,
6032 result=sta_derivatives2_R (stIn,
6044 return STA_RESULT_STORAGE_MISMATCH;
6049 printf(
"unsupported\n");
6077 template<
typename T>
6079 const std::complex<T> * stIn,
6080 std::complex<T> * stOut ,
6081 const std::size_t shape[],
6084 std::complex<T> alpha=1,
6086 const T v_size[]=NULL,
6088 int stride_out = -1,
6089 bool clear_field =
false )
6092 return STA_RESULT_SAME_ADDRESS;
6095 bool alpha_real=(alpha.imag()==0);
6097 return STA_RESULT_STORAGE_MISMATCH;
6100 if ( components<=0 )
return STA_RESULT_INVALID_TENSOR_RANK;
6102 if (( components==1 )&&(stride_in<=1)&&(stride_out<=1))
6103 sta_laplace_1component ( stIn,stOut,shape,type,alpha,v_size,clear_field);
6107 sta_laplace_Ncomponents_R ( stIn,stOut,shape,components,type,alpha.real(),v_size,stride_in,stride_out,clear_field);
6109 sta_laplace_Ncomponents_C ( stIn,stOut,shape,components,type,alpha,v_size,stride_in,stride_out,clear_field);
6111 return STA_RESULT_SUCCESS;
6139 template<
typename T,
typename S>
6141 std::complex<T> * stOut,
6142 const std::size_t shape[],
6145 bool conjugate=
false,
6148 #ifdef _STA_LINK_FFTW
6149 int flag=FFTW_ESTIMATE )
6155 return STA_RESULT_SAME_ADDRESS;
6163 result=fft ( stIn,stOut,shape_,components,forward,flag );
6167 if (conjugate || (alpha!=T(1)) )
6189 template<
typename T>
6193 virtual std::complex<T> fspecial(
const std::complex<T> & value)
const =0;
6198 template<
typename T>
6200 const std::complex<T> * stIn,
6201 std::complex<T> * stOut,
6202 const std::size_t shape[],
6206 int stride_out = -1,
6207 bool clear_field =
false )
6211 stride_in=ncomponents;
6213 stride_out=ncomponents;
6216 std::size_t jumpz=shape[1]*shape[2];
6220 #pragma omp parallel for num_threads(hanalysis::get_numCPUs())
6221 for (std::size_t a=0; a<shape[0]; a++ )
6223 std::complex<T> *resultp=stOut+a*jumpz*stride_out;
6224 const std::complex<T> *inp=stIn+a*jumpz*stride_in;
6226 for ( std::size_t i=0; i<jumpz; i++ )
6228 for (
int b=0; b<ncomponents; b++)
6230 const std::complex<T> & in=inp[b];
6231 std::complex<T> & out=resultp[b];
6234 out+=fspecial.fspecial(in);
6236 resultp+=stride_out;
6240 return hanalysis::STA_RESULT_SUCCESS;
STA_FIELD_STORAGE
tensor field data storage
Definition: stensor.h:5163
tensor field has all components of even ranks :
Definition: stensor.h:5184
STA_RESULT sta_derivatives(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, int Jupdown, bool conjugate=false, std::complex< T > alpha=(T) 1.0, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false, int accuracy=0)
spherical tensor derivative:
Definition: stensor.h:5864
tensor field has one single component of rank :
Definition: stensor.h:5180
STA_FIELD_TYPE
tensor field data interpretations according to certain symmetries
Definition: stensor.h:5177
STA_RESULT
function return value
Definition: stensor.h:124
Definition: stensor.h:6190
STA_RESULT sta_norm(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, int stride_in=-1, int stride_out=-1, bool clear_field=false)
returns the length (norm) of the vectors, component by component
Definition: stensor.h:5765
Definition: stensor.h:5173
STA_RESULT sta_product(const std::complex< T > *stIn1, const std::complex< T > *stIn2, std::complex< T > *stOut, const std::size_t shape[], int J1, int J2, int J, std::complex< T > alpha=T(1), bool normalize=false, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, int stride_in1=-1, int stride_in2=-1, int stride_out=-1, bool clear_field=false)
spherical tensor product: the plain and the normalized product (selected by the normalize flag), respectively
Definition: stensor.h:5528
Definition: stensor.h:5167
STA_RESULT sta_fft(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int components, bool forward, bool conjugate=false, S alpha=(S) 1, int flag=0)
tensor fft component by component
Definition: stensor.h:6140
The STA-ImageAnalysisToolkit namespace.
Definition: stafield.h:55
tensor field has all components of odd ranks :
Definition: stensor.h:5186
STA_RESULT sta_derivatives2(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int J, int Jupdown, bool conjugate=false, std::complex< T > alpha=(T) 1.0, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false)
spherical tensor double-derivative:
Definition: stensor.h:5991
STA_RESULT sta_mult(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int ncomponents, S alpha=S(1), bool conjugate=false, int stride_in=-1, int stride_out=-1, bool clear_field=false)
computes alpha * stIn (optionally complex-conjugated), component by component
Definition: stensor.h:5700
tensor field has all components of ranks :
Definition: stensor.h:5182
STA_RESULT sta_laplace(const std::complex< T > *stIn, std::complex< T > *stOut, const std::size_t shape[], int components=1, int type=1, std::complex< T > alpha=1, STA_FIELD_STORAGE field_storage=STA_FIELD_STORAGE_C, const T v_size[]=NULL, int stride_in=-1, int stride_out=-1, bool clear_field=false)
Laplacian, applied to each tensor-field component.
Definition: stensor.h:6078
Definition: stensor.h:5170